mirror of
https://github.com/apple/foundationdb.git
synced 2026-01-25 04:18:18 +00:00
Eliminate poorly motivated trace field name validation, and change SimpleCounter to emit Prometheus-compatible metric names (#12356)
Trace.cpp does not provide a rationale for validateField() and validateFormat(). It appears to be some kind of XML related validation. Why we should care about this is not clear. The output is going to Splunk. As far as I know, Splunk is supposed to be pretty liberal in what it accepts as input. Add logic in SimpleCounter.cpp to convert hierarchical metric names to Prometheus compatible metric names by the simple rule of converting intermediate '/' chars into '_', i.e. something like /flow/arena/bytesAllocated becomes flow_arena_bytesAllocated. I feel hierarchical names are still slightly better, and very easy to reason about when creating new metric names on the fly, but ensuring that they are at least Prometheus compatible should allow targeting to future metrics platforms down the road. Testing: 20250905-010257-gglass-25f3ef43ccc1c130 compressed=True data_size=41538755 duration=6389116 ended=100000 fail_fast=10 max_runs=100000 pass=100000 priority=100 remaining=0 runtime=1:00:34 sanity=False started=100000 stopped=20250905-020331 submitted=20250905-010257 timeout=5400 username=gglass * replace undocumented trace event field name rules with a rule that enforces that field names must be valid Prometheus metric names. No idea why the old code declines to even state what it is trying to be compatible with * move Prometheus metric name validation to SimpleCounter.cpp. Remove validation from Trace.cpp. This stuff is going to Splunk. Splunk takes what we give it.
This commit is contained in:
@@ -23,30 +23,16 @@
|
||||
#include "flow/SimpleCounter.h"
|
||||
#include "flow/UnitTest.h"
|
||||
|
||||
// Trace.cpp::validateField insists on applying some rules to trace
|
||||
// field names. Instead of fighting this, for now just make our
|
||||
// hierarchical names comply by converting / to 'U'. Yes, 'U'.
|
||||
//
|
||||
// If this annoys you, there are several options:
|
||||
// 1) Don't use hierarchical metric names.
|
||||
// 2) Go figure out why Trace.cpp has this restriction (it itself offers
|
||||
// no explanation whatsoever), and consider relaxing it. HOWEVER: consider that
|
||||
// Prometheus imposes very strict rules on metric names. The most useful thing one
|
||||
// can do with Prometheus-compatible metric names and still retain some manner
|
||||
// of hierarchical naming is to use underscore as the component separator.
|
||||
// Longer term, if we are emitting Prometheus-compatible metrics, then replacing
|
||||
// '/' with '_' could be a strategy. However, we would want to ensure that we don't end
|
||||
// up with random naming collisions from people who might use '_' for its normal purpose of
|
||||
// separating any old words anywhere, including within the same path component.
|
||||
// More background: https://chatgpt.com/share/68ad3e33-00b0-800b-9fa9-644ef201feb9
|
||||
//
|
||||
// So basically, Trace.cpp rules suck, and Prometheus rules suck, but we should be
|
||||
// aware of them and play ball.
|
||||
static std::string mungeName(const std::string input) {
|
||||
// Convert hierarchical metric names into Prometheus-compatible metric
|
||||
// names. Do this by a) removing initial '/' chars, and b) converting
|
||||
// remaining '/' chars to '_' chars.
|
||||
static std::string hierarchicalToPrometheus(const std::string input) {
|
||||
std::string output;
|
||||
for (char ch : input) {
|
||||
if (ch == '/' || ch == ':') {
|
||||
output += "U";
|
||||
if (ch == '/') {
|
||||
if (output.size() > 0) {
|
||||
output += '_';
|
||||
}
|
||||
} else {
|
||||
output += ch;
|
||||
}
|
||||
@@ -54,6 +40,37 @@ static std::string mungeName(const std::string input) {
|
||||
return output;
|
||||
}
|
||||
|
||||
// ChatGPT generated code to return true iff `name` is a valid Prometheus
|
||||
// metric name. Below we call this on trace event fields for the reason
|
||||
// that we are contemplating converting fields in trace events to metrics
|
||||
// in downstream Prometheus-compatible metrics systems.
|
||||
static bool isValidPrometheusMetricName(std::string_view name) {
|
||||
if (name.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// First character: [a-zA-Z_:]
|
||||
char first = name.front();
|
||||
if (!(std::isalpha(static_cast<unsigned char>(first)) || first == '_' || first == ':')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Rest: [a-zA-Z0-9_:]*
|
||||
for (size_t i = 1; i < name.size(); ++i) {
|
||||
char c = name[i];
|
||||
if (!(std::isalnum(static_cast<unsigned char>(c)) || c == '_' || c == ':')) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Reserved prefix check: names starting with "__" are reserved
|
||||
if (name.size() >= 2 && name[0] == '_' && name[1] == '_') {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// This should be called periodically by higher level code somewhere.
|
||||
void simpleCounterReport(Severity severity) {
|
||||
static SimpleCounter<int64_t>* reportCount = SimpleCounter<int64_t>::makeCounter("/flow/counters/reports");
|
||||
@@ -71,8 +88,8 @@ void simpleCounterReport(Severity severity) {
|
||||
do {
|
||||
SimpleCounter<int64_t>* ic = intCounters[i];
|
||||
std::string n = ic->name();
|
||||
n = mungeName(n);
|
||||
ASSERT(validateField(n.c_str(), /* allowUnderscores= */ true));
|
||||
n = hierarchicalToPrometheus(n);
|
||||
ASSERT(isValidPrometheusMetricName(n));
|
||||
traceEvent.detail(std::move(n), ic->get());
|
||||
i++;
|
||||
c++;
|
||||
@@ -85,8 +102,8 @@ void simpleCounterReport(Severity severity) {
|
||||
do {
|
||||
SimpleCounter<double>* dc = doubleCounters[i];
|
||||
std::string n = dc->name();
|
||||
n = mungeName(n);
|
||||
ASSERT(validateField(n.c_str(), /* allowUnderscores= */ true));
|
||||
n = hierarchicalToPrometheus(n);
|
||||
ASSERT(isValidPrometheusMetricName(n));
|
||||
traceEvent.detail(std::move(n), dc->get());
|
||||
i++;
|
||||
c++;
|
||||
|
||||
@@ -266,7 +266,6 @@ public:
|
||||
};
|
||||
void action(WriteBuffer& a) {
|
||||
for (const auto& event : a.events) {
|
||||
event.validateFormat();
|
||||
logWriter->write(formatter->formatEvent(event));
|
||||
}
|
||||
|
||||
@@ -1787,41 +1786,6 @@ std::string TraceEventFields::toString() const {
|
||||
return str;
|
||||
}
|
||||
|
||||
// FIXME: give the rationale for the naming scheme that this is enforcing
|
||||
bool validateField(const char* key, bool allowUnderscores) {
|
||||
if ((key[0] < 'A' || key[0] > 'Z') && key[0] != '_') {
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* underscore = strchr(key, '_');
|
||||
while (underscore) {
|
||||
if (!allowUnderscores || ((underscore[1] < 'A' || underscore[1] > 'Z') && key[0] != '_' && key[0] != '\0')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
underscore = strchr(&underscore[1], '_');
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void TraceEventFields::validateFormat() const {
|
||||
// FIXME: this condition should be expanded to include any unit test mode.
|
||||
if (g_network && g_network->isSimulated()) {
|
||||
for (Field field : fields) {
|
||||
if (!validateField(field.first.c_str(), false)) {
|
||||
fprintf(stderr,
|
||||
"Trace event detail name `%s' is invalid in:\n\t%s\n",
|
||||
field.first.c_str(),
|
||||
toString().c_str());
|
||||
}
|
||||
if (field.first == "Type" && !validateField(field.second.c_str(), true)) {
|
||||
fprintf(stderr, "Trace event detail Type `%s' is invalid\n", field.second.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string traceableStringToString(const char* value, size_t S) {
|
||||
if (g_network) {
|
||||
ASSERT_WE_THINK(S > 0 && value[S - 1] == '\0');
|
||||
|
||||
@@ -586,8 +586,6 @@ void disposeTraceFileWriter();
|
||||
std::string getTraceFormatExtension();
|
||||
uint64_t getTraceThreadId();
|
||||
|
||||
bool validateField(const char* key, bool allowUnderscores);
|
||||
|
||||
template <class T>
|
||||
class Future;
|
||||
class Void;
|
||||
|
||||
Reference in New Issue
Block a user