Merge metrics descriptions in textfile collector (#2475)

The textfile collector will now provide a unified metric description
(that will look like "Metric read from file/a.prom, file/b.prom")
for metrics collected across several text files that don't already
have a description.

Also change the error handling in the textfile collector tests to
ContinueOnError to better mirror the real-life use case.

Signed-off-by: Guillaume Espanel <guillaume.espanel.ext@ovhcloud.com>

Signed-off-by: Guillaume Espanel <guillaume.espanel.ext@ovhcloud.com>
This commit is contained in:
Guillaume E 2022-09-20 12:49:21 +02:00 committed by GitHub
parent 31f9aefe2f
commit 863f3ac54c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 134 additions and 20 deletions

View file

@ -0,0 +1,11 @@
# HELP events_total A nice help message.
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_different_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_different_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View file

@ -0,0 +1,5 @@
# HELP events_total A nice help message.
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

View file

@ -0,0 +1,5 @@
# HELP events_total A different help message.
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

View file

@ -0,0 +1,13 @@
# HELP events_total Metric read from fixtures/textfile/metrics_merge_empty_help/a.prom, fixtures/textfile/metrics_merge_empty_help/b.prom
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
events_total{file="b",foo="bar"} 30
events_total{file="b",foo="baz"} 40
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_empty_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_empty_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View file

@ -0,0 +1,5 @@
# HELP events_total
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

View file

@ -0,0 +1,5 @@
# HELP events_total
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

View file

@ -0,0 +1,13 @@
# HELP events_total Metric read from fixtures/textfile/metrics_merge_no_help/a.prom, fixtures/textfile/metrics_merge_no_help/b.prom
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
events_total{file="b",foo="bar"} 30
events_total{file="b",foo="baz"} 40
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_no_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_no_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View file

@ -0,0 +1,4 @@
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

View file

@ -0,0 +1,4 @@
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

View file

@ -0,0 +1,13 @@
# HELP events_total The same help.
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
events_total{file="b",foo="bar"} 30
events_total{file="b",foo="baz"} 40
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_same_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_same_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0

View file

@ -0,0 +1,5 @@
# HELP events_total The same help.
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

View file

@ -0,0 +1,5 @@
# HELP events_total The same help.
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

View file

@ -191,6 +191,8 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
// Iterate over files and accumulate their metrics, but also track any // Iterate over files and accumulate their metrics, but also track any
// parsing errors so an error metric can be reported. // parsing errors so an error metric can be reported.
var errored bool var errored bool
var parsedFamilies []*dto.MetricFamily
metricsNamesToFiles := map[string][]string{}
paths, err := filepath.Glob(c.path) paths, err := filepath.Glob(c.path)
if err != nil || len(paths) == 0 { if err != nil || len(paths) == 0 {
@ -208,20 +210,39 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
} }
for _, f := range files { for _, f := range files {
metricsFilePath := filepath.Join(path, f.Name())
if !strings.HasSuffix(f.Name(), ".prom") { if !strings.HasSuffix(f.Name(), ".prom") {
continue continue
} }
mtime, err := c.processFile(path, f.Name(), ch) mtime, families, err := c.processFile(path, f.Name(), ch)
for _, mf := range families {
metricsNamesToFiles[*mf.Name] = append(metricsNamesToFiles[*mf.Name], metricsFilePath)
parsedFamilies = append(parsedFamilies, mf)
}
if err != nil { if err != nil {
errored = true errored = true
level.Error(c.logger).Log("msg", "failed to collect textfile data", "file", f.Name(), "err", err) level.Error(c.logger).Log("msg", "failed to collect textfile data", "file", f.Name(), "err", err)
continue continue
} }
mtimes[filepath.Join(path, f.Name())] = *mtime mtimes[metricsFilePath] = *mtime
} }
} }
for _, mf := range parsedFamilies {
if mf.Help == nil {
help := fmt.Sprintf("Metric read from %s", strings.Join(metricsNamesToFiles[*mf.Name], ", "))
mf.Help = &help
}
}
for _, mf := range parsedFamilies {
convertMetricFamily(mf, ch, c.logger)
}
c.exportMTimes(mtimes, ch) c.exportMTimes(mtimes, ch)
// Export if there were errors. // Export if there were errors.
@ -243,44 +264,33 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
} }
// processFile processes a single file, returning its modification time on success. // processFile processes a single file, returning its modification time on success.
func (c *textFileCollector) processFile(dir, name string, ch chan<- prometheus.Metric) (*time.Time, error) { func (c *textFileCollector) processFile(dir, name string, ch chan<- prometheus.Metric) (*time.Time, map[string]*dto.MetricFamily, error) {
path := filepath.Join(dir, name) path := filepath.Join(dir, name)
f, err := os.Open(path) f, err := os.Open(path)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to open textfile data file %q: %w", path, err) return nil, nil, fmt.Errorf("failed to open textfile data file %q: %w", path, err)
} }
defer f.Close() defer f.Close()
var parser expfmt.TextParser var parser expfmt.TextParser
families, err := parser.TextToMetricFamilies(f) families, err := parser.TextToMetricFamilies(f)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to parse textfile data from %q: %w", path, err) return nil, nil, fmt.Errorf("failed to parse textfile data from %q: %w", path, err)
} }
if hasTimestamps(families) { if hasTimestamps(families) {
return nil, fmt.Errorf("textfile %q contains unsupported client-side timestamps, skipping entire file", path) return nil, nil, fmt.Errorf("textfile %q contains unsupported client-side timestamps, skipping entire file", path)
}
for _, mf := range families {
if mf.Help == nil {
help := fmt.Sprintf("Metric read from %s", path)
mf.Help = &help
}
}
for _, mf := range families {
convertMetricFamily(mf, ch, c.logger)
} }
// Only stat the file once it has been parsed and validated, so that // Only stat the file once it has been parsed and validated, so that
// a failure does not appear fresh. // a failure does not appear fresh.
stat, err := f.Stat() stat, err := f.Stat()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to stat %q: %w", path, err) return nil, families, fmt.Errorf("failed to stat %q: %w", path, err)
} }
t := stat.ModTime() t := stat.ModTime()
return &t, nil return &t, families, nil
} }
// hasTimestamps returns true when metrics contain unsupported timestamps. // hasTimestamps returns true when metrics contain unsupported timestamps.

View file

@ -95,6 +95,22 @@ func TestTextfileCollector(t *testing.T) {
path: "fixtures/textfile/*_extra_dimension", path: "fixtures/textfile/*_extra_dimension",
out: "fixtures/textfile/glob_extra_dimension.out", out: "fixtures/textfile/glob_extra_dimension.out",
}, },
{
path: "fixtures/textfile/metrics_merge_empty_help",
out: "fixtures/textfile/metrics_merge_empty_help.out",
},
{
path: "fixtures/textfile/metrics_merge_no_help",
out: "fixtures/textfile/metrics_merge_no_help.out",
},
{
path: "fixtures/textfile/metrics_merge_same_help",
out: "fixtures/textfile/metrics_merge_same_help.out",
},
{
path: "fixtures/textfile/metrics_merge_different_help",
out: "fixtures/textfile/metrics_merge_different_help.out",
},
} }
for i, test := range tests { for i, test := range tests {
@ -117,7 +133,7 @@ func TestTextfileCollector(t *testing.T) {
registry.MustRegister(collectorAdapter{c}) registry.MustRegister(collectorAdapter{c})
rw := httptest.NewRecorder() rw := httptest.NewRecorder()
promhttp.HandlerFor(registry, promhttp.HandlerOpts{}).ServeHTTP(rw, &http.Request{}) promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}).ServeHTTP(rw, &http.Request{})
got := string(rw.Body.String()) got := string(rw.Body.String())
want, err := os.ReadFile(test.out) want, err := os.ReadFile(test.out)