diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go
new file mode 100644
index 000000000..b98d430e2
--- /dev/null
+++ b/cmd/promtool/backfill.go
@@ -0,0 +1,255 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"bufio"
+	"context"
+	"io"
+	"math"
+	"os"
+
+	"github.com/go-kit/kit/log"
+	"github.com/pkg/errors"
+	"github.com/prometheus/common/expfmt"
+	"github.com/prometheus/prometheus/pkg/labels"
+	"github.com/prometheus/prometheus/pkg/textparse"
+	"github.com/prometheus/prometheus/tsdb"
+	tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
+)
+
+// OpenMetricsParser implements textparse.Parser on top of an io.Reader. The
+// input is split into lines and every line is handed to a fresh OpenMetrics
+// text parser. Only Next is defined here; all other methods are served by the
+// embedded parser built for the line that Next returned last.
+type OpenMetricsParser struct {
+	textparse.Parser
+	s *bufio.Scanner
+	// line is a parser-owned copy of the current input line (plus a trailing
+	// newline). The embedded parser is built on top of it.
+	line []byte
+}
+
+// NewOpenMetricsParser returns an OpenMetricsParser reading from the provided reader.
+func NewOpenMetricsParser(r io.Reader) *OpenMetricsParser {
+	return &OpenMetricsParser{s: bufio.NewScanner(r)}
+}
+
+// Next advances the parser to the next entry, skipping lines for which the
+// per-line parser reports io.EOF. It returns io.EOF once the input is
+// exhausted, or the scanner's error if reading the input failed.
+func (p *OpenMetricsParser) Next() (textparse.Entry, error) {
+	for p.s.Scan() {
+		// Copy the token rather than appending to p.s.Bytes() in place: that
+		// slice aliases the scanner's internal buffer, and appending to it
+		// would write into memory the scanner still owns.
+		p.line = append(p.line[:0], p.s.Bytes()...)
+		p.line = append(p.line, '\n')
+
+		p.Parser = textparse.New(p.line, string(expfmt.FmtOpenMetrics))
+		if et, err := p.Parser.Next(); err != io.EOF {
+			return et, err
+		}
+	}
+
+	if err := p.s.Err(); err != nil {
+		return 0, err
+	}
+	return 0, io.EOF
+}
+
+// getMinAndMaxTimestamps reads p to the end and reports the largest and the
+// smallest timestamp among its series entries. Note the result order: despite
+// the function name, it is (max, min, error). Both values are 0 when no
+// series was seen. A series without a timestamp is an error.
+func getMinAndMaxTimestamps(p textparse.Parser) (int64, int64, error) {
+	var maxt, mint int64 = math.MinInt64, math.MaxInt64
+
+	for {
+		entry, err := p.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return 0, 0, errors.Wrap(err, "next")
+		}
+
+		if entry != textparse.EntrySeries {
+			continue
+		}
+
+		_, ts, _ := p.Series()
+		if ts == nil {
+			return 0, 0, errors.Errorf("expected timestamp for series got none")
+		}
+
+		if *ts > maxt {
+			maxt = *ts
+		}
+		if *ts < mint {
+			mint = *ts
+		}
+	}
+
+	// A bound still holding its initial sentinel was never updated; report 0.
+	if maxt == math.MinInt64 {
+		maxt = 0
+	}
+	if mint == math.MaxInt64 {
+		mint = 0
+	}
+
+	return maxt, mint, nil
+}
+
+// createBlocks writes the samples of input whose timestamps fall between mint
+// and maxt into TSDB blocks under outputDir, one block per
+// tsdb.DefaultBlockDuration-aligned window. The input is re-read from the
+// start for every window, and the appender is committed every
+// maxSamplesInAppender samples to bound memory usage. outputDir is created if
+// it does not exist. Each block that gets written is printed via printBlocks.
+func createBlocks(input *os.File, mint, maxt int64, maxSamplesInAppender int, outputDir string) (returnErr error) {
+	blockDuration := tsdb.DefaultBlockDuration
+	// Align mint down to a block boundary. The remainder of a negative mint is
+	// negative (Go truncates toward zero), so it has to be floored explicitly;
+	// otherwise the first window would start after mint and the earliest
+	// samples would be skipped.
+	if rem := mint % blockDuration; rem < 0 {
+		mint -= rem + blockDuration
+	} else {
+		mint -= rem
+	}
+
+	// Make sure the destination exists before the read-only DB is opened on it.
+	if err := os.MkdirAll(outputDir, 0777); err != nil {
+		return errors.Wrap(err, "create output dir")
+	}
+
+	db, err := tsdb.OpenDBReadOnly(outputDir, nil)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		returnErr = tsdb_errors.NewMulti(returnErr, db.Close()).Err()
+	}()
+
+	for t := mint; t <= maxt; t = t + blockDuration {
+		// err is a named result so that the deferred w.Close() error is
+		// actually returned instead of being assigned to a dead local.
+		err := func() (err error) {
+			w, err := tsdb.NewBlockWriter(log.NewNopLogger(), outputDir, blockDuration)
+			if err != nil {
+				return errors.Wrap(err, "block writer")
+			}
+			defer func() {
+				err = tsdb_errors.NewMulti(err, w.Close()).Err()
+			}()
+
+			if _, err := input.Seek(0, 0); err != nil {
+				return errors.Wrap(err, "seek file")
+			}
+			ctx := context.Background()
+			app := w.Appender(ctx)
+			p := NewOpenMetricsParser(input)
+			tsUpper := t + blockDuration
+			samplesCount := 0
+			for {
+				e, err := p.Next()
+				if err == io.EOF {
+					break
+				}
+				if err != nil {
+					return errors.Wrap(err, "parse")
+				}
+				if e != textparse.EntrySeries {
+					continue
+				}
+
+				l := labels.Labels{}
+				p.Metric(&l)
+				_, ts, v := p.Series()
+				if ts == nil {
+					return errors.Errorf("expected timestamp for series %v, got none", l)
+				}
+				// Only samples inside the current window [t, tsUpper) belong to this block.
+				if *ts < t || *ts >= tsUpper {
+					continue
+				}
+
+				if _, err := app.Add(l, *ts, v); err != nil {
+					return errors.Wrap(err, "add sample")
+				}
+
+				samplesCount++
+				if samplesCount < maxSamplesInAppender {
+					continue
+				}
+
+				// If we arrive here, the samples count has reached maxSamplesInAppender.
+				// Therefore the old appender is committed and a new one is created.
+				// This prevents keeping too many samples lined up in an appender and thus in RAM.
+				if err := app.Commit(); err != nil {
+					return errors.Wrap(err, "commit")
+				}
+
+				app = w.Appender(ctx)
+				samplesCount = 0
+			}
+			if err := app.Commit(); err != nil {
+				return errors.Wrap(err, "commit")
+			}
+
+			blockID, err := w.Flush(ctx)
+			if err == tsdb.ErrNoSeriesAppended {
+				// No sample fell into this window, so no block was written.
+				return nil
+			}
+			if err != nil {
+				return errors.Wrap(err, "flush")
+			}
+
+			// Print exactly the block written above, looked up by its ID.
+			blocks, err := db.Blocks()
+			if err != nil {
+				return errors.Wrap(err, "get blocks")
+			}
+			for _, b := range blocks {
+				if b.Meta().ULID == blockID {
+					printBlocks([]tsdb.BlockReader{b}, true)
+					break
+				}
+			}
+			return nil
+		}()
+
+		if err != nil {
+			return errors.Wrap(err, "process blocks")
+		}
+	}
+	return nil
+}
+
+// backfill reads OpenMetrics samples from input and writes them as TSDB
+// blocks into outputDir. It scans the input once to find the overall time
+// range and then lets createBlocks write the blocks, committing the appender
+// every maxSamplesInAppender samples.
+func backfill(maxSamplesInAppender int, input *os.File, outputDir string) (err error) {
+	p := NewOpenMetricsParser(input)
+	// getMinAndMaxTimestamps returns the maximum first.
+	maxt, mint, err := getMinAndMaxTimestamps(p)
+	if err != nil {
+		return errors.Wrap(err, "getting min and max timestamp")
+	}
+	return errors.Wrap(createBlocks(input, mint, maxt, maxSamplesInAppender, outputDir), "block creation")
+}
diff --git a/cmd/promtool/backfill_test.go b/cmd/promtool/backfill_test.go
new file mode 100644
index 000000000..612ee2fb8
--- /dev/null
+++ b/cmd/promtool/backfill_test.go
@@ -0,0 +1,418 @@
+// Copyright 2020 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in
compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"io/ioutil"
+	"math"
+	"os"
+	"sort"
+	"testing"
+
+	"github.com/prometheus/prometheus/pkg/labels"
+	"github.com/prometheus/prometheus/storage"
+	"github.com/prometheus/prometheus/tsdb"
+	"github.com/stretchr/testify/require"
+)
+
+// backfillSample is one sample read back from the TSDB together with the labels of its series.
+type backfillSample struct {
+	Timestamp int64
+	Value     float64
+	Labels    labels.Labels
+}
+
+// backfillExpectation describes the TSDB contents expected after a successful
+// backfill of one test input.
+type backfillExpectation struct {
+	// MinTime and MaxTime are the lowest and highest expected sample timestamps.
+	MinTime int64
+	MaxTime int64
+	// NumBlocks is the number of blocks expected in the output directory.
+	NumBlocks int
+	// Samples lists every sample expected to be readable from the blocks.
+	Samples []backfillSample
+}
+
+// createTemporaryOpenMetricsFile writes text to a new temporary file and
+// returns its path. The caller is responsible for removing the file.
+func createTemporaryOpenMetricsFile(t *testing.T, text string) string {
+	newf, err := ioutil.TempFile("", "")
+	require.NoError(t, err)
+
+	_, err = newf.WriteString(text)
+	require.NoError(t, err)
+	require.NoError(t, newf.Close())
+
+	return newf.Name()
+}
+
+// sortSamples orders samples in place by timestamp, breaking ties by value.
+func sortSamples(samples []backfillSample) {
+	sort.Slice(samples, func(x, y int) bool {
+		sx, sy := samples[x], samples[y]
+		if sx.Timestamp != sy.Timestamp {
+			return sx.Timestamp < sy.Timestamp
+		}
+		return sx.Value < sy.Value
+	})
+}
+
+// queryAllSeries returns every sample of every series that q selects.
+// expectedMinTime and expectedMaxTime are accepted but currently unused.
+func queryAllSeries(t testing.TB, q storage.Querier, expectedMinTime, expectedMaxTime int64) []backfillSample {
+	ss := q.Select(false, nil, labels.MustNewMatcher(labels.MatchRegexp, "", ".*"))
+	samples := []backfillSample{}
+	for ss.Next() {
+		series := ss.At()
+		it := series.Iterator()
+		for it.Next() {
+			ts, v := it.At()
+			samples = append(samples, backfillSample{Timestamp: ts, Value: v, Labels: series.Labels()})
+		}
+		// Check the iterator's error after the loop, once Next has returned false.
+		require.NoError(t, it.Err())
+	}
+	require.NoError(t, ss.Err())
+	return samples
+}
+
+// testBlocks asserts that db holds expectedNumBlocks blocks, each confined to a
+// single tsdb.DefaultBlockDuration window, and that the stored samples equal
+// expectedSamples and range from expectedMinTime to expectedMaxTime.
+func testBlocks(t *testing.T, db *tsdb.DB, expectedMinTime, expectedMaxTime int64, expectedSamples []backfillSample, expectedNumBlocks int) {
+	blocks := db.Blocks()
+	require.Equal(t, expectedNumBlocks, len(blocks))
+
+	for _, block := range blocks {
+		require.Equal(t, true, block.MinTime()/tsdb.DefaultBlockDuration == (block.MaxTime()-1)/tsdb.DefaultBlockDuration)
+	}
+
+	q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64)
+	require.NoError(t, err)
+	defer func() {
+		require.NoError(t, q.Close())
+	}()
+
+	allSamples := queryAllSeries(t, q, expectedMinTime, expectedMaxTime)
+	sortSamples(allSamples)
+	sortSamples(expectedSamples)
+	require.Equal(t, expectedSamples, allSamples)
+
+	if len(allSamples) > 0 {
+		require.Equal(t, expectedMinTime, allSamples[0].Timestamp)
+		require.Equal(t, expectedMaxTime, allSamples[len(allSamples)-1].Timestamp)
+	}
+}
+
+// TestBackfill feeds OpenMetrics inputs to backfill. For inputs expected to
+// succeed it reopens the output directory and compares the stored blocks and
+// samples with the expectation; for the others it only requires an error.
+func TestBackfill(t *testing.T) {
+	tests := []struct {
+		// ToParse is the OpenMetrics text handed to backfill.
+		ToParse string
+		// IsOk tells whether backfill is expected to succeed.
+		IsOk bool
+		// Description names the subtest.
+		Description string
+		// MaxSamplesInAppender is the batch size passed to backfill.
+		MaxSamplesInAppender int
+		// Expected is only consulted when IsOk is true.
+		Expected backfillExpectation
+	}{
+		{
+			ToParse:              `# EOF`,
+			IsOk:                 true,
+			Description:          "Empty file.",
+			MaxSamplesInAppender: 5000,
+			Expected: backfillExpectation{
+				MinTime:   math.MaxInt64,
+				MaxTime:   math.MinInt64,
+				NumBlocks: 0,
+				Samples:   []backfillSample{},
+			},
+		},
+		{
+			ToParse: `# HELP http_requests_total The total number of HTTP requests.
+# TYPE http_requests_total counter
+http_requests_total{code="200"} 1021 1565133713.989
+http_requests_total{code="400"} 1 1565133713.990
+# EOF
+`,
+			IsOk:                 true,
+			Description:          "Multiple samples with different timestamp for different series.",
+			MaxSamplesInAppender: 5000,
+			Expected: backfillExpectation{
+				MinTime:   1565133713989,
+				MaxTime:   1565133713990,
+				NumBlocks: 1,
+				Samples: []backfillSample{
+					{
+						Timestamp: 1565133713989,
+						Value:     1021,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565133713990,
+						Value:     1,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "400"),
+					},
+				},
+			},
+		},
+		{
+			ToParse: `# HELP http_requests_total The total number of HTTP requests.
+# TYPE http_requests_total counter
+http_requests_total{code="200"} 1021 1565133713.989
+http_requests_total{code="200"} 1 1565133714.989
+http_requests_total{code="400"} 2 1565133715.989
+# EOF
+`,
+			IsOk:                 true,
+			Description:          "Multiple samples with different timestamp for the same series.",
+			MaxSamplesInAppender: 5000,
+			Expected: backfillExpectation{
+				MinTime:   1565133713989,
+				MaxTime:   1565133715989,
+				NumBlocks: 1,
+				Samples: []backfillSample{
+					{
+						Timestamp: 1565133713989,
+						Value:     1021,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565133714989,
+						Value:     1,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565133715989,
+						Value:     2,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "400"),
+					},
+				},
+			},
+		},
+		{
+			ToParse: `# HELP http_requests_total The total number of HTTP requests.
+# TYPE http_requests_total counter
+http_requests_total{code="200"} 1021 1565133713.989
+http_requests_total{code="200"} 1022 1565144513.989
+http_requests_total{code="400"} 2 1565155313.989
+http_requests_total{code="400"} 1 1565166113.989
+# EOF
+`,
+			IsOk:                 true,
+			Description:          "Multiple samples that end up in different blocks.",
+			MaxSamplesInAppender: 5000,
+			Expected: backfillExpectation{
+				MinTime:   1565133713989,
+				MaxTime:   1565166113989,
+				NumBlocks: 4,
+				Samples: []backfillSample{
+					{
+						Timestamp: 1565133713989,
+						Value:     1021,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565144513989,
+						Value:     1022,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565155313989,
+						Value:     2,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "400"),
+					},
+					{
+						Timestamp: 1565166113989,
+						Value:     1,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "400"),
+					},
+				},
+			},
+		},
+		{
+			ToParse: `# HELP http_requests_total The total number of HTTP requests.
+# TYPE http_requests_total counter
+http_requests_total{code="200"} 1021 1565133713.989
+http_requests_total{code="200"} 1022 1565133714
+http_requests_total{code="200"} 1023 1565133716
+http_requests_total{code="200"} 1022 1565144513.989
+http_requests_total{code="400"} 2 1565155313.989
+http_requests_total{code="400"} 3 1565155314
+http_requests_total{code="400"} 1 1565166113.989
+# EOF
+`,
+			IsOk:                 true,
+			Description:          "Number of samples are greater than the sample batch size.",
+			MaxSamplesInAppender: 2,
+			Expected: backfillExpectation{
+				MinTime:   1565133713989,
+				MaxTime:   1565166113989,
+				NumBlocks: 4,
+				Samples: []backfillSample{
+					{
+						Timestamp: 1565133713989,
+						Value:     1021,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565133714000,
+						Value:     1022,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565133716000,
+						Value:     1023,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565144513989,
+						Value:     1022,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200"),
+					},
+					{
+						Timestamp: 1565155313989,
+						Value:     2,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "400"),
+					},
+					{
+						Timestamp: 1565155314000,
+						Value:     3,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "400"),
+					},
+					{
+						Timestamp: 1565166113989,
+						Value:     1,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "400"),
+					},
+				},
+			},
+		},
+		{
+			ToParse: `no_help_no_type{foo="bar"} 42 6900
+# EOF
+`,
+			IsOk:                 true,
+			Description:          "Sample with no #HELP or #TYPE keyword.",
+			MaxSamplesInAppender: 5000,
+			Expected: backfillExpectation{
+				MinTime:   6900000,
+				MaxTime:   6900000,
+				NumBlocks: 1,
+				Samples: []backfillSample{
+					{
+						Timestamp: 6900000,
+						Value:     42,
+						Labels:    labels.FromStrings("__name__", "no_help_no_type", "foo", "bar"),
+					},
+				},
+			},
+		},
+		{
+			ToParse: `bare_metric 42.24 1001
+# EOF
+`,
+			IsOk:                 true,
+			Description:          "Bare sample.",
+			MaxSamplesInAppender: 5000,
+			Expected: backfillExpectation{
+				MinTime:   1001000,
+				MaxTime:   1001000,
+				NumBlocks: 1,
+				Samples: []backfillSample{
+					{
+						Timestamp: 1001000,
+						Value:     42.24,
+						Labels:    labels.FromStrings("__name__", "bare_metric"),
+					},
+				},
+			},
+		},
+		{
+			ToParse: `# HELP rpc_duration_seconds A summary of the RPC duration in seconds.
+# TYPE rpc_duration_seconds summary
+rpc_duration_seconds{quantile="0.01"} 3102
+rpc_duration_seconds{quantile="0.05"} 3272
+# EOF
+`,
+			IsOk:        false,
+			Description: "Does not have timestamp.",
+		},
+		{
+			ToParse: `# HELP bad_metric This a bad metric
+# TYPE bad_metric bad_type
+bad_metric{type="has a bad type information"} 0.0 111
+# EOF
+`,
+			IsOk:        false,
+			Description: "Has a bad type information.",
+		},
+		{
+			ToParse: `# HELP no_nl This test has no newline so will fail
+# TYPE no_nl gauge
+no_nl{type="no newline"}
+# EOF
+`,
+			IsOk:        false,
+			Description: "No newline.",
+		},
+	}
+	for _, test := range tests {
+		// Each case runs as a subtest so that its deferred cleanup fires when
+		// the case finishes rather than when the whole test returns.
+		t.Run(test.Description, func(t *testing.T) {
+			openMetricsFile := createTemporaryOpenMetricsFile(t, test.ToParse)
+			// Defers run last-in-first-out: the file is closed before it is removed.
+			defer func() {
+				require.NoError(t, os.Remove(openMetricsFile))
+			}()
+
+			input, err := os.Open(openMetricsFile)
+			require.NoError(t, err)
+			defer func() {
+				require.NoError(t, input.Close())
+			}()
+
+			outputDir, err := ioutil.TempDir("", "myDir")
+			require.NoError(t, err)
+			defer func() {
+				require.NoError(t, os.RemoveAll(outputDir))
+			}()
+
+			err = backfill(test.MaxSamplesInAppender, input, outputDir)
+
+			// For inputs expected to fail, only the error itself is checked.
+			if !test.IsOk {
+				require.Error(t, err, test.Description)
+				return
+			}
+
+			// Reopen the output directory as a regular TSDB to inspect the result.
+			require.NoError(t, err)
+			db, err := tsdb.Open(outputDir, nil, nil, tsdb.DefaultOptions())
+			require.NoError(t, err)
+			defer func() {
+				require.NoError(t, db.Close())
+			}()
+
+			testBlocks(t, db, test.Expected.MinTime, test.Expected.MaxTime, test.Expected.Samples, test.Expected.NumBlocks)
+		})
+	}
+}
diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go
index 8dcb02d88..7de799664 100644
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@@ -133,6 +133,12 @@ func main() {
 	dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64()
 	dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64()
 
+	importCmd := tsdbCmd.Command("create-blocks-from", "[Experimental] Import samples from input and produce TSDB blocks. Please refer to the storage docs for more details.")
+	openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.")
+	// TODO(aSquare14): add flag to set default block duration
+	importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String()
+	importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String()
+
 	parsedCmd := kingpin.MustParse(app.Parse(os.Args[1:]))
 
 	var p printer
@@ -188,6 +194,9 @@ func main() {
 
 	case tsdbDumpCmd.FullCommand():
 		os.Exit(checkErr(dumpSamples(*dumpPath, *dumpMinTime, *dumpMaxTime)))
+	//TODO(aSquare14): Work on adding support for custom block size.
+	case openMetricsImportCmd.FullCommand():
+		os.Exit(checkErr(backfillOpenMetrics(*importFilePath, *importDBPath)))
 	}
 }
 
diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go
index dfcfc320c..c2ccf82d0 100644
--- a/cmd/promtool/tsdb.go
+++ b/cmd/promtool/tsdb.go
@@ -613,3 +613,20 @@ func checkErr(err error) int {
 	}
 	return 0
 }
+
+// backfillOpenMetrics opens the file at path and passes it to backfill together
+// with outputDir. It returns the error from opening, backfilling or closing the file.
+func backfillOpenMetrics(path string, outputDir string) (err error) {
+	input, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// Report a failed Close unless an earlier error is already being returned.
+		if cerr := input.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+	// 5000 is the maxSamplesInAppender value handed to backfill.
+	return backfill(5000, input, outputDir)
+}
diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go
index 0cd05eb77..baf87f140 100644
--- a/tsdb/blockwriter.go
+++ b/tsdb/blockwriter.go
@@ -40,6 +40,9 @@ type BlockWriter struct {
 	chunkDir string
 }
 
+// ErrNoSeriesAppended is returned if the series count is zero while flushing blocks.
+var ErrNoSeriesAppended = errors.New("no series appended, aborting")
+
 // NewBlockWriter create a new block writer.
 //
 // The returned writer accumulates all the series in the Head block until `Flush` is called.
@@ -88,7 +91,7 @@ func (w *BlockWriter) Appender(ctx context.Context) storage.Appender {
 func (w *BlockWriter) Flush(ctx context.Context) (ulid.ULID, error) {
 	seriesCount := w.head.NumSeries()
 	if w.head.NumSeries() == 0 {
-		return ulid.ULID{}, errors.New("no series appended, aborting")
+		return ulid.ULID{}, ErrNoSeriesAppended
 	}
 
 	mint := w.head.MinTime()