2020-01-08 05:28:43 -08:00
|
|
|
// Copyright 2020 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package logging
|
|
|
|
|
|
|
|
import (
|
fix!: stop unbounded memory usage from query log
Resolves: #15433
When I converted prometheus to use slog in #14906, I update both the
`QueryLogger` interface, as well as how the log calls to the
`QueryLogger` were built up in `promql.Engine.exec()`. The backing
logger for the `QueryLogger` in the engine is a
`util/logging.JSONFileLogger`, and it's implementation of the `With()`
method updates the logger the logger in place with the new keyvals added
onto the underlying slog.Logger, which means they get inherited onto
everything after. All subsequent calls to `With()`, even in later
queries, would continue to then append on more and more keyvals for the
various params and fields built up in the logger. In turn, this causes
unbounded growth of the logger, leading to increased memory usage, and
in at least one report was the likely cause of an OOM kill. More
information can be found in the issue and the linked slack thread.
This commit does a few things:
- It was referenced in feedback in #14906 that it would've been better
to not change the `QueryLogger` interface if possible, this PR
proposes changes that bring it closer to alignment with the pre-3.0
`QueryLogger` interface contract
- reverts `promql.Engine.exec()`'s usage of the query logger to the
pattern of building up an array of args to pass at once to the end log
call. Avoiding the repetitious calls to `.With()` are what resolve the
issue with the logger growth/memory usage.
- updates the scrape failure logger to use the update `QueryLogger`
methods in the contract.
- updates tests accordingly
- cleans up unused methods
Builds and passes tests successfully. Tested locally and confirmed I
could no longer reproduce the issue/it resolved the issue.
Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
2024-11-23 11:20:37 -08:00
|
|
|
"context"
|
|
|
|
"log/slog"
|
2020-01-08 05:28:43 -08:00
|
|
|
"os"
|
2020-01-20 05:17:11 -08:00
|
|
|
"strings"
|
2020-01-08 05:28:43 -08:00
|
|
|
"testing"
|
|
|
|
|
2022-02-12 15:58:27 -08:00
|
|
|
"github.com/grafana/regexp"
|
2020-10-29 02:43:23 -07:00
|
|
|
"github.com/stretchr/testify/require"
|
2020-01-08 05:28:43 -08:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestJSONFileLogger_basic(t *testing.T) {
|
2022-04-27 02:24:36 -07:00
|
|
|
f, err := os.CreateTemp("", "logging")
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-08-24 00:33:22 -07:00
|
|
|
defer func() {
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, f.Close())
|
|
|
|
require.NoError(t, os.Remove(f.Name()))
|
2020-08-24 00:33:22 -07:00
|
|
|
}()
|
2020-01-08 05:28:43 -08:00
|
|
|
|
|
|
|
l, err := NewJSONFileLogger(f.Name())
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, l, "logger can't be nil")
|
2020-01-08 05:28:43 -08:00
|
|
|
|
fix!: stop unbounded memory usage from query log
Resolves: #15433
When I converted prometheus to use slog in #14906, I update both the
`QueryLogger` interface, as well as how the log calls to the
`QueryLogger` were built up in `promql.Engine.exec()`. The backing
logger for the `QueryLogger` in the engine is a
`util/logging.JSONFileLogger`, and it's implementation of the `With()`
method updates the logger the logger in place with the new keyvals added
onto the underlying slog.Logger, which means they get inherited onto
everything after. All subsequent calls to `With()`, even in later
queries, would continue to then append on more and more keyvals for the
various params and fields built up in the logger. In turn, this causes
unbounded growth of the logger, leading to increased memory usage, and
in at least one report was the likely cause of an OOM kill. More
information can be found in the issue and the linked slack thread.
This commit does a few things:
- It was referenced in feedback in #14906 that it would've been better
to not change the `QueryLogger` interface if possible, this PR
proposes changes that bring it closer to alignment with the pre-3.0
`QueryLogger` interface contract
- reverts `promql.Engine.exec()`'s usage of the query logger to the
pattern of building up an array of args to pass at once to the end log
call. Avoiding the repetitious calls to `.With()` are what resolve the
issue with the logger growth/memory usage.
- updates the scrape failure logger to use the update `QueryLogger`
methods in the contract.
- updates tests accordingly
- cleans up unused methods
Builds and passes tests successfully. Tested locally and confirmed I
could no longer reproduce the issue/it resolved the issue.
Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
2024-11-23 11:20:37 -08:00
|
|
|
l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-01-08 05:28:43 -08:00
|
|
|
r := make([]byte, 1024)
|
|
|
|
_, err = f.Read(r)
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2024-09-09 18:41:53 -07:00
|
|
|
|
2024-11-05 02:48:03 -08:00
|
|
|
result, err := regexp.Match(`^{"time":"[^"]+","level":"INFO","source":"file.go:\d+","msg":"test","hello":"world"}\n`, r)
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.True(t, result, "unexpected content: %s", r)
|
2020-01-08 05:28:43 -08:00
|
|
|
|
|
|
|
err = l.Close()
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-01-08 05:28:43 -08:00
|
|
|
|
|
|
|
err = l.file.Close()
|
2020-10-29 02:43:23 -07:00
|
|
|
require.Error(t, err)
|
|
|
|
require.True(t, strings.HasSuffix(err.Error(), os.ErrClosed.Error()), "file not closed")
|
2020-01-20 05:17:11 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestJSONFileLogger_parallel(t *testing.T) {
|
2022-04-27 02:24:36 -07:00
|
|
|
f, err := os.CreateTemp("", "logging")
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-08-24 00:33:22 -07:00
|
|
|
defer func() {
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, f.Close())
|
|
|
|
require.NoError(t, os.Remove(f.Name()))
|
2020-08-24 00:33:22 -07:00
|
|
|
}()
|
2020-01-20 05:17:11 -08:00
|
|
|
|
|
|
|
l, err := NewJSONFileLogger(f.Name())
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, l, "logger can't be nil")
|
2020-01-20 05:17:11 -08:00
|
|
|
|
fix!: stop unbounded memory usage from query log
Resolves: #15433
When I converted prometheus to use slog in #14906, I update both the
`QueryLogger` interface, as well as how the log calls to the
`QueryLogger` were built up in `promql.Engine.exec()`. The backing
logger for the `QueryLogger` in the engine is a
`util/logging.JSONFileLogger`, and it's implementation of the `With()`
method updates the logger the logger in place with the new keyvals added
onto the underlying slog.Logger, which means they get inherited onto
everything after. All subsequent calls to `With()`, even in later
queries, would continue to then append on more and more keyvals for the
various params and fields built up in the logger. In turn, this causes
unbounded growth of the logger, leading to increased memory usage, and
in at least one report was the likely cause of an OOM kill. More
information can be found in the issue and the linked slack thread.
This commit does a few things:
- It was referenced in feedback in #14906 that it would've been better
to not change the `QueryLogger` interface if possible, this PR
proposes changes that bring it closer to alignment with the pre-3.0
`QueryLogger` interface contract
- reverts `promql.Engine.exec()`'s usage of the query logger to the
pattern of building up an array of args to pass at once to the end log
call. Avoiding the repetitious calls to `.With()` are what resolve the
issue with the logger growth/memory usage.
- updates the scrape failure logger to use the update `QueryLogger`
methods in the contract.
- updates tests accordingly
- cleans up unused methods
Builds and passes tests successfully. Tested locally and confirmed I
could no longer reproduce the issue/it resolved the issue.
Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
2024-11-23 11:20:37 -08:00
|
|
|
l.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-01-20 05:17:11 -08:00
|
|
|
|
|
|
|
l2, err := NewJSONFileLogger(f.Name())
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
|
|
|
require.NotNil(t, l, "logger can't be nil")
|
2020-01-20 05:17:11 -08:00
|
|
|
|
fix!: stop unbounded memory usage from query log
Resolves: #15433
When I converted prometheus to use slog in #14906, I update both the
`QueryLogger` interface, as well as how the log calls to the
`QueryLogger` were built up in `promql.Engine.exec()`. The backing
logger for the `QueryLogger` in the engine is a
`util/logging.JSONFileLogger`, and it's implementation of the `With()`
method updates the logger the logger in place with the new keyvals added
onto the underlying slog.Logger, which means they get inherited onto
everything after. All subsequent calls to `With()`, even in later
queries, would continue to then append on more and more keyvals for the
various params and fields built up in the logger. In turn, this causes
unbounded growth of the logger, leading to increased memory usage, and
in at least one report was the likely cause of an OOM kill. More
information can be found in the issue and the linked slack thread.
This commit does a few things:
- It was referenced in feedback in #14906 that it would've been better
to not change the `QueryLogger` interface if possible, this PR
proposes changes that bring it closer to alignment with the pre-3.0
`QueryLogger` interface contract
- reverts `promql.Engine.exec()`'s usage of the query logger to the
pattern of building up an array of args to pass at once to the end log
call. Avoiding the repetitious calls to `.With()` are what resolve the
issue with the logger growth/memory usage.
- updates the scrape failure logger to use the update `QueryLogger`
methods in the contract.
- updates tests accordingly
- cleans up unused methods
Builds and passes tests successfully. Tested locally and confirmed I
could no longer reproduce the issue/it resolved the issue.
Signed-off-by: TJ Hoplock <t.hoplock@gmail.com>
2024-11-23 11:20:37 -08:00
|
|
|
l2.Log(context.Background(), slog.LevelInfo, "test", "hello", "world")
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-01-20 05:17:11 -08:00
|
|
|
|
|
|
|
err = l.Close()
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-01-20 05:17:11 -08:00
|
|
|
|
|
|
|
err = l.file.Close()
|
2020-10-29 02:43:23 -07:00
|
|
|
require.Error(t, err)
|
|
|
|
require.True(t, strings.HasSuffix(err.Error(), os.ErrClosed.Error()), "file not closed")
|
2020-01-20 05:17:11 -08:00
|
|
|
|
|
|
|
err = l2.Close()
|
2020-10-29 02:43:23 -07:00
|
|
|
require.NoError(t, err)
|
2020-01-20 05:17:11 -08:00
|
|
|
|
|
|
|
err = l2.file.Close()
|
2020-10-29 02:43:23 -07:00
|
|
|
require.Error(t, err)
|
|
|
|
require.True(t, strings.HasSuffix(err.Error(), os.ErrClosed.Error()), "file not closed")
|
2020-01-08 05:28:43 -08:00
|
|
|
}
|