Merge branch 'main' into 3.0-main-sync-24-08-30

using -Xours Signed-off-by: Jan Fajerski <jfajersk@redhat.com>
2025-03-05 20:59:13 -08:00 · 2024-09-02 11:26:38 +02:00 · 2024-09-02 11:26:38 +02:00 · 00315ce15e
parent b860327989 d63f5b35df
commit 00315ce15e
114 changed files with 4958 additions and 1070 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -190,7 +190,7 @@ jobs:
        with:
          args: --verbose
          # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml.
-          version: v1.60.1
+          version: v1.60.2
  fuzzing:
    uses: ./.github/workflows/fuzzing.yml
    if: github.event_name == 'pull_request'
--- a/.golangci.yml
+++ b/.golangci.yml
@ -25,15 +25,34 @@ linters:
    - loggercheck

 issues:
+  max-issues-per-linter: 0
  max-same-issues: 0
+  # The default exclusions are too aggressive. For one, they
+  # essentially disable any linting on doc comments. We disable
+  # default exclusions here and add exclusions fitting our codebase
+  # further down.
+  exclude-use-default: false
  exclude-files:
    # Skip autogenerated files.
    - ^.*\.(pb|y)\.go$
  exclude-dirs:
-    # Copied it from a different source
+    # Copied it from a different source.
    - storage/remote/otlptranslator/prometheusremotewrite
    - storage/remote/otlptranslator/prometheus
  exclude-rules:
+    - linters:
+        - errcheck
+      # Taken from the default exclusions (that are otherwise disabled above).
+      text: Error return value of .((os\.)?std(out|err)\..*|.*Close|.*Flush|os\.Remove(All)?|.*print(f|ln)?|os\.(Un)?Setenv). is not checked
+    - linters:
+        - govet
+      # We use many Seek methods that do not follow the usual pattern.
+      text: "stdmethods: method Seek.* should have signature Seek"
+    - linters:
+        - revive
+      # At some point, we stopped writing doc comments on exported symbols.
+      # TODO(beorn7): Maybe we should enforce this again? There are ~500 offenders right now.
+      text: exported (.+) should have comment( \(or a comment on this block\))? or be unexported
    - linters:
        - gocritic
      text: "appendAssign"
@ -94,15 +113,14 @@ linters-settings:
    errorf: false
  revive:
    # By default, revive will enable only the linting rules that are named in the configuration file.
-    # So, it's needed to explicitly set in configuration all required rules.
-    # The following configuration enables all the rules from the defaults.toml
-    # https://github.com/mgechev/revive/blob/master/defaults.toml
+    # So all required rules must be explicitly enabled here.
    rules:
      # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md
      - name: blank-imports
+      - name: comment-spacings
      - name: context-as-argument
        arguments:
-          # allow functions with test or bench signatures
+          # Allow functions with test or bench signatures.
          - allowTypesBefore: "*testing.T,testing.TB"
      - name: context-keys-type
      - name: dot-imports
@ -118,6 +136,8 @@ linters-settings:
      - name: increment-decrement
      - name: indent-error-flow
      - name: package-comments
+        # TODO(beorn7): Currently, we have a lot of missing package doc comments. Maybe we should have them.
+        disabled: true
      - name: range
      - name: receiver-naming
      - name: redefines-builtin-id
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -11,9 +11,18 @@ _Please add changes here that are only in the release-3.0 branch. These will be
 ## unreleased

 * [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200
+* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706
 * [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042

-## 2.54.0-rc.1 / 2024-08-05
+## 2.54.1 / 2024-08-27
+
+* [BUGFIX] Scraping: allow multiple samples on same series, with explicit timestamps. #14685
+* [BUGFIX] Docker SD: fix crash in `match_first_network` mode when container is reconnected to a new network. #14654
+* [BUGFIX] PromQL: fix experimental native histograms getting corrupted due to vector selector bug in range queries. #14538
+* [BUGFIX] PromQL: fix experimental native histogram counter reset detection on stale samples. #14514
+* [BUGFIX] PromQL: fix native histograms getting corrupted due to vector selector bug in range queries. #14605
+
+## 2.54.0 / 2024-08-09

 Release 2.54 brings a release candidate of a major new version of [Remote Write: 2.0](https://prometheus.io/docs/specs/remote_write_spec_2_0/).
 This is experimental at this time and may still change.
@ -42,6 +51,7 @@ Remote-write v2 is enabled by default, but can be disabled via feature-flag `web
 * [ENHANCEMENT] Notifier: Send any outstanding Alertmanager notifications when shutting down. #14290
 * [ENHANCEMENT] Rules: Add label-matcher support to Rules API. #10194
 * [ENHANCEMENT] HTTP API: Add url to message logged on error while sending response. #14209
+* [BUGFIX] TSDB: Exclude OOO chunks mapped after compaction starts (introduced by #14396). #14584
 * [BUGFIX] CLI: escape `|` characters when generating docs. #14420
 * [BUGFIX] PromQL (experimental native histograms): Fix some binary operators between native histogram values. #14454
 * [BUGFIX] TSDB: LabelNames API could fail during compaction. #14279
--- a/Makefile.common
+++ b/Makefile.common
@ -61,7 +61,7 @@ PROMU_URL     := https://github.com/prometheus/promu/releases/download/v$(PROMU_
 SKIP_GOLANGCI_LINT :=
 GOLANGCI_LINT :=
 GOLANGCI_LINT_OPTS ?=
-GOLANGCI_LINT_VERSION ?= v1.60.1
+GOLANGCI_LINT_VERSION ?= v1.60.2
 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64.
 # windows isn't included here because of the path separator being different.
 ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin))
--- a/cmd/prometheus/main.go
+++ b/cmd/prometheus/main.go
@ -169,6 +169,8 @@ type flagConfig struct {
 	corsRegexString string

 	promlogConfig promlog.Config
+
+	promqlEnableDelayedNameRemoval bool
 }

 // setFeatureListOptions sets the corresponding options from the featureList.
@ -235,6 +237,9 @@ func (c *flagConfig) setFeatureListOptions(logger log.Logger) error {
 			case "delayed-compaction":
 				c.tsdb.EnableDelayedCompaction = true
 				level.Info(logger).Log("msg", "Experimental delayed compaction is enabled.")
+			case "promql-delayed-name-removal":
+				c.promqlEnableDelayedNameRemoval = true
+				level.Info(logger).Log("msg", "Experimental PromQL delayed name removal enabled.")
 			case "utf8-names":
 				model.NameValidationScheme = model.UTF8Validation
 				level.Info(logger).Log("msg", "Experimental UTF-8 support enabled")
@ -287,8 +292,8 @@ func main() {
 	a.Flag("config.file", "Prometheus configuration file path.").
 		Default("prometheus.yml").StringVar(&cfg.configFile)

-	a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry.").
-		Default("0.0.0.0:9090").StringVar(&cfg.web.ListenAddress)
+	a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated.").
+		Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses)

 	a.Flag("auto-gomemlimit.ratio", "The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory").
 		Default("0.9").FloatVar(&cfg.memlimitRatio)
@ -302,7 +307,7 @@ func main() {
 		"Maximum duration before timing out read of the request, and closing idle connections.").
 		Default("5m").SetValue(&cfg.webTimeout)

-	a.Flag("web.max-connections", "Maximum number of simultaneous connections.").
+	a.Flag("web.max-connections", "Maximum number of simultaneous connections across all listeners.").
 		Default("512").IntVar(&cfg.web.MaxConnections)

 	a.Flag("web.external-url",
@ -461,6 +466,8 @@ func main() {
 	a.Flag("scrape.discovery-reload-interval", "Interval used by scrape manager to throttle target groups updates.").
 		Hidden().Default("5s").SetValue(&cfg.scrape.DiscoveryReloadInterval)

+	a.Flag("scrape.name-escaping-scheme", `Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots".`).Default(scrape.DefaultNameEscapingScheme.String()).StringVar(&cfg.nameEscapingScheme)
+
 	a.Flag("enable-feature", "Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details.").
 		Default("").StringsVar(&cfg.featureList)

@ -518,7 +525,7 @@ func main() {
 		localStoragePath = cfg.agentStoragePath
 	}

-	cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddress)
+	cfg.web.ExternalURL, err = computeExternalURL(cfg.prometheusURL, cfg.web.ListenAddresses[0])
 	if err != nil {
 		fmt.Fprintln(os.Stderr, fmt.Errorf("parse external URL %q: %w", cfg.prometheusURL, err))
 		os.Exit(2)
@ -761,6 +768,7 @@ func main() {
 			EnableAtModifier:         true,
 			EnableNegativeOffset:     true,
 			EnablePerStepStats:       cfg.enablePerStepStats,
+			EnableDelayedNameRemoval: cfg.promqlEnableDelayedNameRemoval,
 		}

 		queryEngine = promql.NewEngine(opts)
@ -949,9 +957,9 @@ func main() {
 		})
 	}

-	listener, err := webHandler.Listener()
+	listeners, err := webHandler.Listeners()
 	if err != nil {
-		level.Error(logger).Log("msg", "Unable to start web listener", "err", err)
+		level.Error(logger).Log("msg", "Unable to start web listeners", "err", err)
 		os.Exit(1)
 	}

@ -1246,7 +1254,7 @@ func main() {
 		// Web handler.
 		g.Add(
 			func() error {
-				if err := webHandler.Run(ctxWeb, listener, *webConfig); err != nil {
+				if err := webHandler.Run(ctxWeb, listeners, *webConfig); err != nil {
 					return fmt.Errorf("error starting web server: %w", err)
 				}
 				return nil
--- a/cmd/promtool/backfill.go
+++ b/cmd/promtool/backfill.go
@ -85,7 +85,7 @@ func getCompatibleBlockDuration(maxBlockDuration int64) int64 {
 	return blockDuration
 }

-func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool) (returnErr error) {
+func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesInAppender int, outputDir string, humanReadable, quiet bool, customLabels map[string]string) (returnErr error) {
 	blockDuration := getCompatibleBlockDuration(maxBlockDuration)
 	mint = blockDuration * (mint / blockDuration)

@ -102,6 +102,8 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
 		nextSampleTs int64 = math.MaxInt64
 	)

+	lb := labels.NewBuilder(labels.EmptyLabels())
+
 	for t := mint; t <= maxt; t += blockDuration {
 		tsUpper := t + blockDuration
 		if nextSampleTs != math.MaxInt64 && nextSampleTs >= tsUpper {
@ -162,7 +164,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
 				l := labels.Labels{}
 				p.Metric(&l)

-				if _, err := app.Append(0, l, *ts, v); err != nil {
+				lb.Reset(l)
+				for name, value := range customLabels {
+					lb.Set(name, value)
+				}
+				lbls := lb.Labels()
+
+				if _, err := app.Append(0, lbls, *ts, v); err != nil {
 					return fmt.Errorf("add sample: %w", err)
 				}

@ -221,13 +229,13 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn
 	return nil
 }

-func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) (err error) {
+func backfill(maxSamplesInAppender int, input []byte, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) (err error) {
 	p := textparse.NewOpenMetricsParser(input, nil) // Don't need a SymbolTable to get max and min timestamps.
 	maxt, mint, err := getMinAndMaxTimestamps(p)
 	if err != nil {
 		return fmt.Errorf("getting min and max timestamp: %w", err)
 	}
-	if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet); err != nil {
+	if err = createBlocks(input, mint, maxt, int64(maxBlockDuration/time.Millisecond), maxSamplesInAppender, outputDir, humanReadable, quiet, customLabels); err != nil {
 		return fmt.Errorf("block creation: %w", err)
 	}
 	return nil
--- a/cmd/promtool/backfill_test.go
+++ b/cmd/promtool/backfill_test.go
@ -92,6 +92,7 @@ func TestBackfill(t *testing.T) {
 		Description          string
 		MaxSamplesInAppender int
 		MaxBlockDuration     time.Duration
+		Labels               map[string]string
 		Expected             struct {
 			MinTime       int64
 			MaxTime       int64
@ -636,6 +637,49 @@ http_requests_total{code="400"} 1024 7199
 				},
 			},
 		},
+		{
+			ToParse: `# HELP http_requests_total The total number of HTTP requests.
+# TYPE http_requests_total counter
+http_requests_total{code="200"} 1 1624463088.000
+http_requests_total{code="200"} 2 1629503088.000
+http_requests_total{code="200"} 3 1629863088.000
+# EOF
+`,
+			IsOk:                 true,
+			Description:          "Sample with external labels.",
+			MaxSamplesInAppender: 5000,
+			MaxBlockDuration:     2048 * time.Hour,
+			Labels:               map[string]string{"cluster_id": "123", "org_id": "999"},
+			Expected: struct {
+				MinTime       int64
+				MaxTime       int64
+				NumBlocks     int
+				BlockDuration int64
+				Samples       []backfillSample
+			}{
+				MinTime:       1624463088000,
+				MaxTime:       1629863088000,
+				NumBlocks:     2,
+				BlockDuration: int64(1458 * time.Hour / time.Millisecond),
+				Samples: []backfillSample{
+					{
+						Timestamp: 1624463088000,
+						Value:     1,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
+					},
+					{
+						Timestamp: 1629503088000,
+						Value:     2,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
+					},
+					{
+						Timestamp: 1629863088000,
+						Value:     3,
+						Labels:    labels.FromStrings("__name__", "http_requests_total", "code", "200", "cluster_id", "123", "org_id", "999"),
+					},
+				},
+			},
+		},
 		{
 			ToParse: `# HELP rpc_duration_seconds A summary of the RPC duration in seconds.
 # TYPE rpc_duration_seconds summary
@ -689,7 +733,7 @@ after_eof 1 2

 			outputDir := t.TempDir()

-			err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration)
+			err := backfill(test.MaxSamplesInAppender, []byte(test.ToParse), outputDir, false, false, test.MaxBlockDuration, test.Labels)

 			if !test.IsOk {
 				require.Error(t, err, test.Description)
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@ -253,6 +253,7 @@ func main() {
 	importQuiet := importCmd.Flag("quiet", "Do not print created blocks.").Short('q').Bool()
 	maxBlockDuration := importCmd.Flag("max-block-duration", "Maximum duration created blocks may span. Anything less than 2h is ignored.").Hidden().PlaceHolder("<duration>").Duration()
 	openMetricsImportCmd := importCmd.Command("openmetrics", "Import samples from OpenMetrics input and produce TSDB blocks. Please refer to the storage docs for more details.")
+	openMetricsLabels := openMetricsImportCmd.Flag("label", "Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value").StringMap()
 	importFilePath := openMetricsImportCmd.Arg("input file", "OpenMetrics file to read samples from.").Required().String()
 	importDBPath := openMetricsImportCmd.Arg("output directory", "Output directory for generated blocks.").Default(defaultDBPath).String()
 	importRulesCmd := importCmd.Command("rules", "Create blocks of data for new recording rules.")
@ -406,7 +407,7 @@ func main() {
 		os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics)))
 	// TODO(aSquare14): Work on adding support for custom block size.
 	case openMetricsImportCmd.FullCommand():
-		os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration))
+		os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration, *openMetricsLabels))

 	case importRulesCmd.FullCommand():
 		os.Exit(checkErr(importRules(serverURL, httpRoundTripper, *importRulesStart, *importRulesEnd, *importRulesOutputDir, *importRulesEvalInterval, *maxBlockDuration, *importRulesFiles...)))
@ -469,7 +470,7 @@ func (ls lintConfig) lintDuplicateRules() bool {
 	return ls.all || ls.duplicateRules
 }

-// Check server status - healthy & ready.
+// CheckServerStatus checks the server status (healthy & ready).
 func CheckServerStatus(serverURL *url.URL, checkEndpoint string, roundTripper http.RoundTripper) error {
 	if serverURL.Scheme == "" {
 		serverURL.Scheme = "http"
--- a/cmd/promtool/metrics.go
+++ b/cmd/promtool/metrics.go
@ -31,7 +31,7 @@ import (
 	"github.com/prometheus/prometheus/util/fmtutil"
 )

-// Push metrics to a prometheus remote write (for testing purpose only).
+// PushMetrics pushes metrics to a Prometheus remote write endpoint (for testing purposes only).
 func PushMetrics(url *url.URL, roundTripper http.RoundTripper, headers map[string]string, timeout time.Duration, labels map[string]string, files ...string) int {
 	addressURL, err := url.Parse(url.String())
 	if err != nil {
--- a/cmd/promtool/tsdb.go
+++ b/cmd/promtool/tsdb.go
@ -823,7 +823,7 @@ func checkErr(err error) int {
 	return 0
 }

-func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration) int {
+func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxBlockDuration time.Duration, customLabels map[string]string) int {
 	inputFile, err := fileutil.OpenMmapFile(path)
 	if err != nil {
 		return checkErr(err)
@ -834,7 +834,7 @@ func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxB
 		return checkErr(fmt.Errorf("create output dir: %w", err))
 	}

-	return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
+	return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration, customLabels))
 }

 func displayHistogram(dataType string, datas []int, total int) {
--- a/cmd/promtool/tsdb_test.go
+++ b/cmd/promtool/tsdb_test.go
@ -186,7 +186,7 @@ func TestTSDBDumpOpenMetricsRoundTrip(t *testing.T) {

 	dbDir := t.TempDir()
 	// Import samples from OM format
-	err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour)
+	err = backfill(5000, initialMetrics, dbDir, false, false, 2*time.Hour, map[string]string{})
 	require.NoError(t, err)
 	db, err := tsdb.Open(dbDir, nil, nil, tsdb.DefaultOptions(), nil)
 	require.NoError(t, err)
--- a/config/config.go
+++ b/config/config.go
@ -221,6 +221,7 @@ var (
 	// DefaultRemoteReadConfig is the default remote read configuration.
 	DefaultRemoteReadConfig = RemoteReadConfig{
 		RemoteTimeout:        model.Duration(1 * time.Minute),
+		ChunkedReadLimit:     DefaultChunkedReadLimit,
 		HTTPClientConfig:     config.DefaultHTTPClientConfig,
 		FilterExternalLabels: true,
 	}
@ -781,7 +782,9 @@ func (c *ScrapeConfig) Validate(globalConfig GlobalConfig) error {
 	default:
 		return fmt.Errorf("unknown name validation method specified, must be either 'legacy' or 'utf8', got %s", globalConfig.MetricNameValidationScheme)
 	}
+	if c.MetricNameValidationScheme == "" {
 		c.MetricNameValidationScheme = globalConfig.MetricNameValidationScheme
+	}

 	return nil
 }
@ -1277,10 +1280,17 @@ type MetadataConfig struct {
 	MaxSamplesPerSend int `yaml:"max_samples_per_send,omitempty"`
 }

+const (
+	// DefaultChunkedReadLimit is the default value for the maximum size of the protobuf frame the client allows.
+	// 50MB is the default. This is equivalent to ~100k full XOR chunks and average labelset.
+	DefaultChunkedReadLimit = 5e+7
+)
+
 // RemoteReadConfig is the configuration for reading from remote storage.
 type RemoteReadConfig struct {
 	URL              *config.URL       `yaml:"url"`
 	RemoteTimeout    model.Duration    `yaml:"remote_timeout,omitempty"`
+	ChunkedReadLimit uint64            `yaml:"chunked_read_limit,omitempty"`
 	Headers          map[string]string `yaml:"headers,omitempty"`
 	ReadRecent       bool              `yaml:"read_recent,omitempty"`
 	Name             string            `yaml:"name,omitempty"`
--- a/config/config_test.go
+++ b/config/config_test.go
@ -16,6 +16,7 @@ package config
 import (
 	"crypto/tls"
 	"encoding/json"
+	"fmt"
 	"net/url"
 	"os"
 	"path/filepath"
@ -166,6 +167,7 @@ var expectedConf = &Config{
 		{
 			URL:              mustParseURL("http://remote1/read"),
 			RemoteTimeout:    model.Duration(1 * time.Minute),
+			ChunkedReadLimit: DefaultChunkedReadLimit,
 			ReadRecent:       true,
 			Name:             "default",
 			HTTPClientConfig: config.HTTPClientConfig{
@ -177,6 +179,7 @@ var expectedConf = &Config{
 		{
 			URL:              mustParseURL("http://remote3/read"),
 			RemoteTimeout:    model.Duration(1 * time.Minute),
+			ChunkedReadLimit: DefaultChunkedReadLimit,
 			ReadRecent:       false,
 			Name:             "read_special",
 			RequiredMatchers: model.LabelSet{"job": "special"},
@ -2300,3 +2303,52 @@ func TestScrapeConfigDisableCompression(t *testing.T) {

 	require.False(t, got.ScrapeConfigs[0].EnableCompression)
 }
+
+func TestScrapeConfigNameValidationSettings(t *testing.T) {
+	model.NameValidationScheme = model.UTF8Validation
+	defer func() {
+		model.NameValidationScheme = model.LegacyValidation
+	}()
+
+	tests := []struct {
+		name         string
+		inputFile    string
+		expectScheme string
+	}{
+		{
+			name:         "blank config implies default",
+			inputFile:    "scrape_config_default_validation_mode",
+			expectScheme: "",
+		},
+		{
+			name:         "global setting implies local settings",
+			inputFile:    "scrape_config_global_validation_mode",
+			expectScheme: "utf8",
+		},
+		{
+			name:         "local setting",
+			inputFile:    "scrape_config_local_validation_mode",
+			expectScheme: "utf8",
+		},
+		{
+			name:         "local setting overrides global setting",
+			inputFile:    "scrape_config_local_global_validation_mode",
+			expectScheme: "legacy",
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			want, err := LoadFile(fmt.Sprintf("testdata/%s.yml", tc.inputFile), false, false, log.NewNopLogger())
+			require.NoError(t, err)
+
+			out, err := yaml.Marshal(want)
+
+			require.NoError(t, err)
+			got := &Config{}
+			require.NoError(t, yaml.UnmarshalStrict(out, got))
+
+			require.Equal(t, tc.expectScheme, got.ScrapeConfigs[0].MetricNameValidationScheme)
+		})
+	}
+}
--- a/config/testdata/scrape_config_default_validation_mode.yml
+++ b/config/testdata/scrape_config_default_validation_mode.yml
@ -0,0 +1,2 @@
+scrape_configs:
+  - job_name: prometheus
--- a/config/testdata/scrape_config_global_validation_mode.yml
+++ b/config/testdata/scrape_config_global_validation_mode.yml
@ -0,0 +1,4 @@
+global:
+  metric_name_validation_scheme: utf8
+scrape_configs:
+  - job_name: prometheus
--- a/config/testdata/scrape_config_local_global_validation_mode.yml
+++ b/config/testdata/scrape_config_local_global_validation_mode.yml
@ -0,0 +1,5 @@
+global:
+  metric_name_validation_scheme: utf8
+scrape_configs:
+  - job_name: prometheus
+    metric_name_validation_scheme: legacy
--- a/config/testdata/scrape_config_local_validation_mode.yml
+++ b/config/testdata/scrape_config_local_validation_mode.yml
@ -0,0 +1,3 @@
+scrape_configs:
+  - job_name: prometheus
+    metric_name_validation_scheme: utf8
--- a/discovery/discoverer_metrics_noop.go
+++ b/discovery/discoverer_metrics_noop.go
@ -13,7 +13,7 @@

 package discovery

-// Create a dummy metrics struct, because this SD doesn't have any metrics.
+// NoopDiscovererMetrics is a dummy metrics struct for SD mechanisms that don't have any metrics.
 type NoopDiscovererMetrics struct{}

 var _ DiscovererMetrics = (*NoopDiscovererMetrics)(nil)
--- a/discovery/discovery.go
+++ b/discovery/discovery.go
@ -39,7 +39,7 @@ type Discoverer interface {
 	Run(ctx context.Context, up chan<- []*targetgroup.Group)
 }

-// Internal metrics of service discovery mechanisms.
+// DiscovererMetrics are internal metrics of service discovery mechanisms.
 type DiscovererMetrics interface {
 	Register() error
 	Unregister()
@ -56,7 +56,7 @@ type DiscovererOptions struct {
 	HTTPClientOptions []config.HTTPClientOption
 }

-// Metrics used by the "refresh" package.
+// RefreshMetrics are used by the "refresh" package.
 // We define them here in the "discovery" package in order to avoid a cyclic dependency between
 // "discovery" and "refresh".
 type RefreshMetrics struct {
@ -64,17 +64,18 @@ type RefreshMetrics struct {
 	Duration prometheus.Observer
 }

-// Instantiate the metrics used by the "refresh" package.
+// RefreshMetricsInstantiator instantiates the metrics used by the "refresh" package.
 type RefreshMetricsInstantiator interface {
 	Instantiate(mech string) *RefreshMetrics
 }

-// An interface for registering, unregistering, and instantiating metrics for the "refresh" package.
-// Refresh metrics are registered and unregistered outside of the service discovery mechanism.
-// This is so that the same metrics can be reused across different service discovery mechanisms.
-// To manage refresh metrics inside the SD mechanism, we'd need to use const labels which are
-// specific to that SD. However, doing so would also expose too many unused metrics on
-// the Prometheus /metrics endpoint.
+// RefreshMetricsManager is an interface for registering, unregistering, and
+// instantiating metrics for the "refresh" package. Refresh metrics are
+// registered and unregistered outside of the service discovery mechanism. This
+// is so that the same metrics can be reused across different service discovery
+// mechanisms. To manage refresh metrics inside the SD mechanism, we'd need to
+// use const labels which are specific to that SD. However, doing so would also
+// expose too many unused metrics on the Prometheus /metrics endpoint.
 type RefreshMetricsManager interface {
 	DiscovererMetrics
 	RefreshMetricsInstantiator
@ -145,7 +146,8 @@ func (c StaticConfig) NewDiscoverer(DiscovererOptions) (Discoverer, error) {
 	return staticDiscoverer(c), nil
 }

-// No metrics are needed for this service discovery mechanism.
+// NewDiscovererMetrics returns NoopDiscovererMetrics because no metrics are
+// needed for this service discovery mechanism.
 func (c StaticConfig) NewDiscovererMetrics(prometheus.Registerer, RefreshMetricsInstantiator) DiscovererMetrics {
 	return &NoopDiscovererMetrics{}
 }
--- a/discovery/manager.go
+++ b/discovery/manager.go
@ -64,7 +64,7 @@ func (p *Provider) Config() interface{} {
 	return p.config
 }

-// Registers the metrics needed for SD mechanisms.
+// CreateAndRegisterSDMetrics registers the metrics needed for SD mechanisms.
 // Does not register the metrics for the Discovery Manager.
 // TODO(ptodev): Add ability to unregister the metrics?
 func CreateAndRegisterSDMetrics(reg prometheus.Registerer) (map[string]DiscovererMetrics, error) {
@ -213,8 +213,6 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {

 	var (
 		wg           sync.WaitGroup
-		// keep shows if we keep any providers after reload.
-		keep         bool
 		newProviders []*Provider
 	)
 	for _, prov := range m.providers {
@ -228,13 +226,12 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
 			continue
 		}
 		newProviders = append(newProviders, prov)
-		// refTargets keeps reference targets used to populate new subs' targets
+		// refTargets keeps reference targets used to populate new subs' targets as they should be the same.
 		var refTargets map[string]*targetgroup.Group
 		prov.mu.Lock()

 		m.targetsMtx.Lock()
 		for s := range prov.subs {
-			keep = true
 			refTargets = m.targets[poolKey{s, prov.name}]
 			// Remove obsolete subs' targets.
 			if _, ok := prov.newSubs[s]; !ok {
@ -267,7 +264,9 @@ func (m *Manager) ApplyConfig(cfg map[string]Configs) error {
 	// While startProvider does pull the trigger, it may take some time to do so, therefore
 	// we pull the trigger as soon as possible so that downstream managers can populate their state.
 	// See https://github.com/prometheus/prometheus/pull/8639 for details.
-	if keep {
+	// This also helps the downstream managers drop stale targets as soon as possible.
+	// See https://github.com/prometheus/prometheus/pull/13147 for details.
+	if len(m.providers) > 0 {
 		select {
 		case m.triggerSend <- struct{}{}:
 		default:
@ -288,7 +287,9 @@ func (m *Manager) StartCustomProvider(ctx context.Context, name string, worker D
 			name: {},
 		},
 	}
+	m.mtx.Lock()
 	m.providers = append(m.providers, p)
+	m.mtx.Unlock()
 	m.startProvider(ctx, p)
 }

@ -403,19 +404,33 @@ func (m *Manager) allGroups() map[string][]*targetgroup.Group {
 	tSets := map[string][]*targetgroup.Group{}
 	n := map[string]int{}

+	m.mtx.RLock()
 	m.targetsMtx.Lock()
-	defer m.targetsMtx.Unlock()
-	for pkey, tsets := range m.targets {
+	for _, p := range m.providers {
+		p.mu.RLock()
+		for s := range p.subs {
+			// Send empty lists for subs without any targets to make sure old stale targets are dropped by consumers.
+			// See: https://github.com/prometheus/prometheus/issues/12858 for details.
+			if _, ok := tSets[s]; !ok {
+				tSets[s] = []*targetgroup.Group{}
+				n[s] = 0
+			}
+			if tsets, ok := m.targets[poolKey{s, p.name}]; ok {
 				for _, tg := range tsets {
-			// Even if the target group 'tg' is empty we still need to send it to the 'Scrape manager'
-			// to signal that it needs to stop all scrape loops for this target set.
-			tSets[pkey.setName] = append(tSets[pkey.setName], tg)
-			n[pkey.setName] += len(tg.Targets)
+					tSets[s] = append(tSets[s], tg)
+					n[s] += len(tg.Targets)
 				}
 			}
+		}
+		p.mu.RUnlock()
+	}
+	m.targetsMtx.Unlock()
+	m.mtx.RUnlock()
+
 	for setName, v := range n {
 		m.metrics.DiscoveredTargets.WithLabelValues(setName).Set(float64(v))
 	}
+
 	return tSets
 }

--- a/discovery/manager_test.go
+++ b/discovery/manager_test.go
@ -939,11 +939,13 @@ func TestTargetSetTargetGroupsPresentOnConfigChange(t *testing.T) {
 	discoveryManager.ApplyConfig(c)

 	// Original targets should be present as soon as possible.
+	// An empty list should be sent for prometheus2 to drop any stale targets.
 	syncedTargets = <-discoveryManager.SyncCh()
 	mu.Unlock()
-	require.Len(t, syncedTargets, 1)
+	require.Len(t, syncedTargets, 2)
 	verifySyncedPresence(t, syncedTargets, "prometheus", "{__address__=\"foo:9090\"}", true)
 	require.Len(t, syncedTargets["prometheus"], 1)
+	require.Empty(t, syncedTargets["prometheus2"])

 	// prometheus2 configs should be ready on second sync.
 	syncedTargets = <-discoveryManager.SyncCh()
@ -1275,6 +1277,7 @@ func TestCoordinationWithReceiver(t *testing.T) {
 								Targets: []model.LabelSet{{"__instance__": "1"}},
 							},
 						},
+						"mock1": {},
 					},
 				},
 				{
--- a/discovery/metrics_refresh.go
+++ b/discovery/metrics_refresh.go
@ -17,7 +17,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )

-// Metric vectors for the "refresh" package.
+// RefreshMetricsVecs are metric vectors for the "refresh" package.
 // We define them here in the "discovery" package in order to avoid a cyclic dependency between
 // "discovery" and "refresh".
 type RefreshMetricsVecs struct {
--- a/discovery/moby/docker.go
+++ b/discovery/moby/docker.go
@ -19,6 +19,7 @@ import (
 	"net"
 	"net/http"
 	"net/url"
+	"sort"
 	"strconv"
 	"time"

@ -251,28 +252,26 @@ func (d *DockerDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er
 		}

 		if d.matchFirstNetwork && len(networks) > 1 {
-			// Match user defined network
-			if containerNetworkMode.IsUserDefined() {
-				networkMode := string(containerNetworkMode)
-				networks = map[string]*network.EndpointSettings{networkMode: networks[networkMode]}
-			} else {
-				// Get first network if container network mode has "none" value.
-				// This case appears under certain condition:
-				// 1. Container created with network set to "--net=none".
-				// 2. Disconnect network "none".
-				// 3. Reconnect network with user defined networks.
-				var first string
+			// Sort networks by name and take first non-nil network.
+			keys := make([]string, 0, len(networks))
 			for k, n := range networks {
 				if n != nil {
-						first = k
-						break
+					keys = append(keys, k)
 				}
 			}
-				networks = map[string]*network.EndpointSettings{first: networks[first]}
+			if len(keys) > 0 {
+				sort.Strings(keys)
+				firstNetworkMode := keys[0]
+				firstNetwork := networks[firstNetworkMode]
+				networks = map[string]*network.EndpointSettings{firstNetworkMode: firstNetwork}
 			}
 		}

 		for _, n := range networks {
+			if n == nil {
+				continue
+			}
+
 			var added bool

 			for _, p := range c.Ports {
--- a/discovery/moby/docker_test.go
+++ b/discovery/moby/docker_test.go
@ -60,9 +60,9 @@ host: %s
 	tg := tgs[0]
 	require.NotNil(t, tg)
 	require.NotNil(t, tg.Targets)
-	require.Len(t, tg.Targets, 6)
+	require.Len(t, tg.Targets, 8)

-	for i, lbls := range []model.LabelSet{
+	expected := []model.LabelSet{
 		{
 			"__address__":                "172.19.0.2:9100",
 			"__meta_docker_container_id": "c301b928faceb1a18fe379f6bc178727ef920bb30b0f9b8592b32b36255a0eca",
@ -163,7 +163,43 @@ host: %s
 			"__meta_docker_network_scope":                              "local",
 			"__meta_docker_port_private":                               "9104",
 		},
-	} {
+		{
+			"__address__":                "172.20.0.3:3306",
+			"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+			"__meta_docker_container_label_com_docker_compose_project": "dockersd",
+			"__meta_docker_container_label_com_docker_compose_service": "mysql",
+			"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+			"__meta_docker_container_name":                             "/dockersd_multi_networks",
+			"__meta_docker_container_network_mode":                     "dockersd_private_none",
+			"__meta_docker_network_id":                                 "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+			"__meta_docker_network_ingress":                            "false",
+			"__meta_docker_network_internal":                           "false",
+			"__meta_docker_network_ip":                                 "172.20.0.3",
+			"__meta_docker_network_name":                               "dockersd_private",
+			"__meta_docker_network_scope":                              "local",
+			"__meta_docker_port_private":                               "3306",
+		},
+		{
+			"__address__":                "172.20.0.3:33060",
+			"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+			"__meta_docker_container_label_com_docker_compose_project": "dockersd",
+			"__meta_docker_container_label_com_docker_compose_service": "mysql",
+			"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+			"__meta_docker_container_name":                             "/dockersd_multi_networks",
+			"__meta_docker_container_network_mode":                     "dockersd_private_none",
+			"__meta_docker_network_id":                                 "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+			"__meta_docker_network_ingress":                            "false",
+			"__meta_docker_network_internal":                           "false",
+			"__meta_docker_network_ip":                                 "172.20.0.3",
+			"__meta_docker_network_name":                               "dockersd_private",
+			"__meta_docker_network_scope":                              "local",
+			"__meta_docker_port_private":                               "33060",
+		},
+	}
+	sortFunc(expected)
+	sortFunc(tg.Targets)
+
+	for i, lbls := range expected {
 		t.Run(fmt.Sprintf("item %d", i), func(t *testing.T) {
 			require.Equal(t, lbls, tg.Targets[i])
 		})
@ -202,13 +238,8 @@ host: %s
 	tg := tgs[0]
 	require.NotNil(t, tg)
 	require.NotNil(t, tg.Targets)
-	require.Len(t, tg.Targets, 9)
+	require.Len(t, tg.Targets, 13)

-	sortFunc := func(labelSets []model.LabelSet) {
-		sort.Slice(labelSets, func(i, j int) bool {
-			return labelSets[i]["__address__"] < labelSets[j]["__address__"]
-		})
-	}
 	expected := []model.LabelSet{
 		{
 			"__address__":                "172.19.0.2:9100",
@ -359,6 +390,70 @@ host: %s
 			"__meta_docker_network_scope":                              "local",
 			"__meta_docker_port_private":                               "9104",
 		},
+		{
+			"__address__":                "172.20.0.3:3306",
+			"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+			"__meta_docker_container_label_com_docker_compose_project": "dockersd",
+			"__meta_docker_container_label_com_docker_compose_service": "mysql",
+			"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+			"__meta_docker_container_name":                             "/dockersd_multi_networks",
+			"__meta_docker_container_network_mode":                     "dockersd_private_none",
+			"__meta_docker_network_id":                                 "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+			"__meta_docker_network_ingress":                            "false",
+			"__meta_docker_network_internal":                           "false",
+			"__meta_docker_network_ip":                                 "172.20.0.3",
+			"__meta_docker_network_name":                               "dockersd_private",
+			"__meta_docker_network_scope":                              "local",
+			"__meta_docker_port_private":                               "3306",
+		},
+		{
+			"__address__":                "172.20.0.3:33060",
+			"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+			"__meta_docker_container_label_com_docker_compose_project": "dockersd",
+			"__meta_docker_container_label_com_docker_compose_service": "mysql",
+			"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+			"__meta_docker_container_name":                             "/dockersd_multi_networks",
+			"__meta_docker_container_network_mode":                     "dockersd_private_none",
+			"__meta_docker_network_id":                                 "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+			"__meta_docker_network_ingress":                            "false",
+			"__meta_docker_network_internal":                           "false",
+			"__meta_docker_network_ip":                                 "172.20.0.3",
+			"__meta_docker_network_name":                               "dockersd_private",
+			"__meta_docker_network_scope":                              "local",
+			"__meta_docker_port_private":                               "33060",
+		},
+		{
+			"__address__":                "172.21.0.3:3306",
+			"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+			"__meta_docker_container_label_com_docker_compose_project": "dockersd",
+			"__meta_docker_container_label_com_docker_compose_service": "mysql",
+			"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+			"__meta_docker_container_name":                             "/dockersd_multi_networks",
+			"__meta_docker_container_network_mode":                     "dockersd_private_none",
+			"__meta_docker_network_id":                                 "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
+			"__meta_docker_network_ingress":                            "false",
+			"__meta_docker_network_internal":                           "false",
+			"__meta_docker_network_ip":                                 "172.21.0.3",
+			"__meta_docker_network_name":                               "dockersd_private1",
+			"__meta_docker_network_scope":                              "local",
+			"__meta_docker_port_private":                               "3306",
+		},
+		{
+			"__address__":                "172.21.0.3:33060",
+			"__meta_docker_container_id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+			"__meta_docker_container_label_com_docker_compose_project": "dockersd",
+			"__meta_docker_container_label_com_docker_compose_service": "mysql",
+			"__meta_docker_container_label_com_docker_compose_version": "2.2.2",
+			"__meta_docker_container_name":                             "/dockersd_multi_networks",
+			"__meta_docker_container_network_mode":                     "dockersd_private_none",
+			"__meta_docker_network_id":                                 "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
+			"__meta_docker_network_ingress":                            "false",
+			"__meta_docker_network_internal":                           "false",
+			"__meta_docker_network_ip":                                 "172.21.0.3",
+			"__meta_docker_network_name":                               "dockersd_private1",
+			"__meta_docker_network_scope":                              "local",
+			"__meta_docker_port_private":                               "33060",
+		},
 	}

 	sortFunc(expected)
@ -370,3 +465,9 @@ host: %s
 		})
 	}
 }
+
+func sortFunc(labelSets []model.LabelSet) {
+	sort.Slice(labelSets, func(i, j int) bool {
+		return labelSets[i]["__address__"] < labelSets[j]["__address__"]
+	})
+}
--- a/discovery/moby/testdata/dockerprom/containers/json.json
+++ b/discovery/moby/testdata/dockerprom/containers/json.json
@ -228,5 +228,74 @@
      "Networks": {}
    },
    "Mounts": []
+  },
+  {
+    "Id": "f84b2a0cfaa58d9e70b0657e2b3c6f44f0e973de4163a871299b4acf127b224f",
+    "Names": [
+      "/dockersd_multi_networks"
+    ],
+    "Image": "mysql:5.7.29",
+    "ImageID": "sha256:16ae2f4625ba63a250462bedeece422e741de9f0caf3b1d89fd5b257aca80cd1",
+    "Command": "mysqld",
+    "Created": 1616273136,
+    "Ports": [
+      {
+        "PrivatePort": 3306,
+        "Type": "tcp"
+      },
+      {
+        "PrivatePort": 33060,
+        "Type": "tcp"
+      }
+    ],
+    "Labels": {
+      "com.docker.compose.project": "dockersd",
+      "com.docker.compose.service": "mysql",
+      "com.docker.compose.version": "2.2.2"
+    },
+    "State": "running",
+    "Status": "Up 40 seconds",
+    "HostConfig": {
+      "NetworkMode": "dockersd_private_none"
+    },
+    "NetworkSettings": {
+      "Networks": {
+        "dockersd_private": {
+          "IPAMConfig": null,
+          "Links": null,
+          "Aliases": null,
+          "NetworkID": "e804771e55254a360fdb70dfdd78d3610fdde231b14ef2f837a00ac1eeb9e601",
+          "EndpointID": "972d6807997369605ace863af58de6cb90c787a5bf2ffc4105662d393ae539b7",
+          "Gateway": "172.20.0.1",
+          "IPAddress": "172.20.0.3",
+          "IPPrefixLen": 16,
+          "IPv6Gateway": "",
+          "GlobalIPv6Address": "",
+          "GlobalIPv6PrefixLen": 0,
+          "MacAddress": "02:42:ac:14:00:02",
+          "DriverOpts": null
+        },
+        "dockersd_private1": {
+          "IPAMConfig": {},
+          "Links": null,
+          "Aliases": [
+            "mysql",
+            "mysql",
+            "f9ade4b83199"
+          ],
+          "NetworkID": "bfcf66a6b64f7d518f009e34290dc3f3c66a08164257ad1afc3bd31d75f656e8",
+          "EndpointID": "91a98405344ee1cb7d977cafabe634837876651544b32da20a5e0155868e6f5f",
+          "Gateway": "172.21.0.1",
+          "IPAddress": "172.21.0.3",
+          "IPPrefixLen": 24,
+          "IPv6Gateway": "",
+          "GlobalIPv6Address": "",
+          "GlobalIPv6PrefixLen": 0,
+          "MacAddress": "02:42:ac:15:00:02",
+          "DriverOpts": null
+        }
+      }
+    },
+    "Mounts": []
  }
 ]
--- a/discovery/util.go
+++ b/discovery/util.go
@ -19,8 +19,8 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 )

-// A utility to be used by implementations of discovery.Discoverer
-// which need to manage the lifetime of their metrics.
+// MetricRegisterer is used by implementations of discovery.Discoverer that need
+// to manage the lifetime of their metrics.
 type MetricRegisterer interface {
 	RegisterMetrics() error
 	UnregisterMetrics()
@ -34,7 +34,7 @@ type metricRegistererImpl struct {

 var _ MetricRegisterer = &metricRegistererImpl{}

-// Creates an instance of a MetricRegisterer.
+// NewMetricRegisterer creates an instance of a MetricRegisterer.
 // Typically called inside the implementation of the NewDiscoverer() method.
 func NewMetricRegisterer(reg prometheus.Registerer, metrics []prometheus.Collector) MetricRegisterer {
 	return &metricRegistererImpl{
--- a/docs/command-line/prometheus.md
+++ b/docs/command-line/prometheus.md
@ -15,11 +15,11 @@ The Prometheus monitoring server
 | <code class="text-nowrap">-h</code>, <code class="text-nowrap">--help</code> | Show context-sensitive help (also try --help-long and --help-man). |  |
 | <code class="text-nowrap">--version</code> | Show application version. |  |
 | <code class="text-nowrap">--config.file</code> | Prometheus configuration file path. | `prometheus.yml` |
-| <code class="text-nowrap">--web.listen-address</code> | Address to listen on for UI, API, and telemetry. | `0.0.0.0:9090` |
+| <code class="text-nowrap">--web.listen-address</code> <code class="text-nowrap">...</code> | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` |
 | <code class="text-nowrap">--auto-gomemlimit.ratio</code> | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` |
 | <code class="text-nowrap">--web.config.file</code> | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. |  |
 | <code class="text-nowrap">--web.read-timeout</code> | Maximum duration before timing out read of the request, and closing idle connections. | `5m` |
-| <code class="text-nowrap">--web.max-connections</code> | Maximum number of simultaneous connections. | `512` |
+| <code class="text-nowrap">--web.max-connections</code> | Maximum number of simultaneous connections across all listeners. | `512` |
 | <code class="text-nowrap">--web.external-url</code> | The URL under which Prometheus is externally reachable (for example, if Prometheus is served via a reverse proxy). Used for generating relative and absolute links back to Prometheus itself. If the URL has a path portion, it will be used to prefix all HTTP endpoints served by Prometheus. If omitted, relevant URL components will be derived automatically. |  |
 | <code class="text-nowrap">--web.route-prefix</code> | Prefix for the internal routes of web endpoints. Defaults to path of --web.external-url. |  |
 | <code class="text-nowrap">--web.user-assets</code> | Path to static asset directory, available at /user. |  |
@ -55,7 +55,8 @@ The Prometheus monitoring server
 | <code class="text-nowrap">--query.timeout</code> | Maximum time a query may take before being aborted. Use with server mode only. | `2m` |
 | <code class="text-nowrap">--query.max-concurrency</code> | Maximum number of queries executed concurrently. Use with server mode only. | `20` |
 | <code class="text-nowrap">--query.max-samples</code> | Maximum number of samples a single query can load into memory. Note that queries will fail if they try to load more samples than this into memory, so this also limits the number of samples a query can return. Use with server mode only. | `50000000` |
-| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. |  |
+| <code class="text-nowrap">--scrape.name-escaping-scheme</code> | Method for escaping legacy invalid names when sending to Prometheus that does not support UTF-8. Can be one of "values", "underscores", or "dots". | `values` |
+| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...</code> | Comma separated feature names to enable. Valid options: agent, auto-gomemlimit, exemplar-storage, expand-external-labels, memory-snapshot-on-shutdown, promql-per-step-stats, promql-experimental-functions, extra-scrape-metrics, new-service-discovery-manager, auto-gomaxprocs, no-default-scrape-port, native-histograms, otlp-write-receiver, created-timestamp-zero-ingestion, concurrent-rule-eval. See https://prometheus.io/docs/prometheus/latest/feature_flags/ for more details. |  |
 | <code class="text-nowrap">--log.level</code> | Only log messages with the given severity or above. One of: [debug, info, warn, error] | `info` |
 | <code class="text-nowrap">--log.format</code> | Output format of log messages. One of: [logfmt, json] | `logfmt` |

--- a/docs/command-line/promtool.md
+++ b/docs/command-line/promtool.md
@ -15,7 +15,7 @@ Tooling for the Prometheus monitoring system.
 | <code class="text-nowrap">-h</code>, <code class="text-nowrap">--help</code> | Show context-sensitive help (also try --help-long and --help-man). |
 | <code class="text-nowrap">--version</code> | Show application version. |
 | <code class="text-nowrap">--experimental</code> | Enable experimental commands. |
-| <code class="text-nowrap">--enable-feature</code> | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. |
+| <code class="text-nowrap">--enable-feature</code> <code class="text-nowrap">...</code> | Comma separated feature names to enable (only PromQL related and no-default-scrape-port). See https://prometheus.io/docs/prometheus/latest/feature_flags/ for the options and more details. |



@ -281,7 +281,7 @@ Run series query.

 | Flag | Description |
 | --- | --- |
-| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. |
+| <code class="text-nowrap">--match</code> <code class="text-nowrap">...</code> | Series selector. Can be specified multiple times. |
 | <code class="text-nowrap">--start</code> | Start time (RFC3339 or Unix timestamp). |
 | <code class="text-nowrap">--end</code> | End time (RFC3339 or Unix timestamp). |

@ -309,7 +309,7 @@ Run labels query.
 | --- | --- |
 | <code class="text-nowrap">--start</code> | Start time (RFC3339 or Unix timestamp). |
 | <code class="text-nowrap">--end</code> | End time (RFC3339 or Unix timestamp). |
-| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. |
+| <code class="text-nowrap">--match</code> <code class="text-nowrap">...</code> | Series selector. Can be specified multiple times. |



@ -338,7 +338,7 @@ Run queries against your Prometheus to analyze the usage pattern of certain metr
 | <code class="text-nowrap">--type</code> | Type of metric: histogram. |  |
 | <code class="text-nowrap">--duration</code> | Time frame to analyze. | `1h` |
 | <code class="text-nowrap">--time</code> | Query time (RFC3339 or Unix timestamp), defaults to now. |  |
-| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. |  |
+| <code class="text-nowrap">--match</code> <code class="text-nowrap">...</code> | Series selector. Can be specified multiple times. |  |



@ -461,7 +461,7 @@ Unit tests for rules.

 | Flag | Description | Default |
 | --- | --- | --- |
-| <code class="text-nowrap">--run</code> | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. |  |
+| <code class="text-nowrap">--run</code> <code class="text-nowrap">...</code> | If set, will only run test groups whose names match the regular expression. Can be specified multiple times. |  |
 | <code class="text-nowrap">--diff</code> | [Experimental] Print colored differential output between expected & received output. | `false` |


@ -578,7 +578,7 @@ Dump samples from a TSDB.
 | <code class="text-nowrap">--sandbox-dir-root</code> | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
 | <code class="text-nowrap">--min-time</code> | Minimum timestamp to dump. | `-9223372036854775808` |
 | <code class="text-nowrap">--max-time</code> | Maximum timestamp to dump. | `9223372036854775807` |
-| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
+| <code class="text-nowrap">--match</code> <code class="text-nowrap">...</code> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |



@ -605,7 +605,7 @@ Dump samples from a TSDB.
 | <code class="text-nowrap">--sandbox-dir-root</code> | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` |
 | <code class="text-nowrap">--min-time</code> | Minimum timestamp to dump. | `-9223372036854775808` |
 | <code class="text-nowrap">--max-time</code> | Maximum timestamp to dump. | `9223372036854775807` |
-| <code class="text-nowrap">--match</code> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |
+| <code class="text-nowrap">--match</code> <code class="text-nowrap">...</code> | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` |



@ -641,6 +641,15 @@ Import samples from OpenMetrics input and produce TSDB blocks. Please refer to t



+###### Flags
+
+| Flag | Description |
+| --- | --- |
+| <code class="text-nowrap">--label</code> | Label to attach to metrics. Can be specified multiple times. Example --label=label_name=label_value |
+
+
+
+
 ###### Arguments

 | Argument | Description | Default | Required |
--- a/docs/configuration/configuration.md
+++ b/docs/configuration/configuration.md
@ -957,7 +957,9 @@ tls_config:
 # The host to use if the container is in host networking mode.
 [ host_networking_host: <string> | default = "localhost" ]

-# Match the first network if the container has multiple networks defined, thus avoiding collecting duplicate targets.
+# Sort all non-nil networks in ascending order by network name and
+# use only the first one if the container has multiple networks defined,
+# thus avoiding collecting duplicate targets.
 [ match_first_network: <boolean> | default = true ]

 # Optional filters to limit the discovery process to a subset of available
@ -3279,12 +3281,16 @@ Initially, aside from the configured per-target labels, a target's `job`
 label is set to the `job_name` value of the respective scrape configuration.
 The `__address__` label is set to the `<host>:<port>` address of the target.
 After relabeling, the `instance` label is set to the value of `__address__` by default if
-it was not set during relabeling. The `__scheme__` and `__metrics_path__` labels
-are set to the scheme and metrics path of the target respectively. The `__param_<name>`
-label is set to the value of the first passed URL parameter called `<name>`.
+it was not set during relabeling.
+
+The `__scheme__` and `__metrics_path__` labels
+are set to the scheme and metrics path of the target respectively, as specified in `scrape_config`.
+
+The `__param_<name>`
+label is set to the value of the first passed URL parameter called `<name>`, as defined in `scrape_config`.

 The `__scrape_interval__` and `__scrape_timeout__` labels are set to the target's
-interval and timeout.
+interval and timeout, as specified in `scrape_config`.

 Additional labels prefixed with `__meta_` may be available during the
 relabeling phase. They are set by the service discovery mechanism that provided
--- a/docs/feature_flags.md
+++ b/docs/feature_flags.md
@ -242,6 +242,14 @@ Note that during this delay, the Head continues its usual operations, which incl

 Despite the delay in compaction, the blocks produced are time-aligned in the same manner as they would be if the delay was not in place.

+## Delay __name__ label removal for PromQL engine
+
+`--enable-feature=promql-delayed-name-removal`
+
+When enabled, Prometheus will change the way in which the `__name__` label is removed from PromQL query results (for functions and expressions for which this is necessary). Specifically, it will delay the removal to the last step of the query evaluation, instead of every time an expression or function creating derived metrics is evaluated.
+
+This allows optionally preserving the `__name__` label via the `label_replace` and `label_join` functions, and helps prevent the "vector cannot contain metrics with the same labelset" error, which can happen when applying a regex-matcher to the `__name__` label.
+
 ## UTF-8 Name Support

 `--enable-feature=utf8-names`
--- a/docs/querying/basics.md
+++ b/docs/querying/basics.md
@ -41,7 +41,7 @@ vector is the only type which can be graphed.
 _Notes about the experimental native histograms:_

 * Ingesting native histograms has to be enabled via a [feature
-  flag](../../feature_flags.md#native-histograms).
+  flag](../feature_flags.md#native-histograms).
 * Once native histograms have been ingested into the TSDB (and even after
  disabling the feature flag again), both instant vectors and range vectors may
  now contain samples that aren't simple floating point numbers (float samples)
--- a/docs/querying/functions.md
+++ b/docs/querying/functions.md
@ -619,7 +619,7 @@ Like `sort`, `sort_desc` only affects the results of instant queries, as range q

 **This function has to be enabled via the [feature flag](../feature_flags.md#experimental-promql-functions) `--enable-feature=promql-experimental-functions`.**

-`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by their label values and sample value in case of label values being equal, in ascending order.
+`sort_by_label(v instant-vector, label string, ...)` returns vector elements sorted by the values of the given labels in ascending order. In case these label values are equal, elements are sorted by their full label sets.

 Please note that the sort by label functions only affect the results of instant queries, as range query results always have a fixed output ordering.

--- a/go.mod
+++ b/go.mod
@ -52,9 +52,9 @@ require (
 	github.com/oklog/ulid v1.3.1
 	github.com/ovh/go-ovh v1.6.0
 	github.com/prometheus/alertmanager v0.27.0
-	github.com/prometheus/client_golang v1.19.1
+	github.com/prometheus/client_golang v1.20.0
 	github.com/prometheus/client_model v0.6.1
-	github.com/prometheus/common v0.55.0
+	github.com/prometheus/common v0.56.0
 	github.com/prometheus/common/assets v0.2.0
 	github.com/prometheus/common/sigv4 v0.1.0
 	github.com/prometheus/exporter-toolkit v0.11.0
@ -75,7 +75,6 @@ require (
 	go.uber.org/automaxprocs v1.5.3
 	go.uber.org/goleak v1.3.0
 	go.uber.org/multierr v1.11.0
-	golang.org/x/net v0.27.0
 	golang.org/x/oauth2 v0.21.0
 	golang.org/x/sync v0.7.0
 	golang.org/x/sys v0.22.0
@ -190,6 +189,7 @@ require (
 	golang.org/x/crypto v0.25.0 // indirect
 	golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
 	golang.org/x/mod v0.19.0 // indirect
+	golang.org/x/net v0.27.0 // indirect
 	golang.org/x/term v0.22.0 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240730163845-b1a4ccb954bf // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
--- a/go.sum
+++ b/go.sum
@ -608,8 +608,8 @@ github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeD
 github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
 github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
 github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
-github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
-github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
+github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI=
+github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
 github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
 github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
 github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@ -625,8 +625,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b
 github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
 github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
 github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
-github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
-github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
+github.com/prometheus/common v0.56.0 h1:UffReloqkBtvtQEYDg2s+uDPGRrJyC6vZWPGXf6OhPY=
+github.com/prometheus/common v0.56.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI=
 github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM=
 github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI=
 github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4=
--- a/model/exemplar/exemplar.go
+++ b/model/exemplar/exemplar.go
@ -15,7 +15,9 @@ package exemplar

 import "github.com/prometheus/prometheus/model/labels"

-// The combined length of the label names and values of an Exemplar's LabelSet MUST NOT exceed 128 UTF-8 characters
+// ExemplarMaxLabelSetLength is defined by OpenMetrics: "The combined length of
+// the label names and values of an Exemplar's LabelSet MUST NOT exceed 128
+// UTF-8 characters."
 // https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#exemplars
 const ExemplarMaxLabelSetLength = 128

@ -49,7 +51,7 @@ func (e Exemplar) Equals(e2 Exemplar) bool {
 	return e.Value == e2.Value
 }

-// Sort first by timestamp, then value, then labels.
+// Compare first timestamps, then values, then labels.
 func Compare(a, b Exemplar) int {
 	if a.Ts < b.Ts {
 		return -1
--- a/model/labels/labels.go
+++ b/model/labels/labels.go
@ -315,7 +315,8 @@ func Compare(a, b Labels) int {
 	return len(a) - len(b)
 }

-// Copy labels from b on top of whatever was in ls previously, reusing memory or expanding if needed.
+// CopyFrom copies labels from b on top of whatever was in ls previously,
+// reusing memory or expanding if needed.
 func (ls *Labels) CopyFrom(b Labels) {
 	(*ls) = append((*ls)[:0], b...)
 }
@ -422,7 +423,7 @@ type ScratchBuilder struct {
 	add Labels
 }

-// Symbol-table is no-op, just for api parity with dedupelabels.
+// SymbolTable is a no-op, just for API parity with dedupelabels.
 type SymbolTable struct{}

 func NewSymbolTable() *SymbolTable { return nil }
@ -458,7 +459,7 @@ func (b *ScratchBuilder) Add(name, value string) {
 	b.add = append(b.add, Label{Name: name, Value: value})
 }

-// Add a name/value pair, using []byte instead of string.
+// UnsafeAddBytes adds a name/value pair, using []byte instead of string.
 // The '-tags stringlabels' version of this function is unsafe, hence the name.
 // This version is safe - it copies the strings immediately - but we keep the same name so everything compiles.
 func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) {
@ -475,14 +476,14 @@ func (b *ScratchBuilder) Assign(ls Labels) {
 	b.add = append(b.add[:0], ls...) // Copy on top of our slice, so we don't retain the input slice.
 }

-// Return the name/value pairs added so far as a Labels object.
+// Labels returns the name/value pairs added so far as a Labels object.
 // Note: if you want them sorted, call Sort() first.
 func (b *ScratchBuilder) Labels() Labels {
 	// Copy the slice, so the next use of ScratchBuilder doesn't overwrite.
 	return append([]Label{}, b.add...)
 }

-// Write the newly-built Labels out to ls.
+// Overwrite writes the newly-built Labels out to ls.
 // Callers must ensure that there are no other references to ls, or any strings fetched from it.
 func (b *ScratchBuilder) Overwrite(ls *Labels) {
 	*ls = append((*ls)[:0], b.add...)
--- a/model/labels/labels_common.go
+++ b/model/labels/labels_common.go
@ -95,12 +95,23 @@ func (ls *Labels) UnmarshalYAML(unmarshal func(interface{}) error) error {
 }

 // IsValid checks if the metric name or label names are valid.
-func (ls Labels) IsValid() bool {
+func (ls Labels) IsValid(validationScheme model.ValidationScheme) bool {
 	err := ls.Validate(func(l Label) error {
-		if l.Name == model.MetricNameLabel && !model.IsValidMetricName(model.LabelValue(l.Value)) {
+		if l.Name == model.MetricNameLabel {
+			// If the default validation scheme has been overridden with legacy mode,
+			// we need to call the special legacy validation checker.
+			if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation && !model.IsValidLegacyMetricName(string(model.LabelValue(l.Value))) {
 				return strconv.ErrSyntax
 			}
-		if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() {
+			if !model.IsValidMetricName(model.LabelValue(l.Value)) {
+				return strconv.ErrSyntax
+			}
+		}
+		if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation {
+			if !model.LabelName(l.Name).IsValidLegacy() || !model.LabelValue(l.Value).IsValid() {
+				return strconv.ErrSyntax
+			}
+		} else if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() {
 			return strconv.ErrSyntax
 		}
 		return nil
--- a/model/labels/labels_test.go
+++ b/model/labels/labels_test.go
@ -21,6 +21,7 @@ import (
 	"strings"
 	"testing"

+	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/require"
 	"gopkg.in/yaml.v2"
 )
@ -272,11 +273,86 @@ func TestLabels_IsValid(t *testing.T) {
 		},
 	} {
 		t.Run("", func(t *testing.T) {
-			require.Equal(t, test.expected, test.input.IsValid())
+			require.Equal(t, test.expected, test.input.IsValid(model.LegacyValidation))
 		})
 	}
 }

+// TestLabels_ValidationModes checks Labels.IsValid for combinations of the
+// package-level model.NameValidationScheme (globalMode) and the validation
+// scheme passed to the call (callMode).
+func TestLabels_ValidationModes(t *testing.T) {
+	// Every test case overwrites the package-level model.NameValidationScheme.
+	// Restore the previous value afterwards so that the setting of the last
+	// case does not leak into other tests of this package.
+	origScheme := model.NameValidationScheme
+	t.Cleanup(func() { model.NameValidationScheme = origScheme })
+
+	for _, test := range []struct {
+		input      Labels
+		globalMode model.ValidationScheme
+		callMode   model.ValidationScheme
+		expected   bool
+	}{
+		{
+			input: FromStrings(
+				"__name__", "test.metric",
+				"hostname", "localhost",
+				"job", "check",
+			),
+			globalMode: model.UTF8Validation,
+			callMode:   model.UTF8Validation,
+			expected:   true,
+		},
+		{
+			input: FromStrings(
+				"__name__", "test",
+				"\xc5 bad utf8", "localhost",
+				"job", "check",
+			),
+			globalMode: model.UTF8Validation,
+			callMode:   model.UTF8Validation,
+			expected:   false,
+		},
+		{
+			// Setting the common model to legacy validation and then trying to check for UTF-8 on a
+			// per-call basis is not supported.
+			input: FromStrings(
+				"__name__", "test.utf8.metric",
+				"hostname", "localhost",
+				"job", "check",
+			),
+			globalMode: model.LegacyValidation,
+			callMode:   model.UTF8Validation,
+			expected:   false,
+		},
+		{
+			input: FromStrings(
+				"__name__", "test",
+				"hostname", "localhost",
+				"job", "check",
+			),
+			globalMode: model.LegacyValidation,
+			callMode:   model.LegacyValidation,
+			expected:   true,
+		},
+		{
+			input: FromStrings(
+				"__name__", "test.utf8.metric",
+				"hostname", "localhost",
+				"job", "check",
+			),
+			globalMode: model.UTF8Validation,
+			callMode:   model.LegacyValidation,
+			expected:   false,
+		},
+		{
+			input: FromStrings(
+				"__name__", "test",
+				"host.name", "localhost",
+				"job", "check",
+			),
+			globalMode: model.UTF8Validation,
+			callMode:   model.LegacyValidation,
+			expected:   false,
+		},
+	} {
+		model.NameValidationScheme = test.globalMode
+		require.Equal(t, test.expected, test.input.IsValid(test.callMode))
+	}
+}
+
 func TestLabels_Equal(t *testing.T) {
 	labels := FromStrings(
 		"aaa", "111",
--- a/model/textparse/interface.go
+++ b/model/textparse/interface.go
@ -106,8 +106,8 @@ const (
 	EntryInvalid   Entry = -1
 	EntryType      Entry = 0
 	EntryHelp      Entry = 1
-	EntrySeries    Entry = 2 // A series with a simple float64 as value.
+	EntrySeries    Entry = 2 // EntrySeries marks a series with a simple float64 as value.
 	EntryComment   Entry = 3
 	EntryUnit      Entry = 4
-	EntryHistogram Entry = 5 // A series with a native histogram as a value.
+	EntryHistogram Entry = 5 // EntryHistogram marks a series with a native histogram as a value.
 )
--- a/model/textparse/protobufparse.go
+++ b/model/textparse/protobufparse.go
@ -47,7 +47,7 @@ import (
 // the re-arrangement work is actually causing problems (which has to be seen),
 // that expectation needs to be changed.
 type ProtobufParser struct {
-	in        []byte // The intput to parse.
+	in        []byte // The input to parse.
 	inPos     int    // Position within the input.
 	metricPos int    // Position within Metric slice.
 	// fieldPos is the position within a Summary or (legacy) Histogram. -2
@ -71,7 +71,7 @@ type ProtobufParser struct {

 	mf *dto.MetricFamily

-	// Wether to also parse a classic histogram that is also present as a
+	// Whether to also parse a classic histogram that is also present as a
 	// native histogram.
 	parseClassicHistograms bool

--- a/promql/engine.go
+++ b/promql/engine.go
@ -313,6 +313,11 @@ type EngineOpts struct {

 	// EnablePerStepStats if true allows for per-step stats to be computed on request. Disabled otherwise.
 	EnablePerStepStats bool
+
+	// EnableDelayedNameRemoval delays the removal of the __name__ label to the last step of the query evaluation.
+	// This is useful in certain scenarios where the __name__ label must be preserved or where applying a
+	// regex-matcher to the __name__ label may otherwise lead to duplicate labelset errors.
+	EnableDelayedNameRemoval bool
 }

 // Engine handles the lifetime of queries from beginning to end.
@ -330,6 +335,7 @@ type Engine struct {
 	enableAtModifier         bool
 	enableNegativeOffset     bool
 	enablePerStepStats       bool
+	enableDelayedNameRemoval bool
 }

 // NewEngine returns a new engine.
@ -420,6 +426,7 @@ func NewEngine(opts EngineOpts) *Engine {
 		enableAtModifier:         opts.EnableAtModifier,
 		enableNegativeOffset:     opts.EnableNegativeOffset,
 		enablePerStepStats:       opts.EnablePerStepStats,
+		enableDelayedNameRemoval: opts.EnableDelayedNameRemoval,
 	}
 }

@ -573,7 +580,7 @@ func (ng *Engine) validateOpts(expr parser.Expr) error {
 	return validationErr
 }

-// NewTestQuery: inject special behaviour into Query for testing.
+// NewTestQuery injects special behaviour into Query for testing.
 func (ng *Engine) NewTestQuery(f func(context.Context) error) Query {
 	qry := &query{
 		q:           "test statement",
@ -712,6 +719,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval
 			lookbackDelta:            s.LookbackDelta,
 			samplesStats:             query.sampleStats,
 			noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn,
+			enableDelayedNameRemoval: ng.enableDelayedNameRemoval,
 		}
 		query.sampleStats.InitStepTracking(start, start, 1)

@ -743,9 +751,9 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval
 				// Point might have a different timestamp, force it to the evaluation
 				// timestamp as that is when we ran the evaluation.
 				if len(s.Histograms) > 0 {
-					vector[i] = Sample{Metric: s.Metric, H: s.Histograms[0].H, T: start}
+					vector[i] = Sample{Metric: s.Metric, H: s.Histograms[0].H, T: start, DropName: s.DropName}
 				} else {
-					vector[i] = Sample{Metric: s.Metric, F: s.Floats[0].F, T: start}
+					vector[i] = Sample{Metric: s.Metric, F: s.Floats[0].F, T: start, DropName: s.DropName}
 				}
 			}
 			return vector, warnings, nil
@ -770,6 +778,7 @@ func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *parser.Eval
 		lookbackDelta:            s.LookbackDelta,
 		samplesStats:             query.sampleStats,
 		noStepSubqueryIntervalFn: ng.noStepSubqueryIntervalFn,
+		enableDelayedNameRemoval: ng.enableDelayedNameRemoval,
 	}
 	query.sampleStats.InitStepTracking(evaluator.startTimestamp, evaluator.endTimestamp, evaluator.interval)
 	val, warnings, err := evaluator.Eval(s.Expr)
@ -1038,6 +1047,7 @@ type evaluator struct {
 	lookbackDelta            time.Duration
 	samplesStats             *stats.QuerySamples
 	noStepSubqueryIntervalFn func(rangeMillis int64) int64
+	enableDelayedNameRemoval bool
 }

 // errorf causes a panic with the input formatted into an error.
@ -1079,6 +1089,9 @@ func (ev *evaluator) Eval(expr parser.Expr) (v parser.Value, ws annotations.Anno
 	defer ev.recover(expr, &ws, &err)

 	v, ws = ev.eval(expr)
+	if ev.enableDelayedNameRemoval {
+		ev.cleanupMetricLabels(v)
+	}
 	return v, ws, nil
 }

@ -1107,6 +1120,9 @@ type EvalNodeHelper struct {
 	rightSigs    map[string]Sample
 	matchedSigs  map[string]map[uint64]struct{}
 	resultMetric map[string]labels.Labels
+
+	// Additional options for the evaluation.
+	enableDelayedNameRemoval bool
 }

 func (enh *EvalNodeHelper) resetBuilder(lbls labels.Labels) {
@ -1156,7 +1172,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
 			biggestLen = len(matrixes[i])
 		}
 	}
-	enh := &EvalNodeHelper{Out: make(Vector, 0, biggestLen)}
+	enh := &EvalNodeHelper{Out: make(Vector, 0, biggestLen), enableDelayedNameRemoval: ev.enableDelayedNameRemoval}
 	type seriesAndTimestamp struct {
 		Series
 		ts int64
@ -1202,12 +1218,12 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
 			for si, series := range matrixes[i] {
 				switch {
 				case len(series.Floats) > 0 && series.Floats[0].T == ts:
-					vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts})
+					vectors[i] = append(vectors[i], Sample{Metric: series.Metric, F: series.Floats[0].F, T: ts, DropName: series.DropName})
 					// Move input vectors forward so we don't have to re-scan the same
 					// past points at the next step.
 					matrixes[i][si].Floats = series.Floats[1:]
 				case len(series.Histograms) > 0 && series.Histograms[0].T == ts:
-					vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts})
+					vectors[i] = append(vectors[i], Sample{Metric: series.Metric, H: series.Histograms[0].H, T: ts, DropName: series.DropName})
 					matrixes[i][si].Histograms = series.Histograms[1:]
 				default:
 					continue
@ -1246,15 +1262,15 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)

 		// If this could be an instant query, shortcut so as not to change sort order.
 		if ev.endTimestamp == ev.startTimestamp {
-			if result.ContainsSameLabelset() {
+			if !ev.enableDelayedNameRemoval && result.ContainsSameLabelset() {
 				ev.errorf("vector cannot contain metrics with the same labelset")
 			}
 			mat := make(Matrix, len(result))
 			for i, s := range result {
 				if s.H == nil {
-					mat[i] = Series{Metric: s.Metric, Floats: []FPoint{{T: ts, F: s.F}}}
+					mat[i] = Series{Metric: s.Metric, Floats: []FPoint{{T: ts, F: s.F}}, DropName: s.DropName}
 				} else {
-					mat[i] = Series{Metric: s.Metric, Histograms: []HPoint{{T: ts, H: s.H}}}
+					mat[i] = Series{Metric: s.Metric, Histograms: []HPoint{{T: ts, H: s.H}}, DropName: s.DropName}
 				}
 			}
 			ev.currentSamples = originalNumSamples + mat.TotalSamples()
@ -1272,7 +1288,7 @@ func (ev *evaluator) rangeEval(prepSeries func(labels.Labels, *EvalSeriesHelper)
 				}
 				ss.ts = ts
 			} else {
-				ss = seriesAndTimestamp{Series{Metric: sample.Metric}, ts}
+				ss = seriesAndTimestamp{Series{Metric: sample.Metric, DropName: sample.DropName}, ts}
 			}
 			addToSeries(&ss.Series, enh.Ts, sample.F, sample.H, numSteps)
 			seriess[h] = ss
@ -1308,7 +1324,7 @@ func (ev *evaluator) rangeEvalAgg(aggExpr *parser.AggregateExpr, sortedGrouping

 	var warnings annotations.Annotations

-	enh := &EvalNodeHelper{}
+	enh := &EvalNodeHelper{enableDelayedNameRemoval: ev.enableDelayedNameRemoval}
 	tempNumSamples := ev.currentSamples

 	// Create a mapping from input series to output groups.
@ -1617,10 +1633,17 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
 		var prevSS *Series
 		inMatrix := make(Matrix, 1)
 		inArgs[matrixArgIndex] = inMatrix
-		enh := &EvalNodeHelper{Out: make(Vector, 0, 1)}
+		enh := &EvalNodeHelper{Out: make(Vector, 0, 1), enableDelayedNameRemoval: ev.enableDelayedNameRemoval}
 		// Process all the calls for one time series at a time.
 		it := storage.NewBuffer(selRange)
 		var chkIter chunkenc.Iterator
+
+		// The last_over_time function acts like offset; thus, it
+		// should keep the metric name.  For all the other range
+		// vector functions, the only change needed is to drop the
+		// metric name in the output.
+		dropName := e.Func.Name != "last_over_time"
+
 		for i, s := range selVS.Series {
 			if err := contextDone(ev.ctx, "expression evaluation"); err != nil {
 				ev.error(err)
@ -1635,15 +1658,12 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
 			chkIter = s.Iterator(chkIter)
 			it.Reset(chkIter)
 			metric := selVS.Series[i].Labels()
-			// The last_over_time function acts like offset; thus, it
-			// should keep the metric name.  For all the other range
-			// vector functions, the only change needed is to drop the
-			// metric name in the output.
-			if e.Func.Name != "last_over_time" {
+			if !ev.enableDelayedNameRemoval && dropName {
 				metric = metric.DropMetricName()
 			}
 			ss := Series{
 				Metric:   metric,
+				DropName: dropName,
 			}
 			inMatrix[0].Metric = selVS.Series[i].Labels()
 			for ts, step := ev.startTimestamp, -1; ts <= ev.endTimestamp; ts += ev.interval {
@ -1760,14 +1780,14 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
 				Series{
 					Metric:   createLabelsForAbsentFunction(e.Args[0]),
 					Floats:   newp,
+					DropName: dropName,
 				},
 			}, warnings
 		}

-		if mat.ContainsSameLabelset() {
+		if !ev.enableDelayedNameRemoval && mat.ContainsSameLabelset() {
 			ev.errorf("vector cannot contain metrics with the same labelset")
 		}
-
 		return mat, warnings

 	case *parser.ParenExpr:
@ -1778,12 +1798,15 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
 		mat := val.(Matrix)
 		if e.Op == parser.SUB {
 			for i := range mat {
+				if !ev.enableDelayedNameRemoval {
 					mat[i].Metric = mat[i].Metric.DropMetricName()
+				}
+				mat[i].DropName = true
 				for j := range mat[i].Floats {
 					mat[i].Floats[j].F = -mat[i].Floats[j].F
 				}
 			}
-			if mat.ContainsSameLabelset() {
+			if !ev.enableDelayedNameRemoval && mat.ContainsSameLabelset() {
 				ev.errorf("vector cannot contain metrics with the same labelset")
 			}
 		}
@ -1919,6 +1942,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
 			lookbackDelta:            ev.lookbackDelta,
 			samplesStats:             ev.samplesStats.NewChild(),
 			noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn,
+			enableDelayedNameRemoval: ev.enableDelayedNameRemoval,
 		}

 		if e.Step != 0 {
@ -1963,6 +1987,7 @@ func (ev *evaluator) eval(expr parser.Expr) (parser.Value, annotations.Annotatio
 			lookbackDelta:            ev.lookbackDelta,
 			samplesStats:             ev.samplesStats.NewChild(),
 			noStepSubqueryIntervalFn: ev.noStepSubqueryIntervalFn,
+			enableDelayedNameRemoval: ev.enableDelayedNameRemoval,
 		}
 		res, ws := newEv.eval(e.Expr)
 		ev.currentSamples = newEv.currentSamples
@ -2559,7 +2584,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
 			continue
 		}
 		metric := resultMetric(ls.Metric, rs.Metric, op, matching, enh)
-		if returnBool {
+		if !ev.enableDelayedNameRemoval && returnBool {
 			metric = metric.DropMetricName()
 		}
 		insertedSigs, exists := matchedSigs[sig]
@ -2587,6 +2612,7 @@ func (ev *evaluator) VectorBinop(op parser.ItemType, lhs, rhs Vector, matching *
 			Metric:   metric,
 			F:        floatValue,
 			H:        histogramValue,
+			DropName: returnBool,
 		})
 	}
 	return enh.Out, lastErr
@ -2686,8 +2712,11 @@ func (ev *evaluator) VectorscalarBinop(op parser.ItemType, lhs Vector, rhs Scala
 			lhsSample.F = float
 			lhsSample.H = histogram
 			if shouldDropMetricName(op) || returnBool {
+				if !ev.enableDelayedNameRemoval {
 					lhsSample.Metric = lhsSample.Metric.DropMetricName()
 				}
+				lhsSample.DropName = true
+			}
 			enh.Out = append(enh.Out, lhsSample)
 		}
 	}
@ -3025,6 +3054,7 @@ func (ev *evaluator) aggregation(e *parser.AggregateExpr, q float64, inputMatrix

 		ss := &outputMatrix[ri]
 		addToSeries(ss, enh.Ts, aggr.floatValue, aggr.histogramValue, numSteps)
+		ss.DropName = inputMatrix[ri].DropName
 	}

 	return annos
@ -3051,7 +3081,7 @@ seriesLoop:
 		if !ok {
 			continue
 		}
-		s = Sample{Metric: inputMatrix[si].Metric, F: f}
+		s = Sample{Metric: inputMatrix[si].Metric, F: f, DropName: inputMatrix[si].DropName}

 		group := &groups[seriesToResult[si]]
 		// Initialize this group if it's the first time we've seen it.
@ -3135,16 +3165,16 @@ seriesLoop:
 		mat = make(Matrix, 0, len(groups))
 	}

-	add := func(lbls labels.Labels, f float64) {
+	add := func(lbls labels.Labels, f float64, dropName bool) {
 		// If this could be an instant query, add directly to the matrix so the result is in consistent order.
 		if ev.endTimestamp == ev.startTimestamp {
-			mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}})
+			mat = append(mat, Series{Metric: lbls, Floats: []FPoint{{T: enh.Ts, F: f}}, DropName: dropName})
 		} else {
 			// Otherwise the results are added into seriess elements.
 			hash := lbls.Hash()
 			ss, ok := seriess[hash]
 			if !ok {
-				ss = Series{Metric: lbls}
+				ss = Series{Metric: lbls, DropName: dropName}
 			}
 			addToSeries(&ss, enh.Ts, f, nil, numSteps)
 			seriess[hash] = ss
@ -3161,7 +3191,7 @@ seriesLoop:
 				sort.Sort(sort.Reverse(aggr.heap))
 			}
 			for _, v := range aggr.heap {
-				add(v.Metric, v.F)
+				add(v.Metric, v.F, v.DropName)
 			}

 		case parser.BOTTOMK:
@ -3170,12 +3200,12 @@ seriesLoop:
 				sort.Sort(sort.Reverse((*vectorByReverseValueHeap)(&aggr.heap)))
 			}
 			for _, v := range aggr.heap {
-				add(v.Metric, v.F)
+				add(v.Metric, v.F, v.DropName)
 			}

 		case parser.LIMITK, parser.LIMIT_RATIO:
 			for _, v := range aggr.heap {
-				add(v.Metric, v.F)
+				add(v.Metric, v.F, v.DropName)
 			}
 		}
 	}
@ -3227,6 +3257,30 @@ func (ev *evaluator) aggregationCountValues(e *parser.AggregateExpr, grouping []
 	return enh.Out, nil
 }

+// cleanupMetricLabels performs the deferred metric-name removal on v: every
+// series (for a Matrix) or sample (for a Vector) that is flagged with DropName
+// loses its metric name. If two elements end up with the same label set
+// afterwards, the evaluation is aborted through ev.errorf. Values of any other
+// type are left untouched.
+func (ev *evaluator) cleanupMetricLabels(v parser.Value) {
+	switch v.Type() {
+	case parser.ValueTypeMatrix:
+		series := v.(Matrix)
+		for i := range series {
+			if !series[i].DropName {
+				continue
+			}
+			series[i].Metric = series[i].Metric.DropMetricName()
+		}
+		if series.ContainsSameLabelset() {
+			ev.errorf("vector cannot contain metrics with the same labelset")
+		}
+	case parser.ValueTypeVector:
+		samples := v.(Vector)
+		for i := range samples {
+			if !samples[i].DropName {
+				continue
+			}
+			samples[i].Metric = samples[i].Metric.DropMetricName()
+		}
+		if samples.ContainsSameLabelset() {
+			ev.errorf("vector cannot contain metrics with the same labelset")
+		}
+	}
+}
+
 func addToSeries(ss *Series, ts int64, f float64, h *histogram.FloatHistogram, numSteps int) {
 	if h == nil {
 		if ss.Floats == nil {
@ -3537,14 +3591,14 @@ func makeInt64Pointer(val int64) *int64 {
 	return valp
 }

-// Add RatioSampler interface to allow unit-testing (previously: Randomizer).
+// RatioSampler allows unit-testing (previously: Randomizer).
 type RatioSampler interface {
 	// Return this sample "offset" between [0.0, 1.0]
 	sampleOffset(ts int64, sample *Sample) float64
 	AddRatioSample(r float64, sample *Sample) bool
 }

-// Use Hash(labels.String()) / maxUint64 as a "deterministic"
+// HashRatioSampler uses Hash(labels.String()) / maxUint64 as a "deterministic"
 // value in [0.0, 1.0].
 type HashRatioSampler struct{}

--- a/promql/engine_test.go
+++ b/promql/engine_test.go
@ -17,7 +17,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"math"
 	"os"
 	"sort"
 	"strconv"
@ -1712,6 +1711,7 @@ load 1ms
 						{F: 3600, T: 7 * 60 * 1000},
 					},
 					Metric:   labels.EmptyLabels(),
+					DropName: true,
 				},
 			},
 		},
@ -1929,18 +1929,22 @@ func TestSubquerySelector(t *testing.T) {
 							promql.Series{
 								Floats:   []promql.FPoint{{F: 3, T: 7985000}, {F: 3, T: 7990000}, {F: 3, T: 7995000}, {F: 3, T: 8000000}},
 								Metric:   labels.FromStrings("job", "api-server", "instance", "0", "group", "canary"),
+								DropName: true,
 							},
 							promql.Series{
 								Floats:   []promql.FPoint{{F: 4, T: 7985000}, {F: 4, T: 7990000}, {F: 4, T: 7995000}, {F: 4, T: 8000000}},
 								Metric:   labels.FromStrings("job", "api-server", "instance", "1", "group", "canary"),
+								DropName: true,
 							},
 							promql.Series{
 								Floats:   []promql.FPoint{{F: 1, T: 7985000}, {F: 1, T: 7990000}, {F: 1, T: 7995000}, {F: 1, T: 8000000}},
 								Metric:   labels.FromStrings("job", "api-server", "instance", "0", "group", "production"),
+								DropName: true,
 							},
 							promql.Series{
 								Floats:   []promql.FPoint{{F: 2, T: 7985000}, {F: 2, T: 7990000}, {F: 2, T: 7995000}, {F: 2, T: 8000000}},
 								Metric:   labels.FromStrings("job", "api-server", "instance", "1", "group", "production"),
+								DropName: true,
 							},
 						},
 						nil,
@ -3293,11 +3297,11 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) {

 				// sum_over_time().
 				queryString = fmt.Sprintf("sum_over_time(%s[%dm:1m])", seriesNameOverTime, offset+1)
-				queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels()}})
+				queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expected, Metric: labels.EmptyLabels(), DropName: true}})

 				// avg_over_time().
 				queryString = fmt.Sprintf("avg_over_time(%s[%dm:1m])", seriesNameOverTime, offset+1)
-				queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels()}})
+				queryAndCheck(queryString, newTs, []promql.Sample{{T: newTs, H: &c.expectedAvg, Metric: labels.EmptyLabels(), DropName: true}})
 			})
 			idx0++
 		}
@ -3539,171 +3543,6 @@ func TestNativeHistogram_SubOperator(t *testing.T) {
 	}
 }

-func TestNativeHistogram_MulDivOperator(t *testing.T) {
-	// TODO(codesome): Integrate histograms into the PromQL testing framework
-	// and write more tests there.
-	originalHistogram := histogram.Histogram{
-		Schema:        0,
-		Count:         21,
-		Sum:           33,
-		ZeroThreshold: 0.001,
-		ZeroCount:     3,
-		PositiveSpans: []histogram.Span{
-			{Offset: 0, Length: 3},
-		},
-		PositiveBuckets: []int64{3, 0, 0},
-		NegativeSpans: []histogram.Span{
-			{Offset: 0, Length: 3},
-		},
-		NegativeBuckets: []int64{3, 0, 0},
-	}
-
-	cases := []struct {
-		scalar      float64
-		histogram   histogram.Histogram
-		expectedMul histogram.FloatHistogram
-		expectedDiv histogram.FloatHistogram
-	}{
-		{
-			scalar:    3,
-			histogram: originalHistogram,
-			expectedMul: histogram.FloatHistogram{
-				Schema:        0,
-				Count:         63,
-				Sum:           99,
-				ZeroThreshold: 0.001,
-				ZeroCount:     9,
-				PositiveSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				PositiveBuckets: []float64{9, 9, 9},
-				NegativeSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				NegativeBuckets: []float64{9, 9, 9},
-			},
-			expectedDiv: histogram.FloatHistogram{
-				Schema:        0,
-				Count:         7,
-				Sum:           11,
-				ZeroThreshold: 0.001,
-				ZeroCount:     1,
-				PositiveSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				PositiveBuckets: []float64{1, 1, 1},
-				NegativeSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				NegativeBuckets: []float64{1, 1, 1},
-			},
-		},
-		{
-			scalar:    0,
-			histogram: originalHistogram,
-			expectedMul: histogram.FloatHistogram{
-				Schema:        0,
-				Count:         0,
-				Sum:           0,
-				ZeroThreshold: 0.001,
-				ZeroCount:     0,
-				PositiveSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				PositiveBuckets: []float64{0, 0, 0},
-				NegativeSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				NegativeBuckets: []float64{0, 0, 0},
-			},
-			expectedDiv: histogram.FloatHistogram{
-				Schema:        0,
-				Count:         math.Inf(1),
-				Sum:           math.Inf(1),
-				ZeroThreshold: 0.001,
-				ZeroCount:     math.Inf(1),
-				PositiveSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				PositiveBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1)},
-				NegativeSpans: []histogram.Span{
-					{Offset: 0, Length: 3},
-				},
-				NegativeBuckets: []float64{math.Inf(1), math.Inf(1), math.Inf(1)},
-			},
-		},
-	}
-
-	idx0 := int64(0)
-	for _, c := range cases {
-		for _, floatHisto := range []bool{true, false} {
-			t.Run(fmt.Sprintf("floatHistogram=%t %d", floatHisto, idx0), func(t *testing.T) {
-				storage := teststorage.New(t)
-				t.Cleanup(func() { storage.Close() })
-
-				seriesName := "sparse_histogram_series"
-				floatSeriesName := "float_series"
-
-				engine := newTestEngine()
-
-				ts := idx0 * int64(10*time.Minute/time.Millisecond)
-				app := storage.Appender(context.Background())
-				h := c.histogram
-				lbls := labels.FromStrings("__name__", seriesName)
-				// Since we mutate h later, we need to create a copy here.
-				var err error
-				if floatHisto {
-					_, err = app.AppendHistogram(0, lbls, ts, nil, h.Copy().ToFloat(nil))
-				} else {
-					_, err = app.AppendHistogram(0, lbls, ts, h.Copy(), nil)
-				}
-				require.NoError(t, err)
-				_, err = app.Append(0, labels.FromStrings("__name__", floatSeriesName), ts, c.scalar)
-				require.NoError(t, err)
-				require.NoError(t, app.Commit())
-
-				queryAndCheck := func(queryString string, exp promql.Vector) {
-					qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts))
-					require.NoError(t, err)
-
-					res := qry.Exec(context.Background())
-					require.NoError(t, res.Err)
-
-					vector, err := res.Vector()
-					require.NoError(t, err)
-
-					testutil.RequireEqual(t, exp, vector)
-				}
-
-				// histogram * scalar.
-				queryString := fmt.Sprintf(`%s * %f`, seriesName, c.scalar)
-				queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}})
-
-				// scalar * histogram.
-				queryString = fmt.Sprintf(`%f * %s`, c.scalar, seriesName)
-				queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}})
-
-				// histogram * float.
-				queryString = fmt.Sprintf(`%s * %s`, seriesName, floatSeriesName)
-				queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}})
-
-				// float * histogram.
-				queryString = fmt.Sprintf(`%s * %s`, floatSeriesName, seriesName)
-				queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedMul, Metric: labels.EmptyLabels()}})
-
-				// histogram / scalar.
-				queryString = fmt.Sprintf(`%s / %f`, seriesName, c.scalar)
-				queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}})
-
-				// histogram / float.
-				queryString = fmt.Sprintf(`%s / %s`, seriesName, floatSeriesName)
-				queryAndCheck(queryString, []promql.Sample{{T: ts, H: &c.expectedDiv, Metric: labels.EmptyLabels()}})
-			})
-			idx0++
-		}
-	}
-}
-
 func TestQueryLookbackDelta(t *testing.T) {
 	var (
 		load = `load 5m
--- a/promql/functions.go
+++ b/promql/functions.go
@ -406,17 +406,22 @@ func funcSortDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel

 // === sort_by_label(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) ===
 func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
-	// In case the labels are the same, NaN should sort to the bottom, so take
-	// ascending sort with NaN first and reverse it.
-	var anno annotations.Annotations
-	vals[0], anno = funcSort(vals, args, enh)
-	labels := stringSliceFromArgs(args[1:])
+	// First, sort by the full label set. This ensures a consistent ordering in case sorting by the
+	// labels provided as arguments is not conclusive.
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
-		// Iterate over each given label
+		return labels.Compare(a.Metric, b.Metric)
+	})
+
+	labels := stringSliceFromArgs(args[1:])
+	// Next, sort by the labels provided as arguments. This sort has to be stable: only then is
+	// the order established in the first step kept for samples that compare equal here.
+	slices.SortStableFunc(vals[0].(Vector), func(a, b Sample) int {
+		// Iterate over each given label.
 		for _, label := range labels {
 			lv1 := a.Metric.Get(label)
 			lv2 := b.Metric.Get(label)

+			// If we encounter multiple samples with the same label values, the sorting which was
+			// performed in the first step will act as a "tie breaker".
 			if lv1 == lv2 {
 				continue
 			}
@ -431,22 +436,27 @@ func funcSortByLabel(vals []parser.Value, args parser.Expressions, enh *EvalNode
 		return 0
 	})

-	return vals[0].(Vector), anno
+	return vals[0].(Vector), nil
 }

 // === sort_by_label_desc(vector parser.ValueTypeVector, label parser.ValueTypeString...) (Vector, Annotations) ===
 func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
-	// In case the labels are the same, NaN should sort to the bottom, so take
-	// ascending sort with NaN first and reverse it.
-	var anno annotations.Annotations
-	vals[0], anno = funcSortDesc(vals, args, enh)
-	labels := stringSliceFromArgs(args[1:])
+	// First, sort by the full label set. This ensures a consistent ordering in case sorting by the
+	// labels provided as arguments is not conclusive.
 	slices.SortFunc(vals[0].(Vector), func(a, b Sample) int {
-		// Iterate over each given label
+		return labels.Compare(b.Metric, a.Metric)
+	})
+
+	labels := stringSliceFromArgs(args[1:])
+	// Next, sort by the labels provided as arguments. This sort has to be stable: only then is
+	// the order established in the first step kept for samples that compare equal here.
+	slices.SortStableFunc(vals[0].(Vector), func(a, b Sample) int {
+		// Iterate over each given label.
 		for _, label := range labels {
 			lv1 := a.Metric.Get(label)
 			lv2 := b.Metric.Get(label)

+			// If we encounter multiple samples with the same label values, the sorting which was
+			// performed in the first step will act as a "tie breaker".
 			if lv1 == lv2 {
 				continue
 			}
@ -461,7 +471,7 @@ func funcSortByLabelDesc(vals []parser.Value, args parser.Expressions, enh *Eval
 		return 0
 	})

-	return vals[0].(Vector), anno
+	return vals[0].(Vector), nil
 }

 // === clamp(Vector parser.ValueTypeVector, min, max Scalar) (Vector, Annotations) ===
@ -473,9 +483,13 @@ func funcClamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper
 		return enh.Out, nil
 	}
 	for _, el := range vec {
+		if !enh.enableDelayedNameRemoval {
+			el.Metric = el.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: el.Metric.DropMetricName(),
+			Metric:   el.Metric,
 			F:        math.Max(minVal, math.Min(maxVal, el.F)),
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -486,9 +500,13 @@ func funcClampMax(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel
 	vec := vals[0].(Vector)
 	maxVal := vals[1].(Vector)[0].F
 	for _, el := range vec {
+		if !enh.enableDelayedNameRemoval {
+			el.Metric = el.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: el.Metric.DropMetricName(),
+			Metric:   el.Metric,
 			F:        math.Min(maxVal, el.F),
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -499,9 +517,13 @@ func funcClampMin(vals []parser.Value, args parser.Expressions, enh *EvalNodeHel
 	vec := vals[0].(Vector)
 	minVal := vals[1].(Vector)[0].F
 	for _, el := range vec {
+		if !enh.enableDelayedNameRemoval {
+			el.Metric = el.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: el.Metric.DropMetricName(),
+			Metric:   el.Metric,
 			F:        math.Max(minVal, el.F),
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -522,8 +544,9 @@ func funcRound(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper
 	for _, el := range vec {
 		f := math.Floor(el.F*toNearestInverse+0.5) / toNearestInverse
+		// Keep round() consistent with the other instant-vector functions:
+		// when delayed name removal is disabled, drop the metric name right
+		// away instead of only flagging the sample via DropName.
+		if !enh.enableDelayedNameRemoval {
+			el.Metric = el.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: el.Metric.DropMetricName(),
+			Metric:   el.Metric,
 			F:        f,
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -872,9 +895,13 @@ func funcPresentOverTime(vals []parser.Value, args parser.Expressions, enh *Eval
 func simpleFunc(vals []parser.Value, enh *EvalNodeHelper, f func(float64) float64) Vector {
 	for _, el := range vals[0].(Vector) {
 		if el.H == nil { // Process only float samples.
+			if !enh.enableDelayedNameRemoval {
+				el.Metric = el.Metric.DropMetricName()
+			}
 			enh.Out = append(enh.Out, Sample{
-				Metric: el.Metric.DropMetricName(),
+				Metric:   el.Metric,
 				F:        f(el.F),
+				DropName: true,
 			})
 		}
 	}
@ -1018,9 +1045,13 @@ func funcSgn(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper)
 func funcTimestamp(vals []parser.Value, args parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
 	vec := vals[0].(Vector)
 	for _, el := range vec {
+		if !enh.enableDelayedNameRemoval {
+			el.Metric = el.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: el.Metric.DropMetricName(),
+			Metric:   el.Metric,
 			F:        float64(el.T) / 1000,
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -1127,9 +1158,13 @@ func funcHistogramCount(vals []parser.Value, args parser.Expressions, enh *EvalN
 		if sample.H == nil {
 			continue
 		}
+		if !enh.enableDelayedNameRemoval {
+			sample.Metric = sample.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: sample.Metric.DropMetricName(),
+			Metric:   sample.Metric,
 			F:        sample.H.Count,
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -1144,9 +1179,13 @@ func funcHistogramSum(vals []parser.Value, args parser.Expressions, enh *EvalNod
 		if sample.H == nil {
 			continue
 		}
+		if !enh.enableDelayedNameRemoval {
+			sample.Metric = sample.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: sample.Metric.DropMetricName(),
+			Metric:   sample.Metric,
 			F:        sample.H.Sum,
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -1161,9 +1200,13 @@ func funcHistogramAvg(vals []parser.Value, args parser.Expressions, enh *EvalNod
 		if sample.H == nil {
 			continue
 		}
+		if !enh.enableDelayedNameRemoval {
+			sample.Metric = sample.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: sample.Metric.DropMetricName(),
+			Metric:   sample.Metric,
 			F:        sample.H.Sum / sample.H.Count,
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -1200,9 +1243,13 @@ func funcHistogramStdDev(vals []parser.Value, args parser.Expressions, enh *Eval
 		}
 		variance += cVariance
 		variance /= sample.H.Count
+		if !enh.enableDelayedNameRemoval {
+			sample.Metric = sample.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: sample.Metric.DropMetricName(),
+			Metric:   sample.Metric,
 			F:        math.Sqrt(variance),
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -1239,9 +1286,13 @@ func funcHistogramStdVar(vals []parser.Value, args parser.Expressions, enh *Eval
 		}
 		variance += cVariance
 		variance /= sample.H.Count
+		if !enh.enableDelayedNameRemoval {
+			sample.Metric = sample.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: sample.Metric.DropMetricName(),
+			Metric:   sample.Metric,
 			F:        variance,
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -1258,9 +1309,13 @@ func funcHistogramFraction(vals []parser.Value, args parser.Expressions, enh *Ev
 		if sample.H == nil {
 			continue
 		}
+		if !enh.enableDelayedNameRemoval {
+			sample.Metric = sample.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: sample.Metric.DropMetricName(),
+			Metric:   sample.Metric,
 			F:        histogramFraction(lower, upper, sample.H),
+			DropName: true,
 		})
 	}
 	return enh.Out, nil
@ -1328,9 +1383,13 @@ func funcHistogramQuantile(vals []parser.Value, args parser.Expressions, enh *Ev
 			continue
 		}

+		if !enh.enableDelayedNameRemoval {
+			sample.Metric = sample.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: sample.Metric.DropMetricName(),
+			Metric:   sample.Metric,
 			F:        histogramQuantile(q, sample.H),
+			DropName: true,
 		})
 	}

@ -1432,6 +1491,11 @@ func (ev *evaluator) evalLabelReplace(args parser.Expressions) (parser.Value, an
 			lb.Reset(el.Metric)
 			lb.Set(dst, string(res))
 			matrix[i].Metric = lb.Labels()
+			if dst == model.MetricNameLabel {
+				matrix[i].DropName = false
+			} else {
+				matrix[i].DropName = el.DropName
+			}
 		}
 	}
 	if matrix.ContainsSameLabelset() {
@ -1486,6 +1550,12 @@ func (ev *evaluator) evalLabelJoin(args parser.Expressions) (parser.Value, annot
 		lb.Reset(el.Metric)
 		lb.Set(dst, strval)
 		matrix[i].Metric = lb.Labels()
+
+		if dst == model.MetricNameLabel {
+			matrix[i].DropName = false
+		} else {
+			matrix[i].DropName = el.DropName
+		}
 	}

 	return matrix, ws
@ -1508,9 +1578,13 @@ func dateWrapper(vals []parser.Value, enh *EvalNodeHelper, f func(time.Time) flo

 	for _, el := range vals[0].(Vector) {
 		t := time.Unix(int64(el.F), 0).UTC()
+		if !enh.enableDelayedNameRemoval {
+			el.Metric = el.Metric.DropMetricName()
+		}
 		enh.Out = append(enh.Out, Sample{
-			Metric: el.Metric.DropMetricName(),
+			Metric:   el.Metric,
 			F:        f(t),
+			DropName: true,
 		})
 	}
 	return enh.Out
--- a/promql/parser/ast.go
+++ b/promql/parser/ast.go
@ -352,8 +352,7 @@ func (f inspector) Visit(node Node, path []Node) (Visitor, error) {
 // f(node, path); node must not be nil. If f returns a nil error, Inspect invokes f
 // for all the non-nil children of node, recursively.
 func Inspect(node Node, f inspector) {
-	//nolint: errcheck
-	Walk(f, node, nil)
+	Walk(f, node, nil) //nolint:errcheck
 }

 // Children returns a list of all child nodes of a syntax tree node.
@ -419,7 +418,7 @@ func mergeRanges(first, last Node) posrange.PositionRange {
 	}
 }

-// Item implements the Node interface.
+// PositionRange implements the Node interface.
 // This makes it possible to call mergeRanges on them.
 func (i *Item) PositionRange() posrange.PositionRange {
 	return posrange.PositionRange{
--- a/promql/parser/lex.go
+++ b/promql/parser/lex.go
@ -617,6 +617,16 @@ func lexBuckets(l *Lexer) stateFn {
 		l.bracketOpen = false
 		l.emit(RIGHT_BRACKET)
 		return lexHistogram
+	case isAlpha(r):
+		// Current word is Inf or NaN.
+		word := l.input[l.start:l.pos]
+		if desc, ok := key[strings.ToLower(word)]; ok {
+			if desc == NUMBER {
+				l.emit(desc)
+				return lexStatements
+			}
+		}
+		return lexBuckets
 	default:
 		return l.errorf("invalid character in buckets description: %q", r)
 	}
--- a/promql/parser/lex_test.go
+++ b/promql/parser/lex_test.go
@ -639,6 +639,29 @@ var tests = []struct {
 				},
 				seriesDesc: true,
 			},
+			{
+				input: `{} {{buckets: [Inf NaN] schema:1}}`,
+				expected: []Item{
+					{LEFT_BRACE, 0, `{`},
+					{RIGHT_BRACE, 1, `}`},
+					{SPACE, 2, ` `},
+					{OPEN_HIST, 3, `{{`},
+					{BUCKETS_DESC, 5, `buckets`},
+					{COLON, 12, `:`},
+					{SPACE, 13, ` `},
+					{LEFT_BRACKET, 14, `[`},
+					{NUMBER, 15, `Inf`},
+					{SPACE, 18, ` `},
+					{NUMBER, 19, `NaN`},
+					{RIGHT_BRACKET, 22, `]`},
+					{SPACE, 23, ` `},
+					{SCHEMA_DESC, 24, `schema`},
+					{COLON, 30, `:`},
+					{NUMBER, 31, `1`},
+					{CLOSE_HIST, 32, `}}`},
+				},
+				seriesDesc: true,
+			},
 			{ // Series with sum as Inf and count as NaN.
 				input: `{} {{buckets: [5 10 7] sum:Inf count:NaN}}`,
 				expected: []Item{
--- a/promql/parser/printer.go
+++ b/promql/parser/printer.go
@ -88,7 +88,7 @@ func (node *AggregateExpr) getAggOpStr() string {
 func joinLabels(ss []string) string {
 	for i, s := range ss {
 		// If the label is already quoted, don't quote it again.
-		if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(model.LabelValue(s)) {
+		if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(string(model.LabelValue(s))) {
 			ss[i] = fmt.Sprintf("\"%s\"", s)
 		}
 	}
--- a/promql/promqltest/test.go
+++ b/promql/promqltest/test.go
@ -90,6 +90,7 @@ func NewTestEngine(enablePerStepStats bool, lookbackDelta time.Duration, maxSamp
 		EnableNegativeOffset:     true,
 		EnablePerStepStats:       enablePerStepStats,
 		LookbackDelta:            lookbackDelta,
+		EnableDelayedNameRemoval: true,
 	})
 }

@ -769,7 +770,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error {
 					return fmt.Errorf("expected histogram value at index %v for %s to have timestamp %v, but it had timestamp %v (result has %s)", i, ev.metrics[hash], expected.T, actual.T, formatSeriesResult(s))
 				}

-				if !actual.H.Equals(expected.H.Compact(0)) {
+				if !compareNativeHistogram(expected.H.Compact(0), actual.H.Compact(0)) {
 					return fmt.Errorf("expected histogram value at index %v (t=%v) for %s to be %v, but got %v (result has %s)", i, actual.T, ev.metrics[hash], expected.H, actual.H, formatSeriesResult(s))
 				}
 			}
@ -804,7 +805,7 @@ func (ev *evalCmd) compareResult(result parser.Value) error {
 			if expH != nil && v.H == nil {
 				return fmt.Errorf("expected histogram %s for %s but got float value %v", HistogramTestExpression(expH), v.Metric, v.F)
 			}
-			if expH != nil && !expH.Compact(0).Equals(v.H) {
+			if expH != nil && !compareNativeHistogram(expH.Compact(0), v.H.Compact(0)) {
 				return fmt.Errorf("expected %v for %s but got %s", HistogramTestExpression(expH), v.Metric, HistogramTestExpression(v.H))
 			}
 			if !almost.Equal(exp0.Value, v.F, defaultEpsilon) {
@ -837,6 +838,121 @@ func (ev *evalCmd) compareResult(result parser.Value) error {
 	return nil
 }

+// compareNativeHistogram is a helper function to compare two native histograms.
+// It tolerates small differences (within defaultEpsilon, as judged by
+// almost.Equal) in float-typed fields such as Count, Sum, ZeroCount, and the
+// bucket values. Schema and ZeroThreshold have to be identical, and the span
+// layouts are compared via spansMatch.
+// It returns false if either histogram is nil (including when both are nil).
+func compareNativeHistogram(exp, cur *histogram.FloatHistogram) bool {
+	if exp == nil || cur == nil {
+		return false
+	}
+
+	if exp.Schema != cur.Schema ||
+		!almost.Equal(exp.Count, cur.Count, defaultEpsilon) ||
+		!almost.Equal(exp.Sum, cur.Sum, defaultEpsilon) {
+		return false
+	}
+
+	// Custom bucket boundaries are only compared when the expected histogram
+	// uses custom buckets.
+	if exp.UsesCustomBuckets() {
+		if !histogram.FloatBucketsMatch(exp.CustomValues, cur.CustomValues) {
+			return false
+		}
+	}
+
+	if exp.ZeroThreshold != cur.ZeroThreshold ||
+		!almost.Equal(exp.ZeroCount, cur.ZeroCount, defaultEpsilon) {
+		return false
+	}
+
+	// Negative side: span layout first, then the bucket values.
+	if !spansMatch(exp.NegativeSpans, cur.NegativeSpans) {
+		return false
+	}
+	if !floatBucketsMatch(exp.NegativeBuckets, cur.NegativeBuckets) {
+		return false
+	}
+
+	// Positive side: span layout first, then the bucket values.
+	if !spansMatch(exp.PositiveSpans, cur.PositiveSpans) {
+		return false
+	}
+	if !floatBucketsMatch(exp.PositiveBuckets, cur.PositiveBuckets) {
+		return false
+	}
+
+	return true
+}
+
+// floatBucketsMatch reports whether b1 and b2 have the same length and every
+// pair of values at the same index is equal within defaultEpsilon (as judged
+// by almost.Equal).
+func floatBucketsMatch(b1, b2 []float64) bool {
+	if len(b1) != len(b2) {
+		return false
+	}
+	for i, b := range b1 {
+		if !almost.Equal(b, b2[i], defaultEpsilon) {
+			return false
+		}
+	}
+	return true
+}
+
+// spansMatch reports whether the span slices s1 and s2 match. Zero-length
+// spans are not compared one-to-one: the offsets of consecutive zero-length
+// spans are added to the offset of the next non-empty span before the
+// comparison, and trailing zero-length spans are ignored.
+func spansMatch(s1, s2 []histogram.Span) bool {
+	if len(s1) == 0 && len(s2) == 0 {
+		return true
+	}
+
+	s1idx, s2idx := 0, 0
+	for {
+		// Once one side is exhausted, the remainder of the other side must
+		// consist of empty spans only.
+		if s1idx >= len(s1) {
+			return allEmptySpans(s2[s2idx:])
+		}
+		if s2idx >= len(s2) {
+			return allEmptySpans(s1[s1idx:])
+		}
+
+		currS1, currS2 := s1[s1idx], s2[s2idx]
+		s1idx++
+		s2idx++
+		if currS1.Length == 0 {
+			// This span is zero length, so we add consecutive such spans
+			// until we find a non-zero span.
+			for ; s1idx < len(s1) && s1[s1idx].Length == 0; s1idx++ {
+				currS1.Offset += s1[s1idx].Offset
+			}
+			// Fold the accumulated offset into the next non-empty span, if any.
+			if s1idx < len(s1) {
+				currS1.Offset += s1[s1idx].Offset
+				currS1.Length = s1[s1idx].Length
+				s1idx++
+			}
+		}
+		if currS2.Length == 0 {
+			// This span is zero length, so we add consecutive such spans
+			// until we find a non-zero span.
+			for ; s2idx < len(s2) && s2[s2idx].Length == 0; s2idx++ {
+				currS2.Offset += s2[s2idx].Offset
+			}
+			// Fold the accumulated offset into the next non-empty span, if any.
+			if s2idx < len(s2) {
+				currS2.Offset += s2[s2idx].Offset
+				currS2.Length = s2[s2idx].Length
+				s2idx++
+			}
+		}
+
+		if currS1.Length == 0 && currS2.Length == 0 {
+			// The last spans of both sets are zero length. Previous spans match.
+			return true
+		}
+
+		if currS1.Offset != currS2.Offset || currS1.Length != currS2.Length {
+			return false
+		}
+	}
+}
+
+// allEmptySpans reports whether every span in s has a length of zero. It
+// returns true for an empty slice.
+func allEmptySpans(s []histogram.Span) bool {
+	for _, ss := range s {
+		if ss.Length > 0 {
+			return false
+		}
+	}
+	return true
+}
+
 func (ev *evalCmd) checkExpectedFailure(actual error) error {
 	if ev.expectedFailMessage != "" {
 		if ev.expectedFailMessage != actual.Error() {
@ -1247,6 +1363,7 @@ func (ll *LazyLoader) clear() error {
 		NoStepSubqueryIntervalFn: func(int64) int64 { return durationMilliseconds(ll.SubqueryInterval) },
 		EnableAtModifier:         ll.opts.EnableAtModifier,
 		EnableNegativeOffset:     ll.opts.EnableNegativeOffset,
+		EnableDelayedNameRemoval: true,
 	}

 	ll.queryEngine = promql.NewEngine(opts)
--- a/promql/promqltest/testdata/functions.test
+++ b/promql/promqltest/testdata/functions.test
@ -534,16 +534,16 @@ load 5m
 	node_uname_info{job="node_exporter", instance="4m1000", release="1.111.3"} 0+10x10

 eval_ordered instant at 50m sort_by_label(http_requests, "instance")
-	http_requests{group="production", instance="0", job="api-server"} 100
 	http_requests{group="canary", instance="0", job="api-server"} 300
-	http_requests{group="production", instance="0", job="app-server"} 500
 	http_requests{group="canary", instance="0", job="app-server"} 700
-	http_requests{group="production", instance="1", job="api-server"} 200
+	http_requests{group="production", instance="0", job="api-server"} 100
+	http_requests{group="production", instance="0", job="app-server"} 500
 	http_requests{group="canary", instance="1", job="api-server"} 400
-	http_requests{group="production", instance="1", job="app-server"} 600
 	http_requests{group="canary", instance="1", job="app-server"} 800
-	http_requests{group="production", instance="2", job="api-server"} 100
+	http_requests{group="production", instance="1", job="api-server"} 200
+	http_requests{group="production", instance="1", job="app-server"} 600
 	http_requests{group="canary", instance="2", job="api-server"} NaN
+	http_requests{group="production", instance="2", job="api-server"} 100

 eval_ordered instant at 50m sort_by_label(http_requests, "instance", "group")
 	http_requests{group="canary", instance="0", job="api-server"} 300
@ -596,14 +596,14 @@ eval_ordered instant at 50m sort_by_label(http_requests, "job", "instance", "gro
 eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance")
 	http_requests{group="production", instance="2", job="api-server"} 100
 	http_requests{group="canary", instance="2", job="api-server"} NaN
-	http_requests{group="canary", instance="1", job="app-server"} 800
 	http_requests{group="production", instance="1", job="app-server"} 600
-	http_requests{group="canary", instance="1", job="api-server"} 400
 	http_requests{group="production", instance="1", job="api-server"} 200
-	http_requests{group="canary", instance="0", job="app-server"} 700
+	http_requests{group="canary", instance="1", job="app-server"} 800
+	http_requests{group="canary", instance="1", job="api-server"} 400
 	http_requests{group="production", instance="0", job="app-server"} 500
-	http_requests{group="canary", instance="0", job="api-server"} 300
 	http_requests{group="production", instance="0", job="api-server"} 100
+	http_requests{group="canary", instance="0", job="app-server"} 700
+	http_requests{group="canary", instance="0", job="api-server"} 300

 eval_ordered instant at 50m sort_by_label_desc(http_requests, "instance", "group")
 	http_requests{group="production", instance="2", job="api-server"} 100
--- a/promql/promqltest/testdata/name_label_dropping.test
+++ b/promql/promqltest/testdata/name_label_dropping.test
@ -0,0 +1,84 @@
+# Test for __name__ label drop.
+load 5m
+	metric{env="1"}	0 60 120
+	another_metric{env="1"}	60 120 180
+
+# Does not drop __name__ for vector selector
+eval instant at 15m metric{env="1"}
+	metric{env="1"} 120
+
+# Drops __name__ for unary operators
+eval instant at 15m -metric
+	{env="1"} -120
+
+# Drops __name__ for binary operators
+eval instant at 15m metric + another_metric
+	{env="1"} 300
+
+# Does not drop __name__ for binary comparison operators
+eval instant at 15m metric <= another_metric
+	metric{env="1"} 120
+
+# Drops __name__ for binary comparison operators with "bool" modifier
+eval instant at 15m metric <= bool another_metric
+	{env="1"} 1
+
+# Drops __name__ for vector-scalar operations
+eval instant at 15m metric * 2
+	{env="1"} 240
+
+# Drops __name__ for instant-vector functions
+eval instant at 15m clamp(metric, 0, 100)
+	{env="1"} 100
+
+# Drops __name__ for range-vector functions
+eval instant at 15m rate(metric{env="1"}[10m])
+	{env="1"} 0.2
+
+# Does not drop __name__ for last_over_time function
+eval instant at 15m last_over_time(metric{env="1"}[10m])
+	metric{env="1"} 120
+
+# Drops name for other _over_time functions
+eval instant at 15m max_over_time(metric{env="1"}[10m])
+	{env="1"} 120
+
+# Allows relabeling (to-be-dropped) __name__  via label_replace
+eval instant at 15m label_replace(rate({env="1"}[10m]), "my_name", "rate_$1", "__name__", "(.+)")
+	{my_name="rate_metric", env="1"} 0.2
+	{my_name="rate_another_metric", env="1"} 0.2
+
+# Allows preserving __name__ via label_replace
+eval instant at 15m label_replace(rate({env="1"}[10m]), "__name__", "rate_$1", "__name__", "(.+)")
+	rate_metric{env="1"} 0.2
+	rate_another_metric{env="1"} 0.2
+
+# Allows relabeling (to-be-dropped) __name__  via label_join
+eval instant at 15m label_join(rate({env="1"}[10m]), "my_name", "_", "__name__")
+	{my_name="metric", env="1"} 0.2
+	{my_name="another_metric", env="1"} 0.2
+
+# Allows preserving __name__ via label_join
+eval instant at 15m label_join(rate({env="1"}[10m]), "__name__", "_", "__name__", "env")
+	metric_1{env="1"} 0.2
+	another_metric_1{env="1"} 0.2
+
+# Does not drop metric names for aggregation operators
+eval instant at 15m sum by (__name__, env) (metric{env="1"})
+	metric{env="1"} 120
+
+# Aggregation operators by __name__ lead to duplicate labelset errors (aggregation is partitioned by not yet removed __name__ label)
+# This is an accidental side effect of delayed __name__ label dropping
+eval_fail instant at 15m sum by (__name__) (rate({env="1"}[10m]))
+
+# Aggregation operators aggregate metrics with same labelset and to-be-dropped names
+# This is an accidental side effect of delayed __name__ label dropping
+eval instant at 15m sum(rate({env="1"}[10m])) by (env)
+	{env="1"} 0.4
+
+# Aggregation operators propagate __name__ label dropping information
+eval instant at 15m topk(10, sum by (__name__, env) (metric{env="1"}))
+	metric{env="1"} 120
+
+eval instant at 15m topk(10, sum by (__name__, env) (rate(metric{env="1"}[10m])))
+	{env="1"} 0.2
--- a/promql/promqltest/testdata/native_histograms.test
+++ b/promql/promqltest/testdata/native_histograms.test
@ -750,6 +750,52 @@ eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4)
 eval instant at 10m histogram_sum(scalar(histogram_fraction(-Inf, +Inf, sum(histogram_fraction_4))) * histogram_fraction_4)
    {} 100

+# Apply multiplication and division operator to histogram.
+load 10m
+    histogram_mul_div {{schema:0 count:21 sum:33 z_bucket:3 z_bucket_w:0.001 buckets:[3 3 3] n_buckets:[3 3 3]}}x1
+    float_series_3 3+0x1
+    float_series_0 0+0x1
+
+eval instant at 10m histogram_mul_div*3
+    {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}}
+
+eval instant at 10m 3*histogram_mul_div
+    {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}}
+
+eval instant at 10m histogram_mul_div*float_series_3
+    {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}}
+
+eval instant at 10m float_series_3*histogram_mul_div
+    {} {{schema:0 count:63 sum:99 z_bucket:9 z_bucket_w:0.001 buckets:[9 9 9] n_buckets:[9 9 9]}}
+
+eval instant at 10m histogram_mul_div/3
+    {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}}
+
+eval instant at 10m histogram_mul_div/float_series_3
+    {} {{schema:0 count:7 sum:11 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 1] n_buckets:[1 1 1]}}
+
+eval instant at 10m histogram_mul_div*0
+    {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}}
+
+eval instant at 10m 0*histogram_mul_div
+    {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}}
+
+eval instant at 10m histogram_mul_div*float_series_0
+    {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}}
+
+eval instant at 10m float_series_0*histogram_mul_div
+    {} {{schema:0 count:0 sum:0 z_bucket:0 z_bucket_w:0.001 buckets:[0 0 0] n_buckets:[0 0 0]}}
+
+# TODO: (NeerajGartia21) remove all the histogram buckets in case of division by zero. See: https://github.com/prometheus/prometheus/issues/13934
+eval instant at 10m histogram_mul_div/0
+    {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}}
+
+eval instant at 10m histogram_mul_div/float_series_0
+    {} {{schema:0 count:Inf sum:Inf z_bucket:Inf z_bucket_w:0.001 buckets:[Inf Inf Inf] n_buckets:[Inf Inf Inf]}}
+
+eval instant at 10m histogram_mul_div*0/0
+    {} {{schema:0 count:NaN sum:NaN z_bucket:NaN z_bucket_w:0.001 buckets:[NaN NaN NaN] n_buckets:[NaN NaN NaN]}}
+
 clear

 # Counter reset only noticeable in a single bucket.
@ -918,3 +964,39 @@ eval_warn instant at 0 sum by (group) (metric)
  {group="just-floats"} 5
  {group="just-exponential-histograms"} {{sum:5 count:7 buckets:[2 3 2]}}
  {group="just-custom-histograms"} {{schema:-53 sum:4 count:5 custom_values:[2] buckets:[8]}}
+
+clear
+
+# Test native histograms with sum, count, avg.
+load 10m
+    histogram_sum{idx="0"} {{schema:0 count:25 sum:1234.5 z_bucket:4 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[2 4 0 0 1 9]}}x1
+    histogram_sum{idx="1"} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}}x1
+    histogram_sum{idx="2"} {{schema:0 count:41 sum:1111.1 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}}x1
+    histogram_sum{idx="3"} {{schema:1 count:0}}x1
+    histogram_sum_float{idx="0"} 42.0x1
+
+eval instant at 10m sum(histogram_sum)
+    {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}}
+
+eval_warn instant at 10m sum({idx="0"})
+
+eval instant at 10m sum(histogram_sum{idx="0"} + ignoring(idx) histogram_sum{idx="1"} + ignoring(idx) histogram_sum{idx="2"} + ignoring(idx) histogram_sum{idx="3"})
+    {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}}
+
+eval instant at 10m count(histogram_sum)
+    {} 4
+
+eval instant at 10m avg(histogram_sum)
+    {} {{schema:0 count:26.75 sum:1172.8 z_bucket:3.5 z_bucket_w:0.001 buckets:[0.75 2 0.5 1.25 0.75 0.5 0.5] n_buckets:[0.5 1.5 2 1 3.75 2.25 0 0 0 2.5 2.5 1]}}
+
+clear
+
+# Test native histograms with sum_over_time, avg_over_time.
+load 1m
+    histogram_sum_over_time {{schema:0 count:25 sum:1234.5 z_bucket:4 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[2 4 0 0 1 9]}} {{schema:0 count:41 sum:2345.6 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:0 count:41 sum:1111.1 z_bucket:5 z_bucket_w:0.001 buckets:[1 3 1 2 1 1 1] n_buckets:[0 1 4 2 7 0 0 0 0 5 5 2]}} {{schema:1 count:0}}
+
+eval instant at 3m sum_over_time(histogram_sum_over_time[3m:1m])
+    {} {{schema:0 count:107 sum:4691.2 z_bucket:14 z_bucket_w:0.001 buckets:[3 8 2 5 3 2 2] n_buckets:[2 6 8 4 15 9 0 0 0 10 10 4]}}
+
+eval instant at 3m avg_over_time(histogram_sum_over_time[3m:1m])
+    {} {{schema:0 count:26.75 sum:1172.8 z_bucket:3.5 z_bucket_w:0.001 buckets:[0.75 2 0.5 1.25 0.75 0.5 0.5] n_buckets:[0.5 1.5 2 1 3.75 2.25 0 0 0 2.5 2.5 1]}}
--- a/promql/value.go
+++ b/promql/value.go
@ -68,6 +68,9 @@ type Series struct {
 	Metric     labels.Labels `json:"metric"`
 	Floats     []FPoint      `json:"values,omitempty"`
 	Histograms []HPoint      `json:"histograms,omitempty"`
+	// DropName is used to indicate whether the __name__ label should be dropped
+	// as part of the query evaluation.
+	DropName bool `json:"-"`
 }

 func (s Series) String() string {
@ -194,6 +197,9 @@ type Sample struct {
 	H *histogram.FloatHistogram

 	Metric labels.Labels
+	// DropName is used to indicate whether the __name__ label should be dropped
+	// as part of the query evaluation.
+	DropName bool
 }

 func (s Sample) String() string {
--- a/rules/alerting_test.go
+++ b/rules/alerting_test.go
@ -648,7 +648,7 @@ func TestAlertingRuleLimit(t *testing.T) {
 		case err != nil:
 			require.EqualError(t, err, test.err)
 		case test.err != "":
-			t.Errorf("Expected errror %s, got none", test.err)
+			t.Errorf("Expected error %s, got none", test.err)
 		}
 	}
 }
--- a/scrape/clientprotobuf.go
+++ b/scrape/clientprotobuf.go
@ -23,7 +23,7 @@ import (
 	dto "github.com/prometheus/client_model/go"
 )

-// Write a MetricFamily into a protobuf.
+// MetricFamilyToProtobuf writes a MetricFamily into a protobuf.
 // This function is intended for testing scraping by providing protobuf serialized input.
 func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) {
 	buffer := &bytes.Buffer{}
@ -34,7 +34,7 @@ func MetricFamilyToProtobuf(metricFamily *dto.MetricFamily) ([]byte, error) {
 	return buffer.Bytes(), nil
 }

-// Append a MetricFamily protobuf representation to a buffer.
+// AddMetricFamilyToProtobuf appends a MetricFamily protobuf representation to a buffer.
 // This function is intended for testing scraping by providing protobuf serialized input.
 func AddMetricFamilyToProtobuf(buffer *bytes.Buffer, metricFamily *dto.MetricFamily) error {
 	protoBuf, err := proto.Marshal(metricFamily)
--- a/scrape/manager.go
+++ b/scrape/manager.go
@ -142,7 +142,7 @@ func (m *Manager) UnregisterMetrics() {

 func (m *Manager) reloader() {
 	reloadIntervalDuration := m.opts.DiscoveryReloadInterval
-	if reloadIntervalDuration < model.Duration(5*time.Second) {
+	if reloadIntervalDuration == model.Duration(0) {
 		reloadIntervalDuration = model.Duration(5 * time.Second)
 	}

--- a/scrape/manager_test.go
+++ b/scrape/manager_test.go
@ -20,6 +20,7 @@ import (
 	"net/http/httptest"
 	"net/url"
 	"os"
+	"sort"
 	"strconv"
 	"sync"
 	"testing"
@ -36,6 +37,7 @@ import (

 	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/discovery"
+	_ "github.com/prometheus/prometheus/discovery/file"
 	"github.com/prometheus/prometheus/discovery/targetgroup"
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/model/relabel"
@ -722,8 +724,6 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 		name                  string
 		counterSample         *dto.Counter
 		enableCTZeroIngestion bool
-
-		expectedValues []float64
 	}{
 		{
 			name: "disabled with CT on counter",
@ -732,7 +732,6 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 				// Timestamp does not matter as long as it exists in this test.
 				CreatedTimestamp: timestamppb.Now(),
 			},
-			expectedValues: []float64{1.0},
 		},
 		{
 			name: "enabled with CT on counter",
@ -742,7 +741,6 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 				CreatedTimestamp: timestamppb.Now(),
 			},
 			enableCTZeroIngestion: true,
-			expectedValues:        []float64{0.0, 1.0},
 		},
 		{
 			name: "enabled without CT on counter",
@ -750,7 +748,6 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 				Value: proto.Float64(1.0),
 			},
 			enableCTZeroIngestion: true,
-			expectedValues:        []float64{1.0},
 		},
 	} {
 		t.Run(tc.name, func(t *testing.T) {
@ -817,44 +814,42 @@ func TestManagerCTZeroIngestion(t *testing.T) {
 			})
 			scrapeManager.reload()

+			var got []float64
 			// Wait for one scrape.
 			ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
 			defer cancel()
 			require.NoError(t, runutil.Retry(100*time.Millisecond, ctx.Done(), func() error {
-				if countFloatSamples(app, mName) != len(tc.expectedValues) {
-					return fmt.Errorf("expected %v samples", tc.expectedValues)
-				}
-				return nil
-			}), "after 1 minute")
-			scrapeManager.Stop()
-
-			require.Equal(t, tc.expectedValues, getResultFloats(app, mName))
-		})
-	}
-}
-
-func countFloatSamples(a *collectResultAppender, expectedMetricName string) (count int) {
-	a.mtx.Lock()
-	defer a.mtx.Unlock()
-
-	for _, f := range a.resultFloats {
-		if f.metric.Get(model.MetricNameLabel) == expectedMetricName {
-			count++
-		}
-	}
-	return count
-}
-
-func getResultFloats(app *collectResultAppender, expectedMetricName string) (result []float64) {
 				app.mtx.Lock()
 				defer app.mtx.Unlock()

+				// Check if scrape happened and grab the relevant samples, they have to be there - or it's a bug
+				// and it's not worth waiting.
 				for _, f := range app.resultFloats {
-		if f.metric.Get(model.MetricNameLabel) == expectedMetricName {
-			result = append(result, f.f)
+					if f.metric.Get(model.MetricNameLabel) == mName {
+						got = append(got, f.f)
 					}
 				}
-	return result
+				if len(app.resultFloats) > 0 {
+					return nil
+				}
+				return fmt.Errorf("expected some samples, got none")
+			}), "after 1 minute")
+			scrapeManager.Stop()
+
+			// Check for zero samples, assuming we always injected exactly one sample.
+			// Did it contain CT to inject? If yes, was CT zero enabled?
+			if tc.counterSample.CreatedTimestamp.IsValid() && tc.enableCTZeroIngestion {
+				require.Len(t, got, 2)
+				require.Equal(t, 0.0, got[0])
+				require.Equal(t, tc.counterSample.GetValue(), got[1])
+				return
+			}
+
+			// Expect only one, valid sample.
+			require.Len(t, got, 1)
+			require.Equal(t, tc.counterSample.GetValue(), got[0])
+		})
+	}
 }

 func TestUnregisterMetrics(t *testing.T) {
@ -869,3 +864,414 @@ func TestUnregisterMetrics(t *testing.T) {
 		manager.UnregisterMetrics()
 	}
 }
+
+// applyConfig parses the given YAML config via loadConfiguration and applies it
+// to both managers: the scrape manager receives the whole config, while the
+// discovery manager receives the service discovery configs of every scrape
+// config, keyed by job name. Any error fails the test immediately.
+func applyConfig(
+	t *testing.T,
+	config string,
+	scrapeManager *Manager,
+	discoveryManager *discovery.Manager,
+) {
+	t.Helper()
+
+	cfg := loadConfiguration(t, config)
+	require.NoError(t, scrapeManager.ApplyConfig(cfg))
+
+	// Build the job name -> SD configs map expected by the discovery manager.
+	c := make(map[string]discovery.Configs)
+	scfgs, err := cfg.GetScrapeConfigs()
+	require.NoError(t, err)
+	for _, v := range scfgs {
+		c[v.JobName] = v.ServiceDiscoveryConfigs
+	}
+	require.NoError(t, discoveryManager.ApplyConfig(c))
+}
+
+// runManagers creates a discovery manager (bound to ctx) and a scrape manager,
+// both configured with a 100ms update/reload interval, and starts each of them
+// in its own goroutine. The scrape manager consumes the discovery manager's
+// sync channel. The scrape manager is not stopped here; the callers in this
+// file defer scrapeManager.Stop() themselves.
+func runManagers(t *testing.T, ctx context.Context) (*discovery.Manager, *Manager) {
+	t.Helper()
+
+	reg := prometheus.NewRegistry()
+	sdMetrics, err := discovery.RegisterSDMetrics(reg, discovery.NewRefreshMetrics(reg))
+	require.NoError(t, err)
+	discoveryManager := discovery.NewManager(
+		ctx,
+		log.NewNopLogger(),
+		reg,
+		sdMetrics,
+		discovery.Updatert(100*time.Millisecond),
+	)
+	// The scrape manager gets its own registry and discards appended samples.
+	scrapeManager, err := NewManager(
+		&Options{DiscoveryReloadInterval: model.Duration(100 * time.Millisecond)},
+		nil,
+		nopAppendable{},
+		prometheus.NewRegistry(),
+	)
+	require.NoError(t, err)
+	go discoveryManager.Run()
+	go scrapeManager.Run(discoveryManager.SyncCh())
+	return discoveryManager, scrapeManager
+}
+
+// writeIntoFile creates a temporary file whose name matches filePattern (see
+// os.CreateTemp), writes content into it and returns the still-open file.
+// The file is closed and removed again when the test finishes, so callers
+// only need to use file.Name() and must not rely on the file outliving the test.
+func writeIntoFile(t *testing.T, content, filePattern string) *os.File {
+	t.Helper()
+
+	file, err := os.CreateTemp("", filePattern)
+	require.NoError(t, err)
+	// Best-effort cleanup: without it every test run leaks an open file
+	// descriptor and a stray file in the system temp directory. Errors are
+	// deliberately ignored so that cleanup can never turn a passing test red.
+	t.Cleanup(func() {
+		_ = file.Close()
+		_ = os.Remove(file.Name())
+	})
+	_, err = file.WriteString(content)
+	require.NoError(t, err)
+	return file
+}
+
+// requireTargets polls the scrape manager every 100ms, for up to 1s, until the
+// active targets of jobName are exactly expectedTargets (compared as sorted
+// target URL strings). A nil expectedTargets means the job must be known but
+// have a nil target list.
+//
+// If the job is not known to the scrape manager at all, the behavior depends
+// on waitToAppear: when true the helper keeps polling, when false the test
+// fails right away because the job must not have been dropped.
+//
+// The polling runs on the calling (test) goroutine on purpose: t.Fatalf must
+// not be called from another goroutine, which is what happens when it is used
+// inside a require.Eventually condition.
+func requireTargets(
+	t *testing.T,
+	scrapeManager *Manager,
+	jobName string,
+	waitToAppear bool,
+	expectedTargets []string,
+) {
+	t.Helper()
+
+	const (
+		waitFor = 1 * time.Second
+		tick    = 100 * time.Millisecond
+	)
+
+	// Sort a private copy once, so the caller's slice is never reordered.
+	var want []string
+	if expectedTargets != nil {
+		want = append([]string{}, expectedTargets...)
+		sort.Strings(want)
+	}
+
+	matches := func() bool {
+		targets, ok := scrapeManager.TargetsActive()[jobName]
+		if !ok {
+			if !waitToAppear {
+				t.Fatalf("job %s shouldn't be dropped", jobName)
+			}
+			return false
+		}
+		if expectedTargets == nil {
+			return targets == nil
+		}
+		if len(targets) != len(want) {
+			return false
+		}
+		got := make([]string, 0, len(targets))
+		for _, target := range targets {
+			got = append(got, target.String())
+		}
+		sort.Strings(got)
+		for i, s := range got {
+			if s != want[i] {
+				return false
+			}
+		}
+		return true
+	}
+
+	deadline := time.Now().Add(waitFor)
+	for !matches() {
+		if time.Now().After(deadline) {
+			t.Fatalf("targets of job %s did not become %v within %s", jobName, want, waitFor)
+		}
+		time.Sleep(tick)
+	}
+}
+
+// TestTargetDisappearsAfterProviderRemoved makes sure that when a provider is removed from a job's config, only its targets are dropped.
+func TestTargetDisappearsAfterProviderRemoved(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	myJob := "my-job"
+	myJobSDTargetURL := "my:9876"
+	myJobStaticTargetURL := "my:5432"
+
+	sdFileContent := fmt.Sprintf(`[{"targets": ["%s"]}]`, myJobSDTargetURL)
+	sDFile := writeIntoFile(t, sdFileContent, "*targets.json")
+
+	baseConfig := `
+scrape_configs:
+- job_name: %s
+  static_configs:
+  - targets: ['%s']
+  file_sd_configs:
+  - files: ['%s']
+`
+
+	discoveryManager, scrapeManager := runManagers(t, ctx)
+	defer scrapeManager.Stop()
+
+	applyConfig(
+		t,
+		fmt.Sprintf(
+			baseConfig,
+			myJob,
+			myJobStaticTargetURL,
+			sDFile.Name(),
+		),
+		scrapeManager,
+		discoveryManager,
+	)
+	// Make sure the job's targets (static and file SD) are taken into account.
+	requireTargets(
+		t,
+		scrapeManager,
+		myJob,
+		true,
+		[]string{
+			fmt.Sprintf("http://%s/metrics", myJobSDTargetURL),
+			fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL),
+		},
+	)
+
+	// Apply a new config where the file SD provider is removed.
+	baseConfig = `
+scrape_configs:
+- job_name: %s
+  static_configs:
+  - targets: ['%s']
+`
+	applyConfig(
+		t,
+		fmt.Sprintf(
+			baseConfig,
+			myJob,
+			myJobStaticTargetURL,
+		),
+		scrapeManager,
+		discoveryManager,
+	)
+	// Make sure the file SD target was dropped and the static one was kept.
+	requireTargets(
+		t,
+		scrapeManager,
+		myJob,
+		false,
+		[]string{
+			fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL),
+		},
+	)
+
+	// Apply a new config with no providers at all for the job.
+	baseConfig = `
+scrape_configs:
+- job_name: %s
+`
+	applyConfig(
+		t,
+		fmt.Sprintf(
+			baseConfig,
+			myJob,
+		),
+		scrapeManager,
+		discoveryManager,
+	)
+	// Make sure the last target was dropped too: the job stays known but has no targets.
+	requireTargets(
+		t,
+		scrapeManager,
+		myJob,
+		false,
+		nil,
+	)
+}
+
+// TestOnlyProviderStaleTargetsAreDropped makes sure that when a job has only one provider with multiple targets
+// and the provider can no longer discover some of those targets, only those stale targets are dropped.
+func TestOnlyProviderStaleTargetsAreDropped(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	jobName := "my-job"
+	jobTarget1URL := "foo:9876"
+	jobTarget2URL := "foo:5432"
+
+	sdFile1Content := fmt.Sprintf(`[{"targets": ["%s"]}]`, jobTarget1URL)
+	sdFile2Content := fmt.Sprintf(`[{"targets": ["%s"]}]`, jobTarget2URL)
+	sDFile1 := writeIntoFile(t, sdFile1Content, "*targets.json")
+	sDFile2 := writeIntoFile(t, sdFile2Content, "*targets.json")
+
+	baseConfig := `
+scrape_configs:
+- job_name: %s
+  file_sd_configs:
+  - files: ['%s', '%s']
+`
+	discoveryManager, scrapeManager := runManagers(t, ctx)
+	defer scrapeManager.Stop()
+
+	applyConfig(
+		t,
+		fmt.Sprintf(baseConfig, jobName, sDFile1.Name(), sDFile2.Name()),
+		scrapeManager,
+		discoveryManager,
+	)
+
+	// Make sure the job's targets (one per SD file) are taken into account.
+	requireTargets(
+		t,
+		scrapeManager,
+		jobName,
+		true,
+		[]string{
+			fmt.Sprintf("http://%s/metrics", jobTarget1URL),
+			fmt.Sprintf("http://%s/metrics", jobTarget2URL),
+		},
+	)
+
+	// Apply the same config for the same job, but replace the second file with a
+	// non-existing one so that the provider can no longer discover its target.
+	applyConfig(
+		t,
+		fmt.Sprintf(baseConfig, jobName, sDFile1.Name(), "/idontexistdoi.json"),
+		scrapeManager,
+		discoveryManager,
+	)
+
+	// The target of the missing file should get dropped; the other one is kept.
+	requireTargets(
+		t,
+		scrapeManager,
+		jobName,
+		false,
+		[]string{fmt.Sprintf("http://%s/metrics", jobTarget1URL)},
+	)
+}
+
+// TestProviderStaleTargetsAreDropped makes sure that when a job has only one provider and that provider
+// can no longer discover any targets, the targets of that provider are dropped.
+// See: https://github.com/prometheus/prometheus/issues/12858
+func TestProviderStaleTargetsAreDropped(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	jobName := "my-job"
+	jobTargetURL := "foo:9876"
+
+	sdFileContent := fmt.Sprintf(`[{"targets": ["%s"]}]`, jobTargetURL)
+	sDFile := writeIntoFile(t, sdFileContent, "*targets.json")
+
+	baseConfig := `
+scrape_configs:
+- job_name: %s
+  file_sd_configs:
+  - files: ['%s']
+`
+	discoveryManager, scrapeManager := runManagers(t, ctx)
+	defer scrapeManager.Stop()
+
+	applyConfig(
+		t,
+		fmt.Sprintf(baseConfig, jobName, sDFile.Name()),
+		scrapeManager,
+		discoveryManager,
+	)
+
+	// Make sure the job's target is taken into account.
+	requireTargets(
+		t,
+		scrapeManager,
+		jobName,
+		true,
+		[]string{
+			fmt.Sprintf("http://%s/metrics", jobTargetURL),
+		},
+	)
+
+	// Apply the same config for the same job, but with a non-existing file so
+	// that the provider can no longer discover any target.
+	applyConfig(
+		t,
+		fmt.Sprintf(baseConfig, jobName, "/idontexistdoi.json"),
+		scrapeManager,
+		discoveryManager,
+	)
+
+	// The old target should get dropped: the job stays known but has no targets.
+	requireTargets(
+		t,
+		scrapeManager,
+		jobName,
+		false,
+		nil,
+	)
+}
+
+// TestOnlyStaleTargetsAreDropped makes sure that when a job has multiple providers and one of them can no
+// longer discover targets, only the stale targets of that provider are dropped.
+func TestOnlyStaleTargetsAreDropped(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	myJob := "my-job"
+	myJobSDTargetURL := "my:9876"
+	myJobStaticTargetURL := "my:5432"
+	otherJob := "other-job"
+	otherJobTargetURL := "other:1234"
+
+	sdFileContent := fmt.Sprintf(`[{"targets": ["%s"]}]`, myJobSDTargetURL)
+	sDFile := writeIntoFile(t, sdFileContent, "*targets.json")
+
+	baseConfig := `
+scrape_configs:
+- job_name: %s
+  static_configs:
+  - targets: ['%s']
+  file_sd_configs:
+  - files: ['%s']
+- job_name: %s
+  static_configs:
+  - targets: ['%s']
+`
+
+	discoveryManager, scrapeManager := runManagers(t, ctx)
+	defer scrapeManager.Stop()
+
+	// Apply the initial config with an existing SD file.
+	applyConfig(
+		t,
+		fmt.Sprintf(
+			baseConfig,
+			myJob,
+			myJobStaticTargetURL,
+			sDFile.Name(),
+			otherJob,
+			otherJobTargetURL,
+		),
+		scrapeManager,
+		discoveryManager,
+	)
+	// Make sure the targets of both jobs are taken into account.
+	requireTargets(
+		t,
+		scrapeManager,
+		myJob,
+		true,
+		[]string{
+			fmt.Sprintf("http://%s/metrics", myJobSDTargetURL),
+			fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL),
+		},
+	)
+	requireTargets(
+		t,
+		scrapeManager,
+		otherJob,
+		true,
+		[]string{fmt.Sprintf("http://%s/metrics", otherJobTargetURL)},
+	)
+
+	// Apply the same config, but with a non-existing SD file for myJob.
+	applyConfig(
+		t,
+		fmt.Sprintf(
+			baseConfig,
+			myJob,
+			myJobStaticTargetURL,
+			"/idontexistdoi.json",
+			otherJob,
+			otherJobTargetURL,
+		),
+		scrapeManager,
+		discoveryManager,
+	)
+
+	// Only the SD target should get dropped for myJob; its static target stays.
+	requireTargets(
+		t,
+		scrapeManager,
+		myJob,
+		false,
+		[]string{
+			fmt.Sprintf("http://%s/metrics", myJobStaticTargetURL),
+		},
+	)
+	// The otherJob should keep its target.
+	requireTargets(
+		t,
+		scrapeManager,
+		otherJob,
+		false,
+		[]string{fmt.Sprintf("http://%s/metrics", otherJobTargetURL)},
+	)
+}
--- a/scrape/scrape.go
+++ b/scrape/scrape.go
@ -111,6 +111,7 @@ type scrapeLoopOptions struct {
 	interval                 time.Duration
 	timeout                  time.Duration
 	scrapeClassicHistograms  bool
+	validationScheme         model.ValidationScheme

 	mrc               []*relabel.Config
 	cache             *scrapeCache
@ -186,6 +187,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed
 			options.PassMetadataInContext,
 			metrics,
 			options.skipOffsetting,
+			opts.validationScheme,
 		)
 	}
 	sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit))
@ -346,6 +348,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) {
 				cache:                    cache,
 				interval:                 interval,
 				timeout:                  timeout,
+				validationScheme:         validationScheme,
 			})
 		)
 		if err != nil {
@ -853,6 +856,7 @@ type scrapeLoop struct {
 	interval                 time.Duration
 	timeout                  time.Duration
 	scrapeClassicHistograms  bool
+	validationScheme         model.ValidationScheme

 	// Feature flagged options.
 	enableNativeHistogramIngestion bool
@ -1160,6 +1164,7 @@ func newScrapeLoop(ctx context.Context,
 	passMetadataInContext bool,
 	metrics *scrapeMetrics,
 	skipOffsetting bool,
+	validationScheme model.ValidationScheme,
 ) *scrapeLoop {
 	if l == nil {
 		l = log.NewNopLogger()
@ -1211,6 +1216,7 @@ func newScrapeLoop(ctx context.Context,
 		appendMetadataToWAL:            appendMetadataToWAL,
 		metrics:                        metrics,
 		skipOffsetting:                 skipOffsetting,
+		validationScheme:               validationScheme,
 	}
 	sl.ctx, sl.cancel = context.WithCancel(ctx)

@ -1631,7 +1637,7 @@ loop:
 				err = errNameLabelMandatory
 				break loop
 			}
-			if !lset.IsValid() {
+			if !lset.IsValid(sl.validationScheme) {
 				err = fmt.Errorf("invalid metric name or label names: %s", lset.String())
 				break loop
 			}
@ -1646,7 +1652,7 @@ loop:
 			updateMetadata(lset, true)
 		}

-		if seriesAlreadyScraped {
+		if seriesAlreadyScraped && parsedTimestamp == nil {
 			err = storage.ErrDuplicateSampleForTimestamp
 		} else {
 			if ctMs := p.CreatedTimestamp(); sl.enableCTZeroIngestion && ctMs != nil {
--- a/scrape/scrape_test.go
+++ b/scrape/scrape_test.go
@ -35,6 +35,7 @@ import (
 	"github.com/gogo/protobuf/proto"
 	"github.com/google/go-cmp/cmp"
 	"github.com/prometheus/client_golang/prometheus"
+	prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
 	dto "github.com/prometheus/client_model/go"
 	config_util "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
@ -683,6 +684,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app
 		false,
 		newTestScrapeMetrics(t),
 		false,
+		model.LegacyValidation,
 	)
 }

@ -825,6 +827,7 @@ func TestScrapeLoopRun(t *testing.T) {
 		false,
 		scrapeMetrics,
 		false,
+		model.LegacyValidation,
 	)

 	// The loop must terminate during the initial offset if the context
@ -969,6 +972,7 @@ func TestScrapeLoopMetadata(t *testing.T) {
 		false,
 		scrapeMetrics,
 		false,
+		model.LegacyValidation,
 	)
 	defer cancel()

@ -1064,6 +1068,40 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) {
 	require.Equal(t, 0, seriesAdded)
 }

+// TestScrapeLoopFailLegacyUnderUTF8 checks that the per-scrape-loop validation
+// scheme, not the global default, decides whether a UTF-8 metric name is
+// accepted: with the global default set to UTF-8, a loop configured for legacy
+// validation must reject "test.metric", while a loop configured for UTF-8 must
+// accept it.
+func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) {
+	// Test that scrapes fail when default validation is utf8 but scrape config is
+	// legacy.
+	// Restore whatever scheme was active before, instead of assuming it was
+	// legacy, so this test cannot change the global state seen by other tests.
+	origScheme := model.NameValidationScheme
+	model.NameValidationScheme = model.UTF8Validation
+	defer func() {
+		model.NameValidationScheme = origScheme
+	}()
+	s := teststorage.New(t)
+	defer s.Close()
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	sl := newBasicScrapeLoop(t, ctx, &testScraper{}, s.Appender, 0)
+	sl.validationScheme = model.LegacyValidation
+
+	slApp := sl.appender(ctx)
+	total, added, seriesAdded, err := sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{})
+	require.ErrorContains(t, err, "invalid metric name or label names")
+	require.NoError(t, slApp.Rollback())
+	require.Equal(t, 1, total)
+	require.Equal(t, 0, added)
+	require.Equal(t, 0, seriesAdded)
+
+	// When scrapeloop has validation set to UTF-8, the metric is allowed.
+	sl.validationScheme = model.UTF8Validation
+
+	slApp = sl.appender(ctx)
+	total, added, seriesAdded, err = sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{})
+	require.NoError(t, err)
+	// Release the appender; only the counters returned by append matter here.
+	require.NoError(t, slApp.Rollback())
+	require.Equal(t, 1, total)
+	require.Equal(t, 1, added)
+	require.Equal(t, 1, seriesAdded)
+}
+
 func makeTestMetrics(n int) []byte {
 	// Construct a metrics string to parse
 	sb := bytes.Buffer{}
@ -3644,6 +3682,7 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) {
 	require.Equal(t, 3, total)
 	require.Equal(t, 3, added)
 	require.Equal(t, 1, seriesAdded)
+	require.Equal(t, 2.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate))

 	slApp = sl.appender(ctx)
 	total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1\ntest_metric 1\ntest_metric 1\n"), "", time.Time{})
@ -3652,12 +3691,18 @@ func TestScrapeLoopSeriesAddedDuplicates(t *testing.T) {
 	require.Equal(t, 3, total)
 	require.Equal(t, 3, added)
 	require.Equal(t, 0, seriesAdded)
+	require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate))

-	metric := dto.Metric{}
-	err = sl.metrics.targetScrapeSampleDuplicate.Write(&metric)
+	// When different timestamps are supplied, multiple samples are accepted.
+	slApp = sl.appender(ctx)
+	total, added, seriesAdded, err = sl.append(slApp, []byte("test_metric 1 1001\ntest_metric 1 1002\ntest_metric 1 1003\n"), "", time.Time{})
 	require.NoError(t, err)
-	value := metric.GetCounter().GetValue()
-	require.Equal(t, 4.0, value)
+	require.NoError(t, slApp.Commit())
+	require.Equal(t, 3, total)
+	require.Equal(t, 3, added)
+	require.Equal(t, 0, seriesAdded)
+	// Metric is not higher than last time.
+	require.Equal(t, 4.0, prom_testutil.ToFloat64(sl.metrics.targetScrapeSampleDuplicate))
 }

 // This tests running a full scrape loop and checking that the scrape option
--- a/scrape/testdata/ca.cer
+++ b/scrape/testdata/ca.cer
@ -1,3 +1,61 @@
+Certificate:
+    Data:
+        Version: 3 (0x2)
+        Serial Number:
+            93:6c:9e:29:8d:37:7b:66
+        Signature Algorithm: sha256WithRSAEncryption
+        Issuer: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Validity
+            Not Before: Aug 20 11:51:23 2024 GMT
+            Not After : Dec  5 11:51:23 2044 GMT
+        Subject: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Subject Public Key Info:
+            Public Key Algorithm: rsaEncryption
+                Public-Key: (2048 bit)
+                Modulus:
+                    00:e9:52:05:4d:f2:5a:95:04:2d:b8:73:8b:3c:e7:
+                    47:48:67:00:be:dd:6c:41:f3:7c:4b:44:73:a3:77:
+                    3e:84:af:30:d7:2a:ad:45:6a:b7:89:23:05:15:b3:
+                    aa:46:79:b8:95:64:cc:13:c4:44:a1:01:a0:e2:3d:
+                    a5:67:2b:aa:d3:13:06:43:33:1c:96:36:12:9e:c6:
+                    1d:36:9b:d7:47:bd:28:2d:88:15:04:fa:14:a3:ff:
+                    8c:26:22:c5:a2:15:c7:76:b3:11:f6:a3:44:9a:28:
+                    0f:ca:fb:f4:51:a8:6a:05:94:7c:77:47:c8:21:56:
+                    25:bf:e2:2e:df:33:f3:e4:bd:d6:47:a5:49:13:12:
+                    c8:1f:4c:d7:2a:56:a2:6c:c1:cf:55:05:5d:9a:75:
+                    a2:23:4e:e6:8e:ff:76:05:d6:e0:c8:0b:51:f0:b6:
+                    a1:b2:7d:8f:9c:6a:a5:ce:86:92:15:8c:5b:86:45:
+                    c0:4a:ff:54:b8:ee:cf:11:bd:07:cb:4b:7d:0b:a1:
+                    9d:72:86:9f:55:bc:f9:6c:d9:55:60:96:30:3f:ec:
+                    2d:f6:5f:9a:32:9a:5a:5c:1c:5f:32:f9:d6:0f:04:
+                    f8:81:08:04:9a:95:c3:9d:5a:30:8e:a5:0e:47:2f:
+                    00:ce:e0:2e:ad:5a:b8:b6:4c:55:7c:8a:59:22:b0:
+                    ed:73
+                Exponent: 65537 (0x10001)
+        X509v3 extensions:
+            X509v3 Subject Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Authority Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Basic Constraints: 
+                CA:TRUE
+    Signature Algorithm: sha256WithRSAEncryption
+    Signature Value:
+        4a:a1:b0:bc:c8:87:4f:7c:96:62:e5:09:29:ae:3a:2e:68:d0:
+        d2:c5:68:ed:ea:83:36:b1:86:f3:b9:e9:19:2b:b6:73:10:6f:
+        df:7f:bb:f1:76:81:03:c1:a1:5a:ee:6c:44:b8:7c:10:d1:5a:
+        d7:c1:92:64:59:35:a6:e0:aa:08:41:37:6e:e7:c8:b6:bd:0c:
+        4b:47:78:ec:c4:b4:15:a3:62:76:4a:39:8e:6e:19:ff:f0:c0:
+        8a:7e:1c:cd:87:e5:00:6c:f1:ce:27:26:ff:b8:e9:eb:f7:2f:
+        bd:c2:4b:9c:d6:57:de:74:74:b3:4f:03:98:9a:b5:08:2d:16:
+        ca:7f:b6:c8:76:62:86:1b:7c:f2:3e:6c:78:cc:2c:95:9a:bb:
+        77:25:e8:80:ff:9b:e8:f8:9a:85:3b:85:b7:17:4e:77:a1:cf:
+        4d:b9:d0:25:e8:5d:8c:e6:7c:f1:d9:52:30:3d:ec:2b:37:91:
+        bc:e2:e8:39:31:6f:3d:e9:98:70:80:7c:41:dd:19:13:05:21:
+        94:7b:16:cf:d8:ee:4e:38:34:5e:6a:ff:cd:85:ac:8f:94:9a:
+        dd:4e:77:05:13:a6:b4:80:52:b2:97:64:76:88:f4:dd:42:0a:
+        50:1c:80:fd:4b:6e:a9:62:10:aa:ef:2e:c1:2f:be:0e:c2:2e:
+        b5:28:5f:83
 -----BEGIN CERTIFICATE-----
 MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV
 BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
--- a/scripts/golangci-lint.yml
+++ b/scripts/golangci-lint.yml
@ -36,4 +36,4 @@ jobs:
        uses: golangci/golangci-lint-action@aaa42aa0628b4ae2578232a66b541047968fac86 # v6.1.0
        with:
          args: --verbose
-          version: v1.60.1
+          version: v1.60.2
--- a/storage/interface.go
+++ b/storage/interface.go
@ -227,9 +227,9 @@ type LabelHints struct {
 	Limit int
 }

-// TODO(bwplotka): Move to promql/engine_test.go?
 // QueryableFunc is an adapter to allow the use of ordinary functions as
 // Queryables. It follows the idea of http.HandlerFunc.
+// TODO(bwplotka): Move to promql/engine_test.go?
 type QueryableFunc func(mint, maxt int64) (Querier, error)

 // Querier calls f() with the given parameters.
--- a/storage/remote/azuread/azuread.go
+++ b/storage/remote/azuread/azuread.go
@ -31,13 +31,15 @@ import (
 	"github.com/google/uuid"
 )

-const (
 // Clouds.
+const (
 	AzureChina      = "AzureChina"
 	AzureGovernment = "AzureGovernment"
 	AzurePublic     = "AzurePublic"
+)

 // Audiences.
+const (
 	IngestionChinaAudience      = "https://monitor.azure.cn//.default"
 	IngestionGovernmentAudience = "https://monitor.azure.us//.default"
 	IngestionPublicAudience     = "https://monitor.azure.com//.default"
--- a/storage/remote/chunked.go
+++ b/storage/remote/chunked.go
@ -26,10 +26,6 @@ import (
 	"github.com/gogo/protobuf/proto"
 )

-// DefaultChunkedReadLimit is the default value for the maximum size of the protobuf frame client allows.
-// 50MB is the default. This is equivalent to ~100k full XOR chunks and average labelset.
-const DefaultChunkedReadLimit = 5e+7
-
 // The table gets initialized with sync.Once but may still cause a race
 // with any other use of the crc32 package anywhere. Thus we initialize it
 // before.
--- a/storage/remote/client.go
+++ b/storage/remote/client.go
@ -16,6 +16,7 @@ package remote
 import (
 	"bytes"
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@ -36,13 +37,14 @@ import (

 	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/prompb"
+	"github.com/prometheus/prometheus/storage"
 	"github.com/prometheus/prometheus/storage/remote/azuread"
 	"github.com/prometheus/prometheus/storage/remote/googleiam"
 )

-const maxErrMsgLen = 1024
-
 const (
+	maxErrMsgLen = 1024
+
 	RemoteWriteVersionHeader        = "X-Prometheus-Remote-Write-Version"
 	RemoteWriteVersion1HeaderValue  = "0.1.0"
 	RemoteWriteVersion20HeaderValue = "2.0.0"
@ -68,9 +70,12 @@ var (
 		config.RemoteWriteProtoMsgV1: appProtoContentType, // Also application/x-protobuf;proto=prometheus.WriteRequest but simplified for compatibility with 1.x spec.
 		config.RemoteWriteProtoMsgV2: appProtoContentType + ";proto=io.prometheus.write.v2.Request",
 	}
-)

-var (
+	AcceptedResponseTypes = []prompb.ReadRequest_ResponseType{
+		prompb.ReadRequest_STREAMED_XOR_CHUNKS,
+		prompb.ReadRequest_SAMPLES,
+	}
+
 	remoteReadQueriesTotal = prometheus.NewCounterVec(
 		prometheus.CounterOpts{
 			Namespace: namespace,
@ -78,7 +83,7 @@ var (
 			Name:      "read_queries_total",
 			Help:      "The total number of remote read queries.",
 		},
-		[]string{remoteName, endpoint, "code"},
+		[]string{remoteName, endpoint, "response_type", "code"},
 	)
 	remoteReadQueries = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
@ -94,13 +99,13 @@ var (
 			Namespace:                       namespace,
 			Subsystem:                       subsystem,
 			Name:                            "read_request_duration_seconds",
-			Help:                            "Histogram of the latency for remote read requests.",
+			Help:                            "Histogram of the latency for remote read requests. Note that for streamed responses this is only the duration of the initial call and does not include the processing of the stream.",
 			Buckets:                         append(prometheus.DefBuckets, 25, 60),
 			NativeHistogramBucketFactor:     1.1,
 			NativeHistogramMaxBucketNumber:  100,
 			NativeHistogramMinResetDuration: 1 * time.Hour,
 		},
-		[]string{remoteName, endpoint},
+		[]string{remoteName, endpoint, "response_type"},
 	)
 )

@ -116,10 +121,11 @@ type Client struct {
 	timeout    time.Duration

 	retryOnRateLimit bool
+	chunkedReadLimit uint64

 	readQueries         prometheus.Gauge
 	readQueriesTotal    *prometheus.CounterVec
-	readQueriesDuration prometheus.Observer
+	readQueriesDuration prometheus.ObserverVec

 	writeProtoMsg    config.RemoteWriteProtoMsg
 	writeCompression Compression // Not exposed by ClientConfig for now.
@ -136,12 +142,13 @@ type ClientConfig struct {
 	Headers          map[string]string
 	RetryOnRateLimit bool
 	WriteProtoMsg    config.RemoteWriteProtoMsg
+	ChunkedReadLimit uint64
 }

-// ReadClient uses the SAMPLES method of remote read to read series samples from remote server.
-// TODO(bwplotka): Add streamed chunked remote read method as well (https://github.com/prometheus/prometheus/issues/5926).
+// ReadClient will request the STREAMED_XOR_CHUNKS method of remote read but can
+// also fall back to the SAMPLES method if necessary.
 type ReadClient interface {
-	Read(ctx context.Context, query *prompb.Query) (*prompb.QueryResult, error)
+	Read(ctx context.Context, query *prompb.Query, sortSeries bool) (storage.SeriesSet, error)
 }

 // NewReadClient creates a new client for remote read.
@ -162,9 +169,10 @@ func NewReadClient(name string, conf *ClientConfig) (ReadClient, error) {
 		urlString:           conf.URL.String(),
 		Client:              httpClient,
 		timeout:             time.Duration(conf.Timeout),
+		chunkedReadLimit:    conf.ChunkedReadLimit,
 		readQueries:         remoteReadQueries.WithLabelValues(name, conf.URL.String()),
 		readQueriesTotal:    remoteReadQueriesTotal.MustCurryWith(prometheus.Labels{remoteName: name, endpoint: conf.URL.String()}),
-		readQueriesDuration: remoteReadQueryDuration.WithLabelValues(name, conf.URL.String()),
+		readQueriesDuration: remoteReadQueryDuration.MustCurryWith(prometheus.Labels{remoteName: name, endpoint: conf.URL.String()}),
 	}, nil
 }

@ -278,8 +286,8 @@ func (c *Client) Store(ctx context.Context, req []byte, attempt int) (WriteRespo
 		return WriteResponseStats{}, RecoverableError{err, defaultBackoff}
 	}
 	defer func() {
-		io.Copy(io.Discard, httpResp.Body)
-		httpResp.Body.Close()
+		_, _ = io.Copy(io.Discard, httpResp.Body)
+		_ = httpResp.Body.Close()
 	}()

 	// TODO(bwplotka): Pass logger and emit debug on error?
@ -329,17 +337,17 @@ func (c *Client) Endpoint() string {
 	return c.urlString
 }

-// Read reads from a remote endpoint.
-func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryResult, error) {
+// Read reads from a remote endpoint. The sortSeries parameter is only respected in the case of a sampled response;
+// chunked responses arrive already sorted by the server.
+func (c *Client) Read(ctx context.Context, query *prompb.Query, sortSeries bool) (storage.SeriesSet, error) {
 	c.readQueries.Inc()
 	defer c.readQueries.Dec()

 	req := &prompb.ReadRequest{
 		// TODO: Support batching multiple queries into one read request,
 		// as the protobuf interface allows for it.
-		Queries: []*prompb.Query{
-			query,
-		},
+		Queries:               []*prompb.Query{query},
+		AcceptedResponseTypes: AcceptedResponseTypes,
 	}
 	data, err := proto.Marshal(req)
 	if err != nil {
@ -358,7 +366,6 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe
 	httpReq.Header.Set("X-Prometheus-Remote-Read-Version", "0.1.0")

 	ctx, cancel := context.WithTimeout(ctx, c.timeout)
-	defer cancel()

 	ctx, span := otel.Tracer("").Start(ctx, "Remote Read", trace.WithSpanKind(trace.SpanKindClient))
 	defer span.End()
@ -366,24 +373,58 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe
 	start := time.Now()
 	httpResp, err := c.Client.Do(httpReq.WithContext(ctx))
 	if err != nil {
+		cancel()
 		return nil, fmt.Errorf("error sending request: %w", err)
 	}
-	defer func() {
-		io.Copy(io.Discard, httpResp.Body)
-		httpResp.Body.Close()
-	}()
-	c.readQueriesDuration.Observe(time.Since(start).Seconds())
-	c.readQueriesTotal.WithLabelValues(strconv.Itoa(httpResp.StatusCode)).Inc()
-
-	compressed, err = io.ReadAll(httpResp.Body)
-	if err != nil {
-		return nil, fmt.Errorf("error reading response. HTTP status code: %s: %w", httpResp.Status, err)
-	}

 	if httpResp.StatusCode/100 != 2 {
-		return nil, fmt.Errorf("remote server %s returned HTTP status %s: %s", c.urlString, httpResp.Status, strings.TrimSpace(string(compressed)))
+		// Make an attempt at getting an error message.
+		body, _ := io.ReadAll(httpResp.Body)
+		_ = httpResp.Body.Close()
+
+		cancel()
+		return nil, fmt.Errorf("remote server %s returned http status %s: %s", c.urlString, httpResp.Status, string(body))
 	}

+	contentType := httpResp.Header.Get("Content-Type")
+
+	switch {
+	case strings.HasPrefix(contentType, "application/x-protobuf"):
+		c.readQueriesDuration.WithLabelValues("sampled").Observe(time.Since(start).Seconds())
+		c.readQueriesTotal.WithLabelValues("sampled", strconv.Itoa(httpResp.StatusCode)).Inc()
+		ss, err := c.handleSampledResponse(req, httpResp, sortSeries)
+		cancel()
+		return ss, err
+	case strings.HasPrefix(contentType, "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse"):
+		c.readQueriesDuration.WithLabelValues("chunked").Observe(time.Since(start).Seconds())
+
+		s := NewChunkedReader(httpResp.Body, c.chunkedReadLimit, nil)
+		return NewChunkedSeriesSet(s, httpResp.Body, query.StartTimestampMs, query.EndTimestampMs, func(err error) {
+			code := strconv.Itoa(httpResp.StatusCode)
+			if !errors.Is(err, io.EOF) {
+				code = "aborted_stream"
+			}
+			c.readQueriesTotal.WithLabelValues("chunked", code).Inc()
+			cancel()
+		}), nil
+	default:
+		c.readQueriesDuration.WithLabelValues("unsupported").Observe(time.Since(start).Seconds())
+		c.readQueriesTotal.WithLabelValues("unsupported", strconv.Itoa(httpResp.StatusCode)).Inc()
+		cancel()
+		return nil, fmt.Errorf("unsupported content type: %s", contentType)
+	}
+}
+
+func (c *Client) handleSampledResponse(req *prompb.ReadRequest, httpResp *http.Response, sortSeries bool) (storage.SeriesSet, error) {
+	compressed, err := io.ReadAll(httpResp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("error reading response. HTTP status code: %s: %w", httpResp.Status, err)
+	}
+	defer func() {
+		_, _ = io.Copy(io.Discard, httpResp.Body)
+		_ = httpResp.Body.Close()
+	}()
+
 	uncompressed, err := snappy.Decode(nil, compressed)
 	if err != nil {
 		return nil, fmt.Errorf("error reading response: %w", err)
@ -399,5 +440,8 @@ func (c *Client) Read(ctx context.Context, query *prompb.Query) (*prompb.QueryRe
 		return nil, fmt.Errorf("responses: want %d, got %d", len(req.Queries), len(resp.Results))
 	}

-	return resp.Results[0], nil
+	// This client does not batch queries so there's always only 1 result.
+	res := resp.Results[0]
+
+	return FromQueryResult(sortSeries, res), nil
 }
--- a/storage/remote/client_test.go
+++ b/storage/remote/client_test.go
@ -23,9 +23,15 @@ import (
 	"testing"
 	"time"

+	"github.com/gogo/protobuf/proto"
+	"github.com/golang/snappy"
 	config_util "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/require"
+
+	"github.com/prometheus/prometheus/config"
+	"github.com/prometheus/prometheus/prompb"
+	"github.com/prometheus/prometheus/tsdb/chunkenc"
 )

 var longErrMessage = strings.Repeat("error message", maxErrMsgLen)
@ -208,3 +214,226 @@ func TestClientCustomHeaders(t *testing.T) {

 	require.True(t, called, "The remote server wasn't called")
 }
+
+// TestReadClient runs Client.Read against an in-process HTTP server for every
+// table entry and compares the returned series (labels and float samples)
+// with the expectations, or checks the returned error when one is expected.
+func TestReadClient(t *testing.T) {
+	tests := []struct {
+		name                  string
+		query                 *prompb.Query
+		httpHandler           http.HandlerFunc
+		expectedLabels        []map[string]string
+		expectedSamples       [][]model.SamplePair
+		expectedErrorContains string
+		sortSeries            bool
+	}{
+		{
+			name:        "sorted sampled response",
+			httpHandler: sampledResponseHTTPHandler(t),
+			expectedLabels: []map[string]string{
+				{"foo1": "bar"},
+				{"foo2": "bar"},
+			},
+			expectedSamples: [][]model.SamplePair{
+				{
+					{Timestamp: model.Time(0), Value: model.SampleValue(3)},
+					{Timestamp: model.Time(5), Value: model.SampleValue(4)},
+				},
+				{
+					{Timestamp: model.Time(0), Value: model.SampleValue(1)},
+					{Timestamp: model.Time(5), Value: model.SampleValue(2)},
+				},
+			},
+			expectedErrorContains: "",
+			sortSeries:            true,
+		},
+		{
+			name:        "unsorted sampled response",
+			httpHandler: sampledResponseHTTPHandler(t),
+			expectedLabels: []map[string]string{
+				{"foo2": "bar"},
+				{"foo1": "bar"},
+			},
+			expectedSamples: [][]model.SamplePair{
+				{
+					{Timestamp: model.Time(0), Value: model.SampleValue(1)},
+					{Timestamp: model.Time(5), Value: model.SampleValue(2)},
+				},
+				{
+					{Timestamp: model.Time(0), Value: model.SampleValue(3)},
+					{Timestamp: model.Time(5), Value: model.SampleValue(4)},
+				},
+			},
+			expectedErrorContains: "",
+			sortSeries:            false,
+		},
+		{
+			name: "chunked response",
+			query: &prompb.Query{
+				StartTimestampMs: 4000,
+				EndTimestampMs:   12000,
+			},
+			httpHandler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.Header().Set("Content-Type", "application/x-streamed-protobuf; proto=prometheus.ChunkedReadResponse")
+
+				// The handler runs on the server's goroutine, so failures are
+				// reported with t.Error instead of require (which calls FailNow
+				// and must only be used from the test goroutine).
+				flusher, ok := w.(http.Flusher)
+				if !ok {
+					t.Error("response writer does not implement http.Flusher")
+					return
+				}
+
+				cw := NewChunkedWriter(w, flusher)
+				l := []prompb.Label{
+					{Name: "foo", Value: "bar"},
+				}
+
+				// One response frame per test chunk, each carrying a single series.
+				chunks := buildTestChunks(t)
+				for i, c := range chunks {
+					cSeries := prompb.ChunkedSeries{Labels: l, Chunks: []prompb.Chunk{c}}
+					readResp := prompb.ChunkedReadResponse{
+						ChunkedSeries: []*prompb.ChunkedSeries{&cSeries},
+						QueryIndex:    int64(i),
+					}
+
+					b, err := proto.Marshal(&readResp)
+					if err != nil {
+						t.Errorf("marshalling chunked read response: %v", err)
+						return
+					}
+
+					if _, err := cw.Write(b); err != nil {
+						t.Errorf("writing chunked read response: %v", err)
+						return
+					}
+				}
+			}),
+			expectedLabels: []map[string]string{
+				{"foo": "bar"},
+				{"foo": "bar"},
+				{"foo": "bar"},
+			},
+			// This is the output of buildTestChunks minus the samples outside the query range.
+			expectedSamples: [][]model.SamplePair{
+				{
+					{Timestamp: model.Time(4000), Value: model.SampleValue(4)},
+				},
+				{
+					{Timestamp: model.Time(5000), Value: model.SampleValue(1)},
+					{Timestamp: model.Time(6000), Value: model.SampleValue(2)},
+					{Timestamp: model.Time(7000), Value: model.SampleValue(3)},
+					{Timestamp: model.Time(8000), Value: model.SampleValue(4)},
+					{Timestamp: model.Time(9000), Value: model.SampleValue(5)},
+				},
+				{
+					{Timestamp: model.Time(10000), Value: model.SampleValue(2)},
+					{Timestamp: model.Time(11000), Value: model.SampleValue(3)},
+					{Timestamp: model.Time(12000), Value: model.SampleValue(4)},
+				},
+			},
+			expectedErrorContains: "",
+		},
+		{
+			name: "unsupported content type",
+			httpHandler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+				w.Header().Set("Content-Type", "foobar")
+			}),
+			expectedErrorContains: "unsupported content type",
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			server := httptest.NewServer(test.httpHandler)
+			defer server.Close()
+
+			u, err := url.Parse(server.URL)
+			require.NoError(t, err)
+
+			conf := &ClientConfig{
+				URL:              &config_util.URL{URL: u},
+				Timeout:          model.Duration(5 * time.Second),
+				ChunkedReadLimit: config.DefaultChunkedReadLimit,
+			}
+			c, err := NewReadClient("test", conf)
+			require.NoError(t, err)
+
+			query := &prompb.Query{}
+			if test.query != nil {
+				query = test.query
+			}
+
+			ss, err := c.Read(context.Background(), query, test.sortSeries)
+			if test.expectedErrorContains != "" {
+				require.ErrorContains(t, err, test.expectedErrorContains)
+				return
+			}
+
+			require.NoError(t, err)
+
+			i := 0
+
+			for ss.Next() {
+				require.NoError(t, ss.Err())
+				// Fail cleanly instead of panicking with an index out of range
+				// when more series are returned than expected.
+				require.Less(t, i, len(test.expectedLabels), "unexpected extra series")
+				s := ss.At()
+
+				l := s.Labels()
+				require.Len(t, test.expectedLabels[i], l.Len())
+				for k, v := range test.expectedLabels[i] {
+					require.True(t, l.Has(k))
+					require.Equal(t, v, l.Get(k))
+				}
+
+				it := s.Iterator(nil)
+				j := 0
+
+				for valType := it.Next(); valType != chunkenc.ValNone; valType = it.Next() {
+					require.NoError(t, it.Err())
+					require.Less(t, j, len(test.expectedSamples[i]), "unexpected extra sample")
+
+					ts, v := it.At()
+					expectedSample := test.expectedSamples[i][j]
+
+					require.Equal(t, int64(expectedSample.Timestamp), ts)
+					require.Equal(t, float64(expectedSample.Value), v)
+
+					j++
+				}
+
+				require.Len(t, test.expectedSamples[i], j)
+
+				i++
+			}
+
+			require.NoError(t, ss.Err())
+			// Without this check a response yielding no (or too few) series would pass.
+			require.Len(t, test.expectedLabels, i)
+		})
+	}
+}
+
+// sampledResponseHTTPHandler returns a handler that replies with a
+// snappy-compressed prompb.ReadResponse holding a single query result of two
+// float series, emitted in the order foo2, foo1.
+func sampledResponseHTTPHandler(t *testing.T) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/x-protobuf")
+
+		resp := prompb.ReadResponse{
+			Results: []*prompb.QueryResult{
+				{
+					Timeseries: []*prompb.TimeSeries{
+						{
+							Labels: []prompb.Label{
+								{Name: "foo2", Value: "bar"},
+							},
+							Samples: []prompb.Sample{
+								{Value: float64(1), Timestamp: int64(0)},
+								{Value: float64(2), Timestamp: int64(5)},
+							},
+							Exemplars: []prompb.Exemplar{},
+						},
+						{
+							Labels: []prompb.Label{
+								{Name: "foo1", Value: "bar"},
+							},
+							Samples: []prompb.Sample{
+								{Value: float64(3), Timestamp: int64(0)},
+								{Value: float64(4), Timestamp: int64(5)},
+							},
+							Exemplars: []prompb.Exemplar{},
+						},
+					},
+				},
+			},
+		}
+		b, err := proto.Marshal(&resp)
+		if err != nil {
+			// The handler runs on the server's goroutine; require calls
+			// FailNow, which must only be used from the test goroutine.
+			t.Errorf("marshalling read response: %v", err)
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		if _, err := w.Write(snappy.Encode(nil, b)); err != nil {
+			t.Errorf("writing read response: %v", err)
+		}
+	}
+}
--- a/storage/remote/codec.go
+++ b/storage/remote/codec.go
@ -540,6 +540,220 @@ func (c *concreteSeriesIterator) Err() error {
 	return nil
 }

+// chunkedSeriesSet implements storage.SeriesSet.
+// Each successful call to Next reads one prompb.ChunkedReadResponse from
+// chunkedReader and exposes its first series through At.
+type chunkedSeriesSet struct {
+	// chunkedReader is the source of ChunkedReadResponse messages.
+	chunkedReader *ChunkedReader
+	// respBody is drained on non-EOF errors and closed once Next stops.
+	respBody      io.ReadCloser
+	// mint and maxt are handed to every chunkedSeries that Next builds.
+	mint, maxt    int64
+	// cancel is called by Next with the error that ended reading (io.EOF included).
+	cancel        func(error)
+
+	// current is the series built by the latest successful Next.
+	current storage.Series
+	// err holds the non-EOF error returned by the chunked reader, if any.
+	err     error
+}
+
+// NewChunkedSeriesSet returns a storage.SeriesSet backed by chunkedReader.
+// respBody, mint, maxt and cancel are stored unchanged on the returned set
+// for use while iterating.
+func NewChunkedSeriesSet(chunkedReader *ChunkedReader, respBody io.ReadCloser, mint, maxt int64, cancel func(error)) storage.SeriesSet {
+	set := new(chunkedSeriesSet)
+	set.chunkedReader = chunkedReader
+	set.respBody = respBody
+	set.mint, set.maxt = mint, maxt
+	set.cancel = cancel
+	return set
+}
+
+// Next returns true if there is a next series and false otherwise. It will
+// block until the next series is available.
+//
+// When reading stops (end of stream or failure), the response body is closed
+// and cancel is called with the terminating error. Only non-EOF errors are
+// recorded for Err; on those the remaining body is drained before closing.
+func (s *chunkedSeriesSet) Next() bool {
+	res := &prompb.ChunkedReadResponse{}
+
+	err := s.chunkedReader.NextProto(res)
+	if err == nil && len(res.ChunkedSeries) == 0 {
+		// A frame without any series would panic on the index below;
+		// treat it as a malformed stream instead.
+		err = errors.New("chunked read response contains no series")
+	}
+	if err != nil {
+		if !errors.Is(err, io.EOF) {
+			s.err = err
+			_, _ = io.Copy(io.Discard, s.respBody)
+		}
+
+		_ = s.respBody.Close()
+		s.cancel(err)
+
+		return false
+	}
+
+	// Only the first series of the frame is used.
+	s.current = &chunkedSeries{
+		ChunkedSeries: prompb.ChunkedSeries{
+			Labels: res.ChunkedSeries[0].Labels,
+			Chunks: res.ChunkedSeries[0].Chunks,
+		},
+		mint: s.mint,
+		maxt: s.maxt,
+	}
+
+	return true
+}
+
+// At returns the series built by the most recent successful call to Next.
+func (s *chunkedSeriesSet) At() storage.Series {
+	return s.current
+}
+
+// Err returns the error recorded by Next, or nil if none was recorded.
+func (s *chunkedSeriesSet) Err() error {
+	return s.err
+}
+
+// Warnings always returns nil; this series set does not produce annotations.
+func (s *chunkedSeriesSet) Warnings() annotations.Annotations {
+	return nil
+}
+
+// chunkedSeries wraps a prompb.ChunkedSeries together with the time range
+// (mint, maxt) that is passed to the iterators it creates.
+type chunkedSeries struct {
+	prompb.ChunkedSeries
+	mint, maxt int64
+}
+
+// Compile-time check that chunkedSeries satisfies storage.Series.
+var _ storage.Series = &chunkedSeries{}
+
+// Labels converts the embedded protobuf labels into labels.Labels using a
+// fresh scratch builder.
+func (s *chunkedSeries) Labels() labels.Labels {
+	builder := labels.NewScratchBuilder(0)
+	return s.ChunkedSeries.ToLabels(&builder, nil)
+}
+
+// Iterator returns an iterator over the series' chunks bounded by mint and
+// maxt. If it is a *chunkedSeriesIterator it is reset and reused; otherwise a
+// new iterator is created.
+func (s *chunkedSeries) Iterator(it chunkenc.Iterator) chunkenc.Iterator {
+	if reused, ok := it.(*chunkedSeriesIterator); ok {
+		reused.reset(s.Chunks, s.mint, s.maxt)
+		return reused
+	}
+	return newChunkedSeriesIterator(s.Chunks, s.mint, s.maxt)
+}
+
+// chunkedSeriesIterator iterates over the samples of a slice of encoded
+// chunks, decoding one chunk at a time and yielding only samples whose
+// timestamp lies within [mint, maxt].
+type chunkedSeriesIterator struct {
+	// chunks are the encoded chunks to iterate; set to nil once exhausted past maxt.
+	chunks     []prompb.Chunk
+	// idx is the index into chunks of the chunk that cur iterates.
+	idx        int
+	// cur is the iterator over the decoded chunk at idx.
+	cur        chunkenc.Iterator
+	// valType is the value type most recently obtained from cur.
+	valType    chunkenc.ValueType
+	mint, maxt int64
+
+	// err is set when decoding a chunk fails.
+	err error
+}
+
+// Compile-time check that chunkedSeriesIterator satisfies chunkenc.Iterator.
+var _ chunkenc.Iterator = &chunkedSeriesIterator{}
+
+// newChunkedSeriesIterator allocates an iterator and initialises it for the
+// given chunks and time range via reset.
+func newChunkedSeriesIterator(chunks []prompb.Chunk, mint, maxt int64) *chunkedSeriesIterator {
+	iter := new(chunkedSeriesIterator)
+	iter.reset(chunks, mint, maxt)
+	return iter
+}
+
+// Next advances to the next sample whose timestamp lies within [mint, maxt]
+// and returns its value type, moving on to the following chunk when the
+// current one is exhausted. It returns chunkenc.ValNone when there are no
+// more samples in range, when there are no chunks, or after an error.
+func (it *chunkedSeriesIterator) Next() chunkenc.ValueType {
+	if it.err != nil {
+		return chunkenc.ValNone
+	}
+	if len(it.chunks) == 0 {
+		return chunkenc.ValNone
+	}
+
+	// Walk the current chunk, skipping samples before mint.
+	for it.valType = it.cur.Next(); it.valType != chunkenc.ValNone; it.valType = it.cur.Next() {
+		atT := it.AtT()
+		if atT > it.maxt {
+			it.chunks = nil // Exhaust this iterator so follow-up calls to Next or Seek return fast.
+			return chunkenc.ValNone
+		}
+		if atT >= it.mint {
+			return it.valType
+		}
+	}
+
+	// Current chunk is exhausted: stop if it was the last one, otherwise
+	// decode the next chunk and continue there (the recursive call also
+	// handles a decode error recorded by resetIterator).
+	if it.idx >= len(it.chunks)-1 {
+		it.valType = chunkenc.ValNone
+	} else {
+		it.idx++
+		it.resetIterator()
+		it.valType = it.Next()
+	}
+
+	return it.valType
+}
+
+// Seek advances the iterator to the first sample with a timestamp >= t and
+// returns its value type. If the current sample of the current chunk already
+// has a timestamp >= t, the iterator is not advanced. Samples reached by
+// advancing must additionally lie within [mint, maxt]. Only the chunk selected
+// by the search is scanned. It returns chunkenc.ValNone when no such sample
+// is found, when there are no chunks, or after an error.
+func (it *chunkedSeriesIterator) Seek(t int64) chunkenc.ValueType {
+	if it.err != nil {
+		return chunkenc.ValNone
+	}
+	if len(it.chunks) == 0 {
+		return chunkenc.ValNone
+	}
+
+	// Jump to the first chunk, at or after the current one, whose MaxTimeMs
+	// is not before t.
+	startIdx := it.idx
+	it.idx += sort.Search(len(it.chunks)-startIdx, func(i int) bool {
+		return it.chunks[startIdx+i].MaxTimeMs >= t
+	})
+	if it.idx > startIdx {
+		it.resetIterator()
+		if it.err != nil {
+			// Decoding the target chunk failed and it.cur still refers to
+			// the previous chunk; do not keep yielding samples from it.
+			it.valType = chunkenc.ValNone
+			return chunkenc.ValNone
+		}
+	} else {
+		ts := it.cur.AtT()
+		if ts >= t {
+			return it.valType
+		}
+	}
+
+	for it.valType = it.cur.Next(); it.valType != chunkenc.ValNone; it.valType = it.cur.Next() {
+		ts := it.cur.AtT()
+		if ts > it.maxt {
+			it.chunks = nil // Exhaust this iterator so follow-up calls to Next or Seek return fast.
+			return chunkenc.ValNone
+		}
+		if ts >= t && ts >= it.mint {
+			return it.valType
+		}
+	}
+
+	it.valType = chunkenc.ValNone
+	return it.valType
+}
+
+// resetIterator points cur at the chunk at idx. Past the last chunk a no-op
+// iterator is installed. If the chunk cannot be decoded the error is stored
+// in err and cur is left unchanged.
+func (it *chunkedSeriesIterator) resetIterator() {
+	if it.idx >= len(it.chunks) {
+		it.cur = chunkenc.NewNopIterator()
+		return
+	}
+
+	encoded := it.chunks[it.idx]
+	decoded, err := chunkenc.FromData(chunkenc.Encoding(encoded.Type), encoded.Data)
+	if err != nil {
+		it.err = err
+		return
+	}
+
+	it.cur = decoded.Iterator(nil)
+}
+
+// reset re-initialises the iterator for a new set of chunks and time range so
+// that it can be reused across series. State left over from a previous use
+// (a decode error, the last value type) is cleared; otherwise a reused
+// iterator that once failed would never yield samples again.
+func (it *chunkedSeriesIterator) reset(chunks []prompb.Chunk, mint, maxt int64) {
+	it.chunks = chunks
+	it.mint = mint
+	it.maxt = maxt
+	it.idx = 0
+	it.err = nil
+	it.valType = chunkenc.ValNone
+	if len(chunks) > 0 {
+		it.resetIterator()
+	}
+}
+
+// At returns the timestamp and float value reported by the current chunk iterator.
+func (it *chunkedSeriesIterator) At() (ts int64, v float64) {
+	return it.cur.At()
+}
+
+// AtHistogram delegates to the current chunk iterator's AtHistogram.
+func (it *chunkedSeriesIterator) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) {
+	return it.cur.AtHistogram(h)
+}
+
+// AtFloatHistogram delegates to the current chunk iterator's AtFloatHistogram.
+func (it *chunkedSeriesIterator) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
+	return it.cur.AtFloatHistogram(fh)
+}
+
+// AtT returns the timestamp reported by the current chunk iterator.
+func (it *chunkedSeriesIterator) AtT() int64 {
+	return it.cur.AtT()
+}
+
+// Err returns the chunk decoding error recorded by the iterator, if any.
+func (it *chunkedSeriesIterator) Err() error {
+	return it.err
+}
+
 // validateLabelsAndMetricName validates the label names/values and metric names returned from remote read,
 // also making sure that there are no labels with duplicate names.
 func validateLabelsAndMetricName(ls []prompb.Label) error {
@ -612,15 +826,6 @@ func FromLabelMatchers(matchers []*prompb.LabelMatcher) ([]*labels.Matcher, erro
 	return result, nil
 }

-// LabelProtosToMetric unpack a []*prompb.Label to a model.Metric.
-func LabelProtosToMetric(labelPairs []*prompb.Label) model.Metric {
-	metric := make(model.Metric, len(labelPairs))
-	for _, l := range labelPairs {
-		metric[model.LabelName(l.Name)] = model.LabelValue(l.Value)
-	}
-	return metric
-}
-
 // DecodeWriteRequest from an io.Reader into a prompb.WriteRequest, handling
 // snappy decompression.
 // Used also by documentation/examples/remote_storage.
--- a/storage/remote/codec_test.go
+++ b/storage/remote/codec_test.go
@ -16,6 +16,7 @@ package remote
 import (
 	"bytes"
 	"fmt"
+	"io"
 	"sync"
 	"testing"

@ -24,6 +25,7 @@ import (
 	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/require"

+	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/model/histogram"
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/model/metadata"
@ -705,3 +707,270 @@ func (c *mockChunkIterator) Next() bool {
 func (c *mockChunkIterator) Err() error {
 	return nil
 }
+
+// TestChunkedSeriesIterator exercises Next and Seek of chunkedSeriesIterator
+// over the chunks from buildTestChunks, and checks its behaviour for an
+// undecodable chunk and for empty or nil chunk slices.
+func TestChunkedSeriesIterator(t *testing.T) {
+	t.Run("happy path", func(t *testing.T) {
+		chks := buildTestChunks(t)
+
+		it := newChunkedSeriesIterator(chks, 2000, 12000)
+
+		require.NoError(t, it.err)
+		require.NotNil(t, it.cur)
+
+		// Initial next; advance to first valid sample of first chunk.
+		res := it.Next()
+		require.Equal(t, chunkenc.ValFloat, res)
+		require.NoError(t, it.Err())
+
+		ts, v := it.At()
+		require.Equal(t, int64(2000), ts)
+		require.Equal(t, float64(2), v)
+
+		// Next to the second sample of the first chunk.
+		res = it.Next()
+		require.Equal(t, chunkenc.ValFloat, res)
+		require.NoError(t, it.Err())
+
+		ts, v = it.At()
+		require.Equal(t, int64(3000), ts)
+		require.Equal(t, float64(3), v)
+
+		// Attempt to seek to the first sample of the first chunk (should return current sample).
+		res = it.Seek(0)
+		require.Equal(t, chunkenc.ValFloat, res)
+
+		ts, v = it.At()
+		require.Equal(t, int64(3000), ts)
+		require.Equal(t, float64(3), v)
+
+		// Seek to the end of the first chunk.
+		res = it.Seek(4000)
+		require.Equal(t, chunkenc.ValFloat, res)
+
+		ts, v = it.At()
+		require.Equal(t, int64(4000), ts)
+		require.Equal(t, float64(4), v)
+
+		// Next to the first sample of the second chunk.
+		res = it.Next()
+		require.Equal(t, chunkenc.ValFloat, res)
+		require.NoError(t, it.Err())
+
+		ts, v = it.At()
+		require.Equal(t, int64(5000), ts)
+		require.Equal(t, float64(1), v)
+
+		// Seek to the second sample of the third chunk.
+		res = it.Seek(10999)
+		require.Equal(t, chunkenc.ValFloat, res)
+		require.NoError(t, it.Err())
+
+		ts, v = it.At()
+		require.Equal(t, int64(11000), ts)
+		require.Equal(t, float64(3), v)
+
+		// Attempt to seek to something past the last sample (should return ValNone and exhaust the iterator).
+		res = it.Seek(99999)
+		require.Equal(t, chunkenc.ValNone, res)
+		require.NoError(t, it.Err())
+
+		// Attempt to next past the last sample (should return ValNone as the iterator is exhausted).
+		res = it.Next()
+		require.Equal(t, chunkenc.ValNone, res)
+		require.NoError(t, it.Err())
+	})
+
+	t.Run("invalid chunk encoding error", func(t *testing.T) {
+		chks := buildTestChunks(t)
+
+		// Set chunk type to an invalid value.
+		chks[0].Type = 8
+
+		it := newChunkedSeriesIterator(chks, 0, 14000)
+
+		// Both Next and Seek must report ValNone once the decode error is recorded.
+		res := it.Next()
+		require.Equal(t, chunkenc.ValNone, res)
+
+		res = it.Seek(1000)
+		require.Equal(t, chunkenc.ValNone, res)
+
+		require.ErrorContains(t, it.err, "invalid chunk encoding")
+		require.Nil(t, it.cur)
+	})
+
+	t.Run("empty chunks", func(t *testing.T) {
+		// An empty, non-nil slice.
+		emptyChunks := make([]prompb.Chunk, 0)
+
+		it1 := newChunkedSeriesIterator(emptyChunks, 0, 1000)
+		require.Equal(t, chunkenc.ValNone, it1.Next())
+		require.Equal(t, chunkenc.ValNone, it1.Seek(1000))
+		require.NoError(t, it1.Err())
+
+		// A nil slice must behave the same way.
+		var nilChunks []prompb.Chunk
+
+		it2 := newChunkedSeriesIterator(nilChunks, 0, 1000)
+		require.Equal(t, chunkenc.ValNone, it2.Next())
+		require.Equal(t, chunkenc.ValNone, it2.Seek(1000))
+		require.NoError(t, it2.Err())
+	})
+}
+
+// TestChunkedSeries checks that a chunkedSeries exposes its labels in sorted
+// form and that its iterator starts at the first sample of the first chunk.
+func TestChunkedSeries(t *testing.T) {
+	t.Run("happy path", func(t *testing.T) {
+		protoLabels := []prompb.Label{
+			{Name: "foo", Value: "bar"},
+			{Name: "asdf", Value: "zxcv"},
+		}
+		series := chunkedSeries{
+			ChunkedSeries: prompb.ChunkedSeries{
+				Labels: protoLabels,
+				Chunks: buildTestChunks(t),
+			},
+		}
+
+		require.Equal(t, labels.FromStrings("asdf", "zxcv", "foo", "bar"), series.Labels())
+
+		iter := series.Iterator(nil)
+		// Behavior is undefined w/o the initial call to Next.
+		valType := iter.Next()
+
+		require.Equal(t, chunkenc.ValFloat, valType)
+		require.NoError(t, iter.Err())
+
+		gotTs, gotVal := iter.At()
+		require.Equal(t, int64(0), gotTs)
+		require.Equal(t, float64(0), gotVal)
+	})
+}
+
+// TestChunkedSeriesSet writes chunked read responses into an in-memory buffer
+// and reads them back through a chunkedSeriesSet, once with valid frames and
+// once with corrupted ones.
+func TestChunkedSeriesSet(t *testing.T) {
+	t.Run("happy path", func(t *testing.T) {
+		buf := &bytes.Buffer{}
+		flusher := &mockFlusher{}
+
+		// Writer and reader share the same buffer.
+		w := NewChunkedWriter(buf, flusher)
+		r := NewChunkedReader(buf, config.DefaultChunkedReadLimit, nil)
+
+		chks := buildTestChunks(t)
+		l := []prompb.Label{
+			{Name: "foo", Value: "bar"},
+		}
+
+		// One response frame per test chunk, each carrying a single series.
+		for i, c := range chks {
+			cSeries := prompb.ChunkedSeries{Labels: l, Chunks: []prompb.Chunk{c}}
+			readResp := prompb.ChunkedReadResponse{
+				ChunkedSeries: []*prompb.ChunkedSeries{&cSeries},
+				QueryIndex:    int64(i),
+			}
+
+			b, err := proto.Marshal(&readResp)
+			require.NoError(t, err)
+
+			_, err = w.Write(b)
+			require.NoError(t, err)
+		}
+
+		ss := NewChunkedSeriesSet(r, io.NopCloser(buf), 0, 14000, func(error) {})
+		require.NoError(t, ss.Err())
+		require.Nil(t, ss.Warnings())
+
+		res := ss.Next()
+		require.True(t, res)
+		require.NoError(t, ss.Err())
+
+		s := ss.At()
+		require.Equal(t, 1, s.Labels().Len())
+		require.True(t, s.Labels().Has("foo"))
+		require.Equal(t, "bar", s.Labels().Get("foo"))
+
+		it := s.Iterator(nil)
+		it.Next()
+		ts, v := it.At()
+		require.Equal(t, int64(0), ts)
+		require.Equal(t, float64(0), v)
+
+		// The first response was consumed above; count the remaining ones.
+		numResponses := 1
+		for ss.Next() {
+			numResponses++
+		}
+		require.Equal(t, numTestChunks, numResponses)
+		require.NoError(t, ss.Err())
+	})
+
+	t.Run("chunked reader error", func(t *testing.T) {
+		buf := &bytes.Buffer{}
+		flusher := &mockFlusher{}
+
+		w := NewChunkedWriter(buf, flusher)
+		r := NewChunkedReader(buf, config.DefaultChunkedReadLimit, nil)
+
+		chks := buildTestChunks(t)
+		l := []prompb.Label{
+			{Name: "foo", Value: "bar"},
+		}
+
+		for i, c := range chks {
+			cSeries := prompb.ChunkedSeries{Labels: l, Chunks: []prompb.Chunk{c}}
+			readResp := prompb.ChunkedReadResponse{
+				ChunkedSeries: []*prompb.ChunkedSeries{&cSeries},
+				QueryIndex:    int64(i),
+			}
+
+			b, err := proto.Marshal(&readResp)
+			require.NoError(t, err)
+
+			b[0] = 0xFF // Corruption!
+
+			_, err = w.Write(b)
+			require.NoError(t, err)
+		}
+
+		ss := NewChunkedSeriesSet(r, io.NopCloser(buf), 0, 14000, func(error) {})
+		require.NoError(t, ss.Err())
+		require.Nil(t, ss.Warnings())
+
+		// The very first frame is corrupted, so Next must fail and expose the error.
+		res := ss.Next()
+		require.False(t, res)
+		require.ErrorContains(t, ss.Err(), "proto: illegal wireType 7")
+	})
+}
+
+// mockFlusher implements http.Flusher.
+type mockFlusher struct{}
+
+// Flush is a no-op.
+func (f *mockFlusher) Flush() {}
+
+// Dimensions of the fixture produced by buildTestChunks.
+const (
+	// numTestChunks is the number of chunks built.
+	numTestChunks          = 3
+	// numSamplesPerTestChunk is the number of samples appended to each chunk.
+	numSamplesPerTestChunk = 5
+)
+
+// buildTestChunks returns numTestChunks XOR-encoded chunks holding
+// numSamplesPerTestChunk float samples each. Timestamps start at 0 and grow
+// by 1000 per sample across all chunks; the j-th sample of chunk i has the
+// value i+j. Each chunk's MaxTimeMs is set to the timestamp that follows its
+// last sample (i.e. the MinTimeMs of the next chunk).
+func buildTestChunks(t *testing.T) []prompb.Chunk {
+	// Report failures at the caller's line rather than inside this helper.
+	t.Helper()
+
+	startTime := int64(0)
+	chks := make([]prompb.Chunk, 0, numTestChunks)
+
+	// Named ts rather than time to avoid shadowing the standard package name.
+	ts := startTime
+
+	for i := 0; i < numTestChunks; i++ {
+		c := chunkenc.NewXORChunk()
+
+		a, err := c.Appender()
+		require.NoError(t, err)
+
+		minTimeMs := ts
+
+		for j := 0; j < numSamplesPerTestChunk; j++ {
+			a.Append(ts, float64(i+j))
+			ts += int64(1000)
+		}
+
+		chks = append(chks, prompb.Chunk{
+			MinTimeMs: minTimeMs,
+			MaxTimeMs: ts,
+			Type:      prompb.Chunk_XOR,
+			Data:      c.Bytes(),
+		})
+	}
+
+	return chks
+}
--- a/storage/remote/otlptranslator/prometheusremotewrite/helper.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/helper.go
@ -24,7 +24,6 @@ import (
 	"slices"
 	"sort"
 	"strconv"
-	"time"
 	"unicode/utf8"

 	"github.com/cespare/xxhash/v2"
@ -594,5 +593,5 @@ func addResourceTargetInfo(resource pcommon.Resource, settings Settings, timesta

 // convertTimeStamp converts OTLP timestamp in ns to timestamp in ms
 func convertTimeStamp(timestamp pcommon.Timestamp) int64 {
-	return timestamp.AsTime().UnixNano() / (int64(time.Millisecond) / int64(time.Nanosecond))
+	return int64(timestamp) / 1_000_000
 }
--- a/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/helper_test.go
@ -10,13 +10,21 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/debbf30360b8d3a0ded8db09c4419d2a9c99b94a/pkg/translator/prometheusremotewrite/helper_test.go
+// Provenance-includes-license: Apache-2.0
+// Provenance-includes-copyright: Copyright The OpenTelemetry Authors.
+
 package prometheusremotewrite

 import (
 	"testing"
+	"time"

 	"github.com/stretchr/testify/assert"
 	"go.opentelemetry.io/collector/pdata/pcommon"
+	"go.opentelemetry.io/collector/pdata/pmetric"
+
+	"github.com/prometheus/common/model"

 	"github.com/prometheus/prometheus/prompb"
 )
@ -159,3 +167,239 @@ func TestCreateAttributes(t *testing.T) {
 		})
 	}
 }
+
+// Test_convertTimeStamp checks convertTimeStamp on a few OTLP nanosecond
+// timestamps against the expected millisecond values.
+func Test_convertTimeStamp(t *testing.T) {
+	cases := []struct {
+		name string
+		arg  pcommon.Timestamp
+		want int64
+	}{
+		{name: "zero", arg: 0, want: 0},
+		{name: "1ms", arg: 1_000_000, want: 1},
+		{name: "1s", arg: pcommon.Timestamp(time.Unix(1, 0).UnixNano()), want: 1000},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, convertTimeStamp(tc.arg))
+		})
+	}
+}
+
+// TestPrometheusConverter_AddSummaryDataPoints converts a single empty summary
+// data point with ExportCreatedMetric enabled and compares the converter's
+// unique series map with the expected count, sum and (only when a start
+// timestamp is set) created series.
+func TestPrometheusConverter_AddSummaryDataPoints(t *testing.T) {
+	ts := pcommon.Timestamp(time.Now().UnixNano())
+	tests := []struct {
+		name   string
+		metric func() pmetric.Metric
+		want   func() map[uint64]*prompb.TimeSeries
+	}{
+		{
+			name: "summary with start time",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_summary")
+				metric.SetEmptySummary()
+
+				dp := metric.Summary().DataPoints().AppendEmpty()
+				dp.SetTimestamp(ts)
+				dp.SetStartTimestamp(ts)
+
+				return metric
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_summary" + countStr},
+				}
+				createdLabels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_summary" + createdSuffix},
+				}
+				sumLabels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_summary" + sumStr},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+					timeSeriesSignature(sumLabels): {
+						Labels: sumLabels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+					// The created series carries the start timestamp (in ms) as its value.
+					timeSeriesSignature(createdLabels): {
+						Labels: createdLabels,
+						Samples: []prompb.Sample{
+							{Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)},
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "summary without start time",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_summary")
+				metric.SetEmptySummary()
+
+				dp := metric.Summary().DataPoints().AppendEmpty()
+				dp.SetTimestamp(ts)
+
+				return metric
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_summary" + countStr},
+				}
+				sumLabels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_summary" + sumStr},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+					timeSeriesSignature(sumLabels): {
+						Labels: sumLabels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+				}
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			metric := tt.metric()
+			converter := NewPrometheusConverter()
+
+			converter.addSummaryDataPoints(
+				metric.Summary().DataPoints(),
+				pcommon.NewResource(),
+				Settings{
+					ExportCreatedMetric: true,
+				},
+				metric.Name(),
+			)
+
+			assert.Equal(t, tt.want(), converter.unique)
+			assert.Empty(t, converter.conflicts)
+		})
+	}
+}
+
+// TestPrometheusConverter_AddHistogramDataPoints converts a single empty
+// cumulative histogram data point with ExportCreatedMetric enabled and
+// compares the converter's unique series map with the expected +Inf bucket,
+// count and (only when a start timestamp is set) created series.
+func TestPrometheusConverter_AddHistogramDataPoints(t *testing.T) {
+	ts := pcommon.Timestamp(time.Now().UnixNano())
+	tests := []struct {
+		name   string
+		metric func() pmetric.Metric
+		want   func() map[uint64]*prompb.TimeSeries
+	}{
+		{
+			name: "histogram with start time",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_hist")
+				metric.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+
+				pt := metric.Histogram().DataPoints().AppendEmpty()
+				pt.SetTimestamp(ts)
+				pt.SetStartTimestamp(ts)
+
+				return metric
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist" + countStr},
+				}
+				createdLabels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist" + createdSuffix},
+				}
+				infLabels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist_bucket"},
+					{Name: model.BucketLabel, Value: "+Inf"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(infLabels): {
+						Labels: infLabels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+					// The created series carries the start timestamp (in ms) as its value.
+					timeSeriesSignature(createdLabels): {
+						Labels: createdLabels,
+						Samples: []prompb.Sample{
+							{Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)},
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "histogram without start time",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_hist")
+				metric.SetEmptyHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+
+				pt := metric.Histogram().DataPoints().AppendEmpty()
+				pt.SetTimestamp(ts)
+
+				return metric
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist" + countStr},
+				}
+				infLabels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist_bucket"},
+					{Name: model.BucketLabel, Value: "+Inf"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(infLabels): {
+						Labels: infLabels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+				}
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			metric := tt.metric()
+			converter := NewPrometheusConverter()
+
+			converter.addHistogramDataPoints(
+				metric.Histogram().DataPoints(),
+				pcommon.NewResource(),
+				Settings{
+					ExportCreatedMetric: true,
+				},
+				metric.Name(),
+			)
+
+			assert.Equal(t, tt.want(), converter.unique)
+			assert.Empty(t, converter.conflicts)
+		})
+	}
+}
--- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go
@ -26,6 +26,7 @@ import (

 	"github.com/prometheus/prometheus/model/value"
 	"github.com/prometheus/prometheus/prompb"
+	"github.com/prometheus/prometheus/util/annotations"
 )

 const defaultZeroThreshold = 1e-128
@ -33,13 +34,15 @@ const defaultZeroThreshold = 1e-128
 // addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series
 // as native histogram samples.
 func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice,
-	resource pcommon.Resource, settings Settings, promName string) error {
+	resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) {
+	var annots annotations.Annotations
 	for x := 0; x < dataPoints.Len(); x++ {
 		pt := dataPoints.At(x)

-		histogram, err := exponentialToNativeHistogram(pt)
+		histogram, ws, err := exponentialToNativeHistogram(pt)
+		annots.Merge(ws)
 		if err != nil {
-			return err
+			return annots, err
 		}

 		lbls := createAttributes(
@ -58,15 +61,16 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetr
 		ts.Exemplars = append(ts.Exemplars, exemplars...)
 	}

-	return nil
+	return annots, nil
 }

 // exponentialToNativeHistogram translates OTel Exponential Histogram data point
 // to Prometheus Native Histogram.
-func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, error) {
+func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) {
+	var annots annotations.Annotations
 	scale := p.Scale()
 	if scale < -4 {
-		return prompb.Histogram{},
+		return prompb.Histogram{}, annots,
 			fmt.Errorf("cannot convert exponential to native histogram."+
 				" Scale must be >= -4, was %d", scale)
 	}
@ -114,8 +118,11 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom
 			h.Sum = p.Sum()
 		}
 		h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()}
+		if p.Count() == 0 && h.Sum != 0 {
+			annots.Add(fmt.Errorf("exponential histogram data point has zero count, but non-zero sum: %f", h.Sum))
 		}
-	return h, nil
+	}
+	return h, annots, nil
 }

 // convertBucketsLayout translates OTel Exponential Histogram dense buckets
--- a/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms_test.go
@ -0,0 +1,771 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/histograms_test.go
+// Provenance-includes-license: Apache-2.0
+// Provenance-includes-copyright: Copyright The OpenTelemetry Authors.
+
+package prometheusremotewrite
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/prometheus/common/model"
+	"github.com/prometheus/prometheus/prompb"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/collector/pdata/pcommon"
+	"go.opentelemetry.io/collector/pdata/pmetric"
+
+	prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus"
+)
+
+type expectedBucketLayout struct {
+	wantSpans  []prompb.BucketSpan
+	wantDeltas []int64
+}
+
+func TestConvertBucketsLayout(t *testing.T) {
+	tests := []struct {
+		name       string
+		buckets    func() pmetric.ExponentialHistogramDataPointBuckets
+		wantLayout map[int32]expectedBucketLayout
+	}{
+		{
+			name: "zero offset",
+			buckets: func() pmetric.ExponentialHistogramDataPointBuckets {
+				b := pmetric.NewExponentialHistogramDataPointBuckets()
+				b.SetOffset(0)
+				b.BucketCounts().FromRaw([]uint64{4, 3, 2, 1})
+				return b
+			},
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 1,
+							Length: 4,
+						},
+					},
+					wantDeltas: []int64{4, -1, -1, -1},
+				},
+				1: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 1,
+							Length: 2,
+						},
+					},
+					// 4+3, 2+1 = 7, 3 =delta= 7, -4
+					wantDeltas: []int64{7, -4},
+				},
+				2: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 1,
+							Length: 1,
+						},
+					},
+					// 4+3+2+1 = 10 =delta= 10
+					wantDeltas: []int64{10},
+				},
+			},
+		},
+		{
+			name: "offset 1",
+			buckets: func() pmetric.ExponentialHistogramDataPointBuckets {
+				b := pmetric.NewExponentialHistogramDataPointBuckets()
+				b.SetOffset(1)
+				b.BucketCounts().FromRaw([]uint64{4, 3, 2, 1})
+				return b
+			},
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 2,
+							Length: 4,
+						},
+					},
+					wantDeltas: []int64{4, -1, -1, -1},
+				},
+				1: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 1,
+							Length: 3,
+						},
+					},
+					wantDeltas: []int64{4, 1, -4}, // 0+4, 3+2, 1+0 = 4, 5, 1
+				},
+				2: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 1,
+							Length: 2,
+						},
+					},
+					wantDeltas: []int64{9, -8}, // 0+4+3+2, 1+0+0+0 = 9, 1
+				},
+			},
+		},
+		{
+			name: "positive offset",
+			buckets: func() pmetric.ExponentialHistogramDataPointBuckets {
+				b := pmetric.NewExponentialHistogramDataPointBuckets()
+				b.SetOffset(4)
+				b.BucketCounts().FromRaw([]uint64{4, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})
+				return b
+			},
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 5,
+							Length: 4,
+						},
+						{
+							Offset: 12,
+							Length: 1,
+						},
+					},
+					wantDeltas: []int64{4, -2, -2, 2, -1},
+				},
+				1: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 3,
+							Length: 2,
+						},
+						{
+							Offset: 6,
+							Length: 1,
+						},
+					},
+					// Downscale:
+					// 4+2, 0+2, 0+0, 0+0, 0+0, 0+0, 0+0, 0+0, 1+0 = 6, 2, 0, 0, 0, 0, 0, 0, 1
+					wantDeltas: []int64{6, -4, -1},
+				},
+				2: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 2,
+							Length: 1,
+						},
+						{
+							Offset: 3,
+							Length: 1,
+						},
+					},
+					// Downscale:
+					// 4+2+0+2, 0+0+0+0, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 0, 1
+					// Check by scaling from the previous layout: 6+2, 0+0, 0+0, 0+0, 1+0 = 8, 0, 0, 0, 1
+					wantDeltas: []int64{8, -7},
+				},
+			},
+		},
+		{
+			name: "scaledown merges spans",
+			buckets: func() pmetric.ExponentialHistogramDataPointBuckets {
+				b := pmetric.NewExponentialHistogramDataPointBuckets()
+				b.SetOffset(4)
+				b.BucketCounts().FromRaw([]uint64{4, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1})
+				return b
+			},
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 5,
+							Length: 4,
+						},
+						{
+							Offset: 8,
+							Length: 1,
+						},
+					},
+					wantDeltas: []int64{4, -2, -2, 2, -1},
+				},
+				1: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 3,
+							Length: 2,
+						},
+						{
+							Offset: 4,
+							Length: 1,
+						},
+					},
+					// Downscale:
+					// 4+2, 0+2, 0+0, 0+0, 0+0, 0+0, 1+0 = 6, 2, 0, 0, 0, 0, 1
+					wantDeltas: []int64{6, -4, -1},
+				},
+				2: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 2,
+							Length: 4,
+						},
+					},
+					// Downscale:
+					// 4+2+0+2, 0+0+0+0, 0+0+0+0, 1+0+0+0 = 8, 0, 0, 1
+					// Check by scaling from the previous layout: 6+2, 0+0, 0+0, 1+0 = 8, 0, 0, 1
+					wantDeltas: []int64{8, -8, 0, 1},
+				},
+			},
+		},
+		{
+			name: "negative offset",
+			buckets: func() pmetric.ExponentialHistogramDataPointBuckets {
+				b := pmetric.NewExponentialHistogramDataPointBuckets()
+				b.SetOffset(-2)
+				b.BucketCounts().FromRaw([]uint64{3, 1, 0, 0, 0, 1})
+				return b
+			},
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: -1,
+							Length: 2,
+						},
+						{
+							Offset: 3,
+							Length: 1,
+						},
+					},
+					wantDeltas: []int64{3, -2, 0},
+				},
+				1: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 0,
+							Length: 3,
+						},
+					},
+					// Downscale:
+					// 3+1, 0+0, 0+1 = 4, 0, 1
+					wantDeltas: []int64{4, -4, 1},
+				},
+				2: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 0,
+							Length: 2,
+						},
+					},
+					// Downscale:
+					// 0+0+3+1, 0+0+0+1 = 4, 1
+					wantDeltas: []int64{4, -3},
+				},
+			},
+		},
+		{
+			name: "buckets with gaps of size 1",
+			buckets: func() pmetric.ExponentialHistogramDataPointBuckets {
+				b := pmetric.NewExponentialHistogramDataPointBuckets()
+				b.SetOffset(-2)
+				b.BucketCounts().FromRaw([]uint64{3, 1, 0, 1, 0, 1})
+				return b
+			},
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: -1,
+							Length: 6,
+						},
+					},
+					wantDeltas: []int64{3, -2, -1, 1, -1, 1},
+				},
+				1: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 0,
+							Length: 3,
+						},
+					},
+					// Downscale:
+					// 3+1, 0+1, 0+1 = 4, 1, 1
+					wantDeltas: []int64{4, -3, 0},
+				},
+				2: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 0,
+							Length: 2,
+						},
+					},
+					// Downscale:
+					// 0+0+3+1, 0+1+0+1 = 4, 2
+					wantDeltas: []int64{4, -2},
+				},
+			},
+		},
+		{
+			name: "buckets with gaps of size 2",
+			buckets: func() pmetric.ExponentialHistogramDataPointBuckets {
+				b := pmetric.NewExponentialHistogramDataPointBuckets()
+				b.SetOffset(-2)
+				b.BucketCounts().FromRaw([]uint64{3, 0, 0, 1, 0, 0, 1})
+				return b
+			},
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: -1,
+							Length: 7,
+						},
+					},
+					wantDeltas: []int64{3, -3, 0, 1, -1, 0, 1},
+				},
+				1: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 0,
+							Length: 4,
+						},
+					},
+					// Downscale:
+					// 3+0, 0+1, 0+0, 0+1 = 3, 1, 0, 1
+					wantDeltas: []int64{3, -2, -1, 1},
+				},
+				2: {
+					wantSpans: []prompb.BucketSpan{
+						{
+							Offset: 0,
+							Length: 3,
+						},
+					},
+					// Downscale:
+					// 0+0+3+0, 0+1+0+0, 1+0+0+0 = 3, 1, 1
+					wantDeltas: []int64{3, -2, 0},
+				},
+			},
+		},
+		{
+			name:    "zero buckets",
+			buckets: pmetric.NewExponentialHistogramDataPointBuckets,
+			wantLayout: map[int32]expectedBucketLayout{
+				0: {
+					wantSpans:  nil,
+					wantDeltas: nil,
+				},
+				1: {
+					wantSpans:  nil,
+					wantDeltas: nil,
+				},
+				2: {
+					wantSpans:  nil,
+					wantDeltas: nil,
+				},
+			},
+		},
+	}
+	for _, tt := range tests {
+		for scaleDown, wantLayout := range tt.wantLayout {
+			t.Run(fmt.Sprintf("%s-scaleby-%d", tt.name, scaleDown), func(t *testing.T) {
+				gotSpans, gotDeltas := convertBucketsLayout(tt.buckets(), scaleDown)
+				assert.Equal(t, wantLayout.wantSpans, gotSpans)
+				assert.Equal(t, wantLayout.wantDeltas, gotDeltas)
+			})
+		}
+	}
+}
+
+func BenchmarkConvertBucketLayout(b *testing.B) {
+	scenarios := []struct {
+		gap int
+	}{
+		{gap: 0},
+		{gap: 1},
+		{gap: 2},
+		{gap: 3},
+	}
+
+	for _, scenario := range scenarios {
+		buckets := pmetric.NewExponentialHistogramDataPointBuckets()
+		buckets.SetOffset(0)
+		for i := 0; i < 1000; i++ {
+			if i%(scenario.gap+1) == 0 {
+				buckets.BucketCounts().Append(10)
+			} else {
+				buckets.BucketCounts().Append(0)
+			}
+		}
+		b.Run(fmt.Sprintf("gap %d", scenario.gap), func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				convertBucketsLayout(buckets, 0)
+			}
+		})
+	}
+}
+
+func TestExponentialToNativeHistogram(t *testing.T) {
+	tests := []struct {
+		name            string
+		exponentialHist func() pmetric.ExponentialHistogramDataPoint
+		wantNativeHist  func() prompb.Histogram
+		wantErrMessage  string
+	}{
+		{
+			name: "convert exp. to native histogram",
+			exponentialHist: func() pmetric.ExponentialHistogramDataPoint {
+				pt := pmetric.NewExponentialHistogramDataPoint()
+				pt.SetStartTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(100)))
+				pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500)))
+				pt.SetCount(4)
+				pt.SetSum(10.1)
+				pt.SetScale(1)
+				pt.SetZeroCount(1)
+
+				pt.Positive().BucketCounts().FromRaw([]uint64{1, 1})
+				pt.Positive().SetOffset(1)
+
+				pt.Negative().BucketCounts().FromRaw([]uint64{1, 1})
+				pt.Negative().SetOffset(1)
+
+				return pt
+			},
+			wantNativeHist: func() prompb.Histogram {
+				return prompb.Histogram{
+					Count:          &prompb.Histogram_CountInt{CountInt: 4},
+					Sum:            10.1,
+					Schema:         1,
+					ZeroThreshold:  defaultZeroThreshold,
+					ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1},
+					NegativeSpans:  []prompb.BucketSpan{{Offset: 2, Length: 2}},
+					NegativeDeltas: []int64{1, 0},
+					PositiveSpans:  []prompb.BucketSpan{{Offset: 2, Length: 2}},
+					PositiveDeltas: []int64{1, 0},
+					Timestamp:      500,
+				}
+			},
+		},
+		{
+			name: "convert exp. to native histogram with no sum",
+			exponentialHist: func() pmetric.ExponentialHistogramDataPoint {
+				pt := pmetric.NewExponentialHistogramDataPoint()
+				pt.SetStartTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(100)))
+				pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500)))
+
+				pt.SetCount(4)
+				pt.SetScale(1)
+				pt.SetZeroCount(1)
+
+				pt.Positive().BucketCounts().FromRaw([]uint64{1, 1})
+				pt.Positive().SetOffset(1)
+
+				pt.Negative().BucketCounts().FromRaw([]uint64{1, 1})
+				pt.Negative().SetOffset(1)
+
+				return pt
+			},
+			wantNativeHist: func() prompb.Histogram {
+				return prompb.Histogram{
+					Count:          &prompb.Histogram_CountInt{CountInt: 4},
+					Schema:         1,
+					ZeroThreshold:  defaultZeroThreshold,
+					ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1},
+					NegativeSpans:  []prompb.BucketSpan{{Offset: 2, Length: 2}},
+					NegativeDeltas: []int64{1, 0},
+					PositiveSpans:  []prompb.BucketSpan{{Offset: 2, Length: 2}},
+					PositiveDeltas: []int64{1, 0},
+					Timestamp:      500,
+				}
+			},
+		},
+		{
+			name: "invalid negative scale",
+			exponentialHist: func() pmetric.ExponentialHistogramDataPoint {
+				pt := pmetric.NewExponentialHistogramDataPoint()
+				pt.SetScale(-10)
+				return pt
+			},
+			wantErrMessage: "cannot convert exponential to native histogram." +
+				" Scale must be >= -4, was -10",
+		},
+		{
+			name: "no downscaling at scale 8",
+			exponentialHist: func() pmetric.ExponentialHistogramDataPoint {
+				pt := pmetric.NewExponentialHistogramDataPoint()
+				pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500)))
+				pt.SetCount(6)
+				pt.SetSum(10.1)
+				pt.SetScale(8)
+				pt.SetZeroCount(1)
+
+				pt.Positive().BucketCounts().FromRaw([]uint64{1, 1, 1})
+				pt.Positive().SetOffset(1)
+
+				pt.Negative().BucketCounts().FromRaw([]uint64{1, 1, 1})
+				pt.Negative().SetOffset(2)
+				return pt
+			},
+			wantNativeHist: func() prompb.Histogram {
+				return prompb.Histogram{
+					Count:          &prompb.Histogram_CountInt{CountInt: 6},
+					Sum:            10.1,
+					Schema:         8,
+					ZeroThreshold:  defaultZeroThreshold,
+					ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1},
+					PositiveSpans:  []prompb.BucketSpan{{Offset: 2, Length: 3}},
+					PositiveDeltas: []int64{1, 0, 0}, // 1, 1, 1
+					NegativeSpans:  []prompb.BucketSpan{{Offset: 3, Length: 3}},
+					NegativeDeltas: []int64{1, 0, 0}, // 1, 1, 1
+					Timestamp:      500,
+				}
+			},
+		},
+		{
+			name: "downsample if scale is more than 8",
+			exponentialHist: func() pmetric.ExponentialHistogramDataPoint {
+				pt := pmetric.NewExponentialHistogramDataPoint()
+				pt.SetTimestamp(pcommon.NewTimestampFromTime(time.UnixMilli(500)))
+				pt.SetCount(6)
+				pt.SetSum(10.1)
+				pt.SetScale(9)
+				pt.SetZeroCount(1)
+
+				pt.Positive().BucketCounts().FromRaw([]uint64{1, 1, 1})
+				pt.Positive().SetOffset(1)
+
+				pt.Negative().BucketCounts().FromRaw([]uint64{1, 1, 1})
+				pt.Negative().SetOffset(2)
+				return pt
+			},
+			wantNativeHist: func() prompb.Histogram {
+				return prompb.Histogram{
+					Count:          &prompb.Histogram_CountInt{CountInt: 6},
+					Sum:            10.1,
+					Schema:         8,
+					ZeroThreshold:  defaultZeroThreshold,
+					ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 1},
+					PositiveSpans:  []prompb.BucketSpan{{Offset: 1, Length: 2}},
+					PositiveDeltas: []int64{1, 1}, // 0+1, 1+1 = 1, 2
+					NegativeSpans:  []prompb.BucketSpan{{Offset: 2, Length: 2}},
+					NegativeDeltas: []int64{2, -1}, // 1+1, 1+0 = 2, 1
+					Timestamp:      500,
+				}
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			validateExponentialHistogramCount(t, tt.exponentialHist()) // Sanity check.
+			got, annots, err := exponentialToNativeHistogram(tt.exponentialHist())
+			if tt.wantErrMessage != "" {
+				assert.ErrorContains(t, err, tt.wantErrMessage)
+				return
+			}
+
+			require.NoError(t, err)
+			require.Empty(t, annots)
+			assert.Equal(t, tt.wantNativeHist(), got)
+			validateNativeHistogramCount(t, got)
+		})
+	}
+}
+
+func validateExponentialHistogramCount(t *testing.T, h pmetric.ExponentialHistogramDataPoint) {
+	actualCount := uint64(0)
+	for _, bucket := range h.Positive().BucketCounts().AsRaw() {
+		actualCount += bucket
+	}
+	for _, bucket := range h.Negative().BucketCounts().AsRaw() {
+		actualCount += bucket
+	}
+	require.Equal(t, h.Count(), actualCount, "exponential histogram count mismatch")
+}
+
+func validateNativeHistogramCount(t *testing.T, h prompb.Histogram) {
+	require.NotNil(t, h.Count)
+	require.IsType(t, &prompb.Histogram_CountInt{}, h.Count)
+	want := h.Count.(*prompb.Histogram_CountInt).CountInt
+	var (
+		actualCount uint64
+		prevBucket  int64
+	)
+	for _, delta := range h.PositiveDeltas {
+		prevBucket += delta
+		actualCount += uint64(prevBucket)
+	}
+	prevBucket = 0
+	for _, delta := range h.NegativeDeltas {
+		prevBucket += delta
+		actualCount += uint64(prevBucket)
+	}
+	assert.Equal(t, want, actualCount, "native histogram count mismatch")
+}
+
+func TestPrometheusConverter_addExponentialHistogramDataPoints(t *testing.T) {
+	tests := []struct {
+		name       string
+		metric     func() pmetric.Metric
+		wantSeries func() map[uint64]*prompb.TimeSeries
+	}{
+		{
+			name: "histogram data points with same labels",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_hist")
+				metric.SetEmptyExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+
+				pt := metric.ExponentialHistogram().DataPoints().AppendEmpty()
+				pt.SetCount(7)
+				pt.SetScale(1)
+				pt.Positive().SetOffset(-1)
+				pt.Positive().BucketCounts().FromRaw([]uint64{4, 2})
+				pt.Exemplars().AppendEmpty().SetDoubleValue(1)
+				pt.Attributes().PutStr("attr", "test_attr")
+
+				pt = metric.ExponentialHistogram().DataPoints().AppendEmpty()
+				pt.SetCount(4)
+				pt.SetScale(1)
+				pt.Positive().SetOffset(-1)
+				pt.Positive().BucketCounts().FromRaw([]uint64{4, 2, 1})
+				pt.Exemplars().AppendEmpty().SetDoubleValue(2)
+				pt.Attributes().PutStr("attr", "test_attr")
+
+				return metric
+			},
+			wantSeries: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist"},
+					{Name: "attr", Value: "test_attr"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Histograms: []prompb.Histogram{
+							{
+								Count:          &prompb.Histogram_CountInt{CountInt: 7},
+								Schema:         1,
+								ZeroThreshold:  defaultZeroThreshold,
+								ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0},
+								PositiveSpans:  []prompb.BucketSpan{{Offset: 0, Length: 2}},
+								PositiveDeltas: []int64{4, -2},
+							},
+							{
+								Count:          &prompb.Histogram_CountInt{CountInt: 4},
+								Schema:         1,
+								ZeroThreshold:  defaultZeroThreshold,
+								ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0},
+								PositiveSpans:  []prompb.BucketSpan{{Offset: 0, Length: 3}},
+								PositiveDeltas: []int64{4, -2, -1},
+							},
+						},
+						Exemplars: []prompb.Exemplar{
+							{Value: 1},
+							{Value: 2},
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "histogram data points with different labels",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_hist")
+				metric.SetEmptyExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+
+				pt := metric.ExponentialHistogram().DataPoints().AppendEmpty()
+				pt.SetCount(7)
+				pt.SetScale(1)
+				pt.Positive().SetOffset(-1)
+				pt.Positive().BucketCounts().FromRaw([]uint64{4, 2})
+				pt.Exemplars().AppendEmpty().SetDoubleValue(1)
+				pt.Attributes().PutStr("attr", "test_attr")
+
+				pt = metric.ExponentialHistogram().DataPoints().AppendEmpty()
+				pt.SetCount(4)
+				pt.SetScale(1)
+				pt.Negative().SetOffset(-1)
+				pt.Negative().BucketCounts().FromRaw([]uint64{4, 2, 1})
+				pt.Exemplars().AppendEmpty().SetDoubleValue(2)
+				pt.Attributes().PutStr("attr", "test_attr_two")
+
+				return metric
+			},
+			wantSeries: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist"},
+					{Name: "attr", Value: "test_attr"},
+				}
+				labelsAnother := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_hist"},
+					{Name: "attr", Value: "test_attr_two"},
+				}
+
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Histograms: []prompb.Histogram{
+							{
+								Count:          &prompb.Histogram_CountInt{CountInt: 7},
+								Schema:         1,
+								ZeroThreshold:  defaultZeroThreshold,
+								ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0},
+								PositiveSpans:  []prompb.BucketSpan{{Offset: 0, Length: 2}},
+								PositiveDeltas: []int64{4, -2},
+							},
+						},
+						Exemplars: []prompb.Exemplar{
+							{Value: 1},
+						},
+					},
+					timeSeriesSignature(labelsAnother): {
+						Labels: labelsAnother,
+						Histograms: []prompb.Histogram{
+							{
+								Count:          &prompb.Histogram_CountInt{CountInt: 4},
+								Schema:         1,
+								ZeroThreshold:  defaultZeroThreshold,
+								ZeroCount:      &prompb.Histogram_ZeroCountInt{ZeroCountInt: 0},
+								NegativeSpans:  []prompb.BucketSpan{{Offset: 0, Length: 3}},
+								NegativeDeltas: []int64{4, -2, -1},
+							},
+						},
+						Exemplars: []prompb.Exemplar{
+							{Value: 2},
+						},
+					},
+				}
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			metric := tt.metric()
+
+			converter := NewPrometheusConverter()
+			annots, err := converter.addExponentialHistogramDataPoints(
+				metric.ExponentialHistogram().DataPoints(),
+				pcommon.NewResource(),
+				Settings{
+					ExportCreatedMetric: true,
+				},
+				prometheustranslator.BuildCompliantName(metric, "", true),
+			)
+			require.NoError(t, err)
+			require.Empty(t, annots)
+
+			assert.Equal(t, tt.wantSeries(), converter.unique)
+			assert.Empty(t, converter.conflicts)
+		})
+	}
+}
--- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go
@ -27,6 +27,7 @@ import (

 	"github.com/prometheus/prometheus/prompb"
 	prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus"
+	"github.com/prometheus/prometheus/util/annotations"
 )

 type Settings struct {
@ -53,7 +54,7 @@ func NewPrometheusConverter() *PrometheusConverter {
 }

 // FromMetrics converts pmetric.Metrics to Prometheus remote write format.
-func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) {
+func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) {
 	resourceMetricsSlice := md.ResourceMetrics()
 	for i := 0; i < resourceMetricsSlice.Len(); i++ {
 		resourceMetrics := resourceMetricsSlice.At(i)
@ -107,12 +108,14 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
 						errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name()))
 						break
 					}
-					errs = multierr.Append(errs, c.addExponentialHistogramDataPoints(
+					ws, err := c.addExponentialHistogramDataPoints(
 						dataPoints,
 						resource,
 						settings,
 						promName,
-					))
+					)
+					annots.Merge(ws)
+					errs = multierr.Append(errs, err)
 				case pmetric.MetricTypeSummary:
 					dataPoints := metric.Summary().DataPoints()
 					if dataPoints.Len() == 0 {
@ -128,7 +131,7 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings)
 		addResourceTargetInfo(resource, settings, mostRecentTimestamp, c)
 	}

-	return
+	return annots, errs
 }

 func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool {
--- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go
@ -27,6 +27,41 @@ import (
 	"go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp"
 )

+func TestFromMetrics(t *testing.T) {
+	t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) {
+		request := pmetricotlp.NewExportRequest()
+		rm := request.Metrics().ResourceMetrics().AppendEmpty()
+		generateAttributes(rm.Resource().Attributes(), "resource", 10)
+
+		metrics := rm.ScopeMetrics().AppendEmpty().Metrics()
+		ts := pcommon.NewTimestampFromTime(time.Now())
+
+		for i := 1; i <= 10; i++ {
+			m := metrics.AppendEmpty()
+			m.SetEmptyExponentialHistogram()
+			m.SetName(fmt.Sprintf("histogram-%d", i))
+			m.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+			h := m.ExponentialHistogram().DataPoints().AppendEmpty()
+			h.SetTimestamp(ts)
+
+			h.SetCount(0)
+			h.SetSum(155)
+
+			generateAttributes(h.Attributes(), "series", 10)
+		}
+
+		converter := NewPrometheusConverter()
+		annots, err := converter.FromMetrics(request.Metrics(), Settings{})
+		require.NoError(t, err)
+		require.NotEmpty(t, annots)
+		ws, infos := annots.AsStrings("", 0, 0)
+		require.Empty(t, infos)
+		require.Equal(t, []string{
+			"exponential histogram data point has zero count, but non-zero sum: 155.000000",
+		}, ws)
+	})
+}
+
 func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {
 	for _, resourceAttributeCount := range []int{0, 5, 50} {
 		b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) {
@ -49,7 +84,9 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) {

 											for i := 0; i < b.N; i++ {
 												converter := NewPrometheusConverter()
-												require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{}))
+												annots, err := converter.FromMetrics(payload.Metrics(), Settings{})
+												require.NoError(b, err)
+												require.Empty(b, annots)
 												require.NotNil(b, converter.TimeSeries())
 											}
 										})
--- a/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/number_data_points_test.go
@ -0,0 +1,258 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/number_data_points_test.go
+// Provenance-includes-license: Apache-2.0
+// Provenance-includes-copyright: Copyright The OpenTelemetry Authors.
+
+package prometheusremotewrite
+
+import (
+	"testing"
+	"time"
+
+	"github.com/prometheus/common/model"
+	"github.com/prometheus/prometheus/prompb"
+	"github.com/stretchr/testify/assert"
+	"go.opentelemetry.io/collector/pdata/pcommon"
+	"go.opentelemetry.io/collector/pdata/pmetric"
+)
+
+func TestPrometheusConverter_addGaugeNumberDataPoints(t *testing.T) {
+	ts := uint64(time.Now().UnixNano())
+	tests := []struct {
+		name   string
+		metric func() pmetric.Metric
+		want   func() map[uint64]*prompb.TimeSeries
+	}{
+		{
+			name: "gauge",
+			metric: func() pmetric.Metric {
+				return getIntGaugeMetric(
+					"test",
+					pcommon.NewMap(),
+					1, ts,
+				)
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{
+								Value:     1,
+								Timestamp: convertTimeStamp(pcommon.Timestamp(ts)),
+							}},
+					},
+				}
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			metric := tt.metric()
+			converter := NewPrometheusConverter()
+
+			converter.addGaugeNumberDataPoints(
+				metric.Gauge().DataPoints(),
+				pcommon.NewResource(),
+				Settings{
+					ExportCreatedMetric: true,
+				},
+				metric.Name(),
+			)
+
+			assert.Equal(t, tt.want(), converter.unique)
+			assert.Empty(t, converter.conflicts)
+		})
+	}
+}
+
+func TestPrometheusConverter_addSumNumberDataPoints(t *testing.T) {
+	ts := pcommon.Timestamp(time.Now().UnixNano())
+	tests := []struct {
+		name   string
+		metric func() pmetric.Metric
+		want   func() map[uint64]*prompb.TimeSeries
+	}{
+		{
+			name: "sum",
+			metric: func() pmetric.Metric {
+				return getIntSumMetric(
+					"test",
+					pcommon.NewMap(),
+					1,
+					uint64(ts.AsTime().UnixNano()),
+				)
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{
+								Value:     1,
+								Timestamp: convertTimeStamp(ts),
+							}},
+					},
+				}
+			},
+		},
+		{
+			name: "sum with exemplars",
+			metric: func() pmetric.Metric {
+				m := getIntSumMetric(
+					"test",
+					pcommon.NewMap(),
+					1,
+					uint64(ts.AsTime().UnixNano()),
+				)
+				m.Sum().DataPoints().At(0).Exemplars().AppendEmpty().SetDoubleValue(2)
+				return m
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{{
+							Value:     1,
+							Timestamp: convertTimeStamp(ts),
+						}},
+						Exemplars: []prompb.Exemplar{
+							{Value: 2},
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "monotonic cumulative sum with start timestamp",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_sum")
+				metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+				metric.SetEmptySum().SetIsMonotonic(true)
+
+				dp := metric.Sum().DataPoints().AppendEmpty()
+				dp.SetDoubleValue(1)
+				dp.SetTimestamp(ts)
+				dp.SetStartTimestamp(ts)
+
+				return metric
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_sum"},
+				}
+				createdLabels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_sum" + createdSuffix},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{Value: 1, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+					timeSeriesSignature(createdLabels): {
+						Labels: createdLabels,
+						Samples: []prompb.Sample{
+							{Value: float64(convertTimeStamp(ts)), Timestamp: convertTimeStamp(ts)},
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "monotonic cumulative sum with no start time",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_sum")
+				metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+				metric.SetEmptySum().SetIsMonotonic(true)
+
+				dp := metric.Sum().DataPoints().AppendEmpty()
+				dp.SetTimestamp(ts)
+
+				return metric
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_sum"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+				}
+			},
+		},
+		{
+			name: "non-monotonic cumulative sum with start time",
+			metric: func() pmetric.Metric {
+				metric := pmetric.NewMetric()
+				metric.SetName("test_sum")
+				metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+				metric.SetEmptySum().SetIsMonotonic(false)
+
+				dp := metric.Sum().DataPoints().AppendEmpty()
+				dp.SetTimestamp(ts)
+
+				return metric
+			},
+			want: func() map[uint64]*prompb.TimeSeries {
+				labels := []prompb.Label{
+					{Name: model.MetricNameLabel, Value: "test_sum"},
+				}
+				return map[uint64]*prompb.TimeSeries{
+					timeSeriesSignature(labels): {
+						Labels: labels,
+						Samples: []prompb.Sample{
+							{Value: 0, Timestamp: convertTimeStamp(ts)},
+						},
+					},
+				}
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			metric := tt.metric()
+			converter := NewPrometheusConverter()
+
+			converter.addSumNumberDataPoints(
+				metric.Sum().DataPoints(),
+				pcommon.NewResource(),
+				metric,
+				Settings{
+					ExportCreatedMetric: true,
+				},
+				metric.Name(),
+			)
+
+			assert.Equal(t, tt.want(), converter.unique)
+			assert.Empty(t, converter.conflicts)
+		})
+	}
+}
--- a/storage/remote/otlptranslator/prometheusremotewrite/testutil_test.go
+++ b/storage/remote/otlptranslator/prometheusremotewrite/testutil_test.go
@ -0,0 +1,55 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// Provenance-includes-location: https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/247a9f996e09a83cdc25addf70c05e42b8b30186/pkg/translator/prometheusremotewrite/testutil_test.go
+// Provenance-includes-license: Apache-2.0
+// Provenance-includes-copyright: Copyright The OpenTelemetry Authors.
+
+package prometheusremotewrite
+
+import (
+	"strings"
+
+	"go.opentelemetry.io/collector/pdata/pcommon"
+	"go.opentelemetry.io/collector/pdata/pmetric"
+)
+
+// getIntGaugeMetric returns a gauge metric called name that holds exactly one
+// integer data point. The data point carries value, a copy of attributes, a
+// start timestamp of 0 and a timestamp of ts.
+//
+// When name begins with "staleNaN", the data point is additionally flagged
+// with NoRecordedValue.
+func getIntGaugeMetric(name string, attributes pcommon.Map, value int64, ts uint64) pmetric.Metric {
+	metric := pmetric.NewMetric()
+	metric.SetName(name)
+	dp := metric.SetEmptyGauge().DataPoints().AppendEmpty()
+	// The metric name doubles as a switch: a "staleNaN" prefix marks the point as having no recorded value.
+	if strings.HasPrefix(name, "staleNaN") {
+		dp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true))
+	}
+	dp.SetIntValue(value)
+	// Copy the attributes so the data point does not share the caller's map.
+	attributes.CopyTo(dp.Attributes())
+
+	dp.SetStartTimestamp(pcommon.Timestamp(0))
+	dp.SetTimestamp(pcommon.Timestamp(ts))
+	return metric
+}
+
+// getIntSumMetric returns a sum metric called name with cumulative aggregation
+// temporality that holds exactly one integer data point. The data point
+// carries value, a copy of attributes, a start timestamp of 0 and a timestamp
+// of ts.
+//
+// When name begins with "staleNaN", the data point is additionally flagged
+// with NoRecordedValue.
+func getIntSumMetric(name string, attributes pcommon.Map, value int64, ts uint64) pmetric.Metric {
+	metric := pmetric.NewMetric()
+	metric.SetName(name)
+	metric.SetEmptySum().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative)
+	dp := metric.Sum().DataPoints().AppendEmpty()
+	// The metric name doubles as a switch: a "staleNaN" prefix marks the point as having no recorded value.
+	if strings.HasPrefix(name, "staleNaN") {
+		dp.SetFlags(pmetric.DefaultDataPointFlags.WithNoRecordedValue(true))
+	}
+	dp.SetIntValue(value)
+	// Copy the attributes so the data point does not share the caller's map.
+	attributes.CopyTo(dp.Attributes())
+
+	dp.SetStartTimestamp(pcommon.Timestamp(0))
+	dp.SetTimestamp(pcommon.Timestamp(ts))
+	return metric
+}
--- a/storage/remote/queue_manager_test.go
+++ b/storage/remote/queue_manager_test.go
@ -930,7 +930,7 @@ func createHistograms(numSamples, numSeries int, floatHistogram bool) ([]record.
 }

 func createSeriesMetadata(series []record.RefSeries) []record.RefMetadata {
-	metas := make([]record.RefMetadata, len(series))
+	metas := make([]record.RefMetadata, 0, len(series))

 	for _, s := range series {
 		metas = append(metas, record.RefMetadata{
--- a/storage/remote/read.go
+++ b/storage/remote/read.go
@ -165,11 +165,11 @@ func (q *querier) Select(ctx context.Context, sortSeries bool, hints *storage.Se
 		return storage.ErrSeriesSet(fmt.Errorf("toQuery: %w", err))
 	}

-	res, err := q.client.Read(ctx, query)
+	res, err := q.client.Read(ctx, query, sortSeries)
 	if err != nil {
 		return storage.ErrSeriesSet(fmt.Errorf("remote_read: %w", err))
 	}
-	return newSeriesSetFilter(FromQueryResult(sortSeries, res), added)
+	return newSeriesSetFilter(res, added)
 }

 // addExternalLabels adds matchers for each external label. External labels
--- a/storage/remote/read_handler_test.go
+++ b/storage/remote/read_handler_test.go
@ -179,7 +179,7 @@ func BenchmarkStreamReadEndpoint(b *testing.B) {
 		require.Equal(b, 2, recorder.Code/100)

 		var results []*prompb.ChunkedReadResponse
-		stream := NewChunkedReader(recorder.Result().Body, DefaultChunkedReadLimit, nil)
+		stream := NewChunkedReader(recorder.Result().Body, config.DefaultChunkedReadLimit, nil)

 		for {
 			res := &prompb.ChunkedReadResponse{}
@ -280,7 +280,7 @@ func TestStreamReadEndpoint(t *testing.T) {
 	require.Equal(t, "", recorder.Result().Header.Get("Content-Encoding"))

 	var results []*prompb.ChunkedReadResponse
-	stream := NewChunkedReader(recorder.Result().Body, DefaultChunkedReadLimit, nil)
+	stream := NewChunkedReader(recorder.Result().Body, config.DefaultChunkedReadLimit, nil)
 	for {
 		res := &prompb.ChunkedReadResponse{}
 		err := stream.NextProto(res)
--- a/storage/remote/read_test.go
+++ b/storage/remote/read_test.go
@ -27,6 +27,7 @@ import (
 	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/model/labels"
 	"github.com/prometheus/prometheus/prompb"
+	"github.com/prometheus/prometheus/storage"
 	"github.com/prometheus/prometheus/util/annotations"
 	"github.com/prometheus/prometheus/util/testutil"
 )
@ -198,7 +199,7 @@ type mockedRemoteClient struct {
 	b     labels.ScratchBuilder
 }

-func (c *mockedRemoteClient) Read(_ context.Context, query *prompb.Query) (*prompb.QueryResult, error) {
+func (c *mockedRemoteClient) Read(_ context.Context, query *prompb.Query, sortSeries bool) (storage.SeriesSet, error) {
 	if c.got != nil {
 		return nil, fmt.Errorf("expected only one call to remote client got: %v", query)
 	}
@ -227,7 +228,7 @@ func (c *mockedRemoteClient) Read(_ context.Context, query *prompb.Query) (*prom
 			q.Timeseries = append(q.Timeseries, &prompb.TimeSeries{Labels: s.Labels})
 		}
 	}
-	return q, nil
+	return FromQueryResult(sortSeries, q), nil
 }

 func (c *mockedRemoteClient) reset() {
--- a/storage/remote/storage.go
+++ b/storage/remote/storage.go
@ -115,6 +115,7 @@ func (s *Storage) ApplyConfig(conf *config.Config) error {
 		c, err := NewReadClient(name, &ClientConfig{
 			URL:              rrConf.URL,
 			Timeout:          rrConf.RemoteTimeout,
+			ChunkedReadLimit: rrConf.ChunkedReadLimit,
 			HTTPClientConfig: rrConf.HTTPClientConfig,
 			Headers:          rrConf.Headers,
 		})
--- a/storage/remote/write_handler.go
+++ b/storage/remote/write_handler.go
@ -28,6 +28,7 @@ import (
 	"github.com/golang/snappy"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
+	"github.com/prometheus/common/model"

 	"github.com/prometheus/prometheus/config"
 	"github.com/prometheus/prometheus/model/exemplar"
@ -236,11 +237,16 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err
 	b := labels.NewScratchBuilder(0)
 	for _, ts := range req.Timeseries {
 		ls := ts.ToLabels(&b, nil)
-		if !ls.Has(labels.MetricName) || !ls.IsValid() {
-			level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String())
-			samplesWithInvalidLabels++
+
 		// TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are
 		// potentially written. Perhaps unify with fixed writeV2 implementation a bit.
+		if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) {
+			level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String())
+			samplesWithInvalidLabels++
+			continue
+		} else if duplicateLabel, hasDuplicate := ls.HasDuplicateLabelNames(); hasDuplicate {
+			level.Warn(h.logger).Log("msg", "Invalid labels for series.", "labels", ls.String(), "duplicated_label", duplicateLabel)
+			samplesWithInvalidLabels++
 			continue
 		}

@ -375,10 +381,14 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs *
 		// Validate series labels early.
 		// NOTE(bwplotka): While spec allows UTF-8, Prometheus Receiver may impose
 		// specific limits and follow https://prometheus.io/docs/specs/remote_write_spec_2_0/#invalid-samples case.
-		if !ls.Has(labels.MetricName) || !ls.IsValid() {
+		if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) {
 			badRequestErrs = append(badRequestErrs, fmt.Errorf("invalid metric name or labels, got %v", ls.String()))
 			samplesWithInvalidLabels += len(ts.Samples) + len(ts.Histograms)
 			continue
+		} else if duplicateLabel, hasDuplicate := ls.HasDuplicateLabelNames(); hasDuplicate {
+			badRequestErrs = append(badRequestErrs, fmt.Errorf("invalid labels for series, labels %v, duplicated label %s", ls.String(), duplicateLabel))
+			samplesWithInvalidLabels += len(ts.Samples) + len(ts.Histograms)
+			continue
 		}

 		allSamplesSoFar := rs.AllSamples()
@ -502,12 +512,17 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 	otlpCfg := h.configFunc().OTLPConfig

 	converter := otlptranslator.NewPrometheusConverter()
-	if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{
+	annots, err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{
 		AddMetricSuffixes:         true,
 		PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes,
-	}); err != nil {
+	})
+	if err != nil {
 		level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err)
 	}
+	ws, _ := annots.AsStrings("", 0, 0)
+	if len(ws) > 0 {
+		level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws)
+	}

 	err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{
 		Timeseries: converter.TimeSeries(),
--- a/storage/remote/write_handler_test.go
+++ b/storage/remote/write_handler_test.go
@ -338,6 +338,15 @@ func TestRemoteWriteHandler_V2Message(t *testing.T) {
 			expectedCode:     http.StatusBadRequest,
 			expectedRespBody: "invalid metric name or labels, got {__name__=\"\"}\n",
 		},
+		{
+			desc: "Partial write; first series with duplicate labels",
+			input: append(
+				// Series with __name__="test_metric1",test_metric1="test_metric1",test_metric1="test_metric1" labels.
+				[]writev2.TimeSeries{{LabelsRefs: []uint32{1, 2, 2, 2, 2, 2}, Samples: []writev2.Sample{{Value: 1, Timestamp: 1}}}},
+				writeV2RequestFixture.Timeseries...),
+			expectedCode:     http.StatusBadRequest,
+			expectedRespBody: "invalid labels for series, labels {__name__=\"test_metric1\", test_metric1=\"test_metric1\", test_metric1=\"test_metric1\"}, duplicated label test_metric1\n",
+		},
 		{
 			desc: "Partial write; first series with one OOO sample",
 			input: func() []writev2.TimeSeries {
@ -836,6 +845,13 @@ func (m *mockAppendable) Append(_ storage.SeriesRef, l labels.Labels, t int64, v
 		return 0, storage.ErrDuplicateSampleForTimestamp
 	}

+	if l.IsEmpty() {
+		return 0, tsdb.ErrInvalidSample
+	}
+	if _, hasDuplicates := l.HasDuplicateLabelNames(); hasDuplicates {
+		return 0, tsdb.ErrInvalidSample
+	}
+
 	m.latestSample[l.Hash()] = t
 	m.samples = append(m.samples, mockSample{l, t, v})
 	return 0, nil
@ -887,6 +903,13 @@ func (m *mockAppendable) AppendHistogram(_ storage.SeriesRef, l labels.Labels, t
 		return 0, storage.ErrDuplicateSampleForTimestamp
 	}

+	if l.IsEmpty() {
+		return 0, tsdb.ErrInvalidSample
+	}
+	if _, hasDuplicates := l.HasDuplicateLabelNames(); hasDuplicates {
+		return 0, tsdb.ErrInvalidSample
+	}
+
 	m.latestHistogram[l.Hash()] = t
 	m.histograms = append(m.histograms, mockHistogram{l, t, h, fh})
 	return 0, nil
--- a/template/template.go
+++ b/template/template.go
@ -166,7 +166,7 @@ func NewTemplateExpander(
 				return html_template.HTML(text)
 			},
 			"match":     regexp.MatchString,
-			"title":     strings.Title,
+			"title":     strings.Title, //nolint:staticcheck // TODO(beorn7): Need to come up with a replacement using the cases package.
 			"toUpper":   strings.ToUpper,
 			"toLower":   strings.ToLower,
 			"graphLink": strutil.GraphLinkForExpression,
--- a/tracing/testdata/ca.cer
+++ b/tracing/testdata/ca.cer
@ -1,3 +1,61 @@
+Certificate:
+    Data:
+        Version: 3 (0x2)
+        Serial Number:
+            93:6c:9e:29:8d:37:7b:66
+        Signature Algorithm: sha256WithRSAEncryption
+        Issuer: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Validity
+            Not Before: Aug 20 11:51:23 2024 GMT
+            Not After : Dec  5 11:51:23 2044 GMT
+        Subject: C = XX, L = Default City, O = Default Company Ltd, CN = Prometheus Test CA
+        Subject Public Key Info:
+            Public Key Algorithm: rsaEncryption
+                Public-Key: (2048 bit)
+                Modulus:
+                    00:e9:52:05:4d:f2:5a:95:04:2d:b8:73:8b:3c:e7:
+                    47:48:67:00:be:dd:6c:41:f3:7c:4b:44:73:a3:77:
+                    3e:84:af:30:d7:2a:ad:45:6a:b7:89:23:05:15:b3:
+                    aa:46:79:b8:95:64:cc:13:c4:44:a1:01:a0:e2:3d:
+                    a5:67:2b:aa:d3:13:06:43:33:1c:96:36:12:9e:c6:
+                    1d:36:9b:d7:47:bd:28:2d:88:15:04:fa:14:a3:ff:
+                    8c:26:22:c5:a2:15:c7:76:b3:11:f6:a3:44:9a:28:
+                    0f:ca:fb:f4:51:a8:6a:05:94:7c:77:47:c8:21:56:
+                    25:bf:e2:2e:df:33:f3:e4:bd:d6:47:a5:49:13:12:
+                    c8:1f:4c:d7:2a:56:a2:6c:c1:cf:55:05:5d:9a:75:
+                    a2:23:4e:e6:8e:ff:76:05:d6:e0:c8:0b:51:f0:b6:
+                    a1:b2:7d:8f:9c:6a:a5:ce:86:92:15:8c:5b:86:45:
+                    c0:4a:ff:54:b8:ee:cf:11:bd:07:cb:4b:7d:0b:a1:
+                    9d:72:86:9f:55:bc:f9:6c:d9:55:60:96:30:3f:ec:
+                    2d:f6:5f:9a:32:9a:5a:5c:1c:5f:32:f9:d6:0f:04:
+                    f8:81:08:04:9a:95:c3:9d:5a:30:8e:a5:0e:47:2f:
+                    00:ce:e0:2e:ad:5a:b8:b6:4c:55:7c:8a:59:22:b0:
+                    ed:73
+                Exponent: 65537 (0x10001)
+        X509v3 extensions:
+            X509v3 Subject Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Authority Key Identifier: 
+                CC:F5:05:99:E5:AB:12:69:D8:78:89:4A:31:CA:F0:8B:0B:AD:66:1B
+            X509v3 Basic Constraints: 
+                CA:TRUE
+    Signature Algorithm: sha256WithRSAEncryption
+    Signature Value:
+        4a:a1:b0:bc:c8:87:4f:7c:96:62:e5:09:29:ae:3a:2e:68:d0:
+        d2:c5:68:ed:ea:83:36:b1:86:f3:b9:e9:19:2b:b6:73:10:6f:
+        df:7f:bb:f1:76:81:03:c1:a1:5a:ee:6c:44:b8:7c:10:d1:5a:
+        d7:c1:92:64:59:35:a6:e0:aa:08:41:37:6e:e7:c8:b6:bd:0c:
+        4b:47:78:ec:c4:b4:15:a3:62:76:4a:39:8e:6e:19:ff:f0:c0:
+        8a:7e:1c:cd:87:e5:00:6c:f1:ce:27:26:ff:b8:e9:eb:f7:2f:
+        bd:c2:4b:9c:d6:57:de:74:74:b3:4f:03:98:9a:b5:08:2d:16:
+        ca:7f:b6:c8:76:62:86:1b:7c:f2:3e:6c:78:cc:2c:95:9a:bb:
+        77:25:e8:80:ff:9b:e8:f8:9a:85:3b:85:b7:17:4e:77:a1:cf:
+        4d:b9:d0:25:e8:5d:8c:e6:7c:f1:d9:52:30:3d:ec:2b:37:91:
+        bc:e2:e8:39:31:6f:3d:e9:98:70:80:7c:41:dd:19:13:05:21:
+        94:7b:16:cf:d8:ee:4e:38:34:5e:6a:ff:cd:85:ac:8f:94:9a:
+        dd:4e:77:05:13:a6:b4:80:52:b2:97:64:76:88:f4:dd:42:0a:
+        50:1c:80:fd:4b:6e:a9:62:10:aa:ef:2e:c1:2f:be:0e:c2:2e:
+        b5:28:5f:83
 -----BEGIN CERTIFICATE-----
 MIIDkTCCAnmgAwIBAgIJAJNsnimNN3tmMA0GCSqGSIb3DQEBCwUAMF8xCzAJBgNV
 BAYTAlhYMRUwEwYDVQQHDAxEZWZhdWx0IENpdHkxHDAaBgNVBAoME0RlZmF1bHQg
--- a/tsdb/chunks/head_chunks.go
+++ b/tsdb/chunks/head_chunks.go
@ -191,7 +191,7 @@ func (f *chunkPos) bytesToWriteForChunk(chkLen uint64) uint64 {
 // ChunkDiskMapper is for writing the Head block chunks to disk
 // and access chunks via mmapped files.
 type ChunkDiskMapper struct {
-	/// Writer.
+	// Writer.
 	dir             *os.File
 	writeBufferSize int

@ -210,7 +210,7 @@ type ChunkDiskMapper struct {
 	crc32        hash.Hash
 	writePathMtx sync.Mutex

-	/// Reader.
+	// Reader.
 	// The int key in the map is the file number on the disk.
 	mmappedChunkFiles map[int]*mmappedChunkFile // Contains the m-mapped files for each chunk file mapped with its index.
 	closers           map[int]io.Closer         // Closers for resources behind the byte slices.
--- a/tsdb/compact_test.go
+++ b/tsdb/compact_test.go
@ -2018,7 +2018,7 @@ func TestDelayedCompaction(t *testing.T) {
 				// This implies that the compaction delay doesn't block or wait on the initial trigger.
 				// 3 is an arbitrary value because it's difficult to determine the precise value.
 				require.GreaterOrEqual(t, prom_testutil.ToFloat64(db.metrics.compactionsTriggered)-prom_testutil.ToFloat64(db.metrics.compactionsSkipped), 3.0)
-				// The delay doesn't change the head blocks alignement.
+				// The delay doesn't change the head blocks alignment.
 				require.Eventually(t, func() bool {
 					return db.head.MinTime() == db.compactor.(*LeveledCompactor).ranges[0]+1
 				}, 500*time.Millisecond, 10*time.Millisecond)
--- a/tsdb/db.go
+++ b/tsdb/db.go
@ -49,7 +49,7 @@ import (
 )

 const (
-	// Default duration of a block in milliseconds.
+	// DefaultBlockDuration in milliseconds.
 	DefaultBlockDuration = int64(2 * time.Hour / time.Millisecond)

 	// Block dir suffixes to make deletion and creation operations atomic.
--- a/tsdb/db_test.go
+++ b/tsdb/db_test.go
@ -5036,16 +5036,21 @@ func testOOOQueryAfterRestartWithSnapshotAndRemovedWBL(t *testing.T, scenario sa

 func Test_Querier_OOOQuery(t *testing.T) {
 	opts := DefaultOptions()
-	opts.OutOfOrderCapMax = 30
 	opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds()

 	series1 := labels.FromStrings("foo", "bar1")

+	type filterFunc func(t int64) bool
+	defaultFilterFunc := func(t int64) bool { return true }
+
 	minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() }
-	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) {
+	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) {
 		app := db.Appender(context.Background())
 		totalAppended := 0
 		for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() {
+			if !filter(m / time.Minute.Milliseconds()) {
+				continue
+			}
 			_, err := app.Append(0, series1, m, float64(m))
 			if m >= queryMinT && m <= queryMaxT {
 				expSamples = append(expSamples, sample{t: m, f: float64(m)})
@ -5054,39 +5059,158 @@ func Test_Querier_OOOQuery(t *testing.T) {
 			totalAppended++
 		}
 		require.NoError(t, app.Commit())
+		require.Positive(t, totalAppended, 0) // Sanity check that filter is not too zealous.
 		return expSamples, totalAppended
 	}

+	type sampleBatch struct {
+		minT   int64
+		maxT   int64
+		filter filterFunc
+		isOOO  bool
+	}
+
 	tests := []struct {
 		name      string
+		oooCap    int64
 		queryMinT int64
 		queryMaxT int64
-		inOrderMinT int64
-		inOrderMaxT int64
-		oooMinT     int64
-		oooMaxT     int64
+		batches   []sampleBatch
 	}{
 		{
 			name:      "query interval covering ooomint and inordermaxt returns all ingested samples",
+			oooCap:    30,
 			queryMinT: minutes(0),
 			queryMaxT: minutes(200),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
 		},
 		{
 			name:      "partial query interval returns only samples within interval",
+			oooCap:    30,
 			queryMinT: minutes(20),
 			queryMaxT: minutes(180),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(170),
+					maxT:   minutes(180),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo in-memory samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(100),
+					maxT:   minutes(110),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    5,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
 		},
 	}
 	for _, tc := range tests {
 		t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) {
+			opts.OutOfOrderCapMax = tc.oooCap
 			db := openTestDB(t, opts, nil)
 			db.DisableCompactions()
 			defer func() {
@ -5094,12 +5218,14 @@ func Test_Querier_OOOQuery(t *testing.T) {
 			}()

 			var expSamples []chunks.Sample
+			var oooSamples, appendedCount int

-			// Add in-order samples.
-			expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
-
-			// Add out-of-order samples.
-			expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
+			for _, batch := range tc.batches {
+				expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter)
+				if batch.isOOO {
+					oooSamples += appendedCount
+				}
+			}

 			sort.Slice(expSamples, func(i, j int) bool {
 				return expSamples[i].T() < expSamples[j].T()
@ -5125,11 +5251,17 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {

 	series1 := labels.FromStrings("foo", "bar1")

+	type filterFunc func(t int64) bool
+	defaultFilterFunc := func(t int64) bool { return true }
+
 	minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() }
-	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample) ([]chunks.Sample, int) {
+	addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []chunks.Sample, filter filterFunc) ([]chunks.Sample, int) {
 		app := db.Appender(context.Background())
 		totalAppended := 0
 		for m := fromMins; m <= toMins; m += time.Minute.Milliseconds() {
+			if !filter(m / time.Minute.Milliseconds()) {
+				continue
+			}
 			_, err := app.Append(0, series1, m, float64(m))
 			if m >= queryMinT && m <= queryMaxT {
 				expSamples = append(expSamples, sample{t: m, f: float64(m)})
@ -5138,39 +5270,158 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			totalAppended++
 		}
 		require.NoError(t, app.Commit())
+		require.Positive(t, totalAppended) // Sanity check that filter is not too zealous.
 		return expSamples, totalAppended
 	}

+	type sampleBatch struct {
+		minT   int64
+		maxT   int64
+		filter filterFunc
+		isOOO  bool
+	}
+
 	tests := []struct {
 		name      string
+		oooCap    int64
 		queryMinT int64
 		queryMaxT int64
-		inOrderMinT int64
-		inOrderMaxT int64
-		oooMinT     int64
-		oooMaxT     int64
+		batches   []sampleBatch
 	}{
 		{
 			name:      "query interval covering ooomint and inordermaxt returns all ingested samples",
+			oooCap:    30,
 			queryMinT: minutes(0),
 			queryMaxT: minutes(200),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
 		},
 		{
 			name:      "partial query interval returns only samples within interval",
+			oooCap:    30,
 			queryMinT: minutes(20),
 			queryMaxT: minutes(180),
-			inOrderMinT: minutes(100),
-			inOrderMaxT: minutes(200),
-			oooMinT:     minutes(0),
-			oooMaxT:     minutes(99),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: defaultFilterFunc,
+				},
+				{
+					minT:   minutes(0),
+					maxT:   minutes(99),
+					filter: defaultFilterFunc,
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo samples returns all ingested samples at the end of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(170),
+					maxT:   minutes(180),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo in-memory samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(100),
+					maxT:   minutes(110),
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query inorder contain ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    5,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (5-1)*2), // Append samples to fit in a single mmapped OOO chunk and fit inside the first in-order mmapped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
+		},
+		{
+			name:      "query overlapping inorder and ooo mmaped samples returns all ingested samples at the beginning of the interval",
+			oooCap:    30,
+			queryMinT: minutes(0),
+			queryMaxT: minutes(200),
+			batches: []sampleBatch{
+				{
+					minT:   minutes(100),
+					maxT:   minutes(200),
+					filter: func(t int64) bool { return t%2 == 0 },
+					isOOO:  false,
+				},
+				{
+					minT:   minutes(101),
+					maxT:   minutes(101 + (30-1)*2), // Append samples to fit in a single mmapped OOO chunk and overlap the first in-order mmapped chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+				{
+					minT:   minutes(191),
+					maxT:   minutes(193), // Append some more OOO samples to trigger mapping the OOO chunk, but use time 151 to not overlap with in-order head chunk.
+					filter: func(t int64) bool { return t%2 == 1 },
+					isOOO:  true,
+				},
+			},
 		},
 	}
 	for _, tc := range tests {
 		t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) {
+			opts.OutOfOrderCapMax = tc.oooCap
 			db := openTestDB(t, opts, nil)
 			db.DisableCompactions()
 			defer func() {
@ -5178,12 +5429,14 @@ func Test_ChunkQuerier_OOOQuery(t *testing.T) {
 			}()

 			var expSamples []chunks.Sample
+			var oooSamples, appendedCount int

-			// Add in-order samples.
-			expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
-
-			// Add out-of-order samples.
-			expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples)
+			for _, batch := range tc.batches {
+				expSamples, appendedCount = addSample(db, batch.minT, batch.maxT, tc.queryMinT, tc.queryMaxT, expSamples, batch.filter)
+				if batch.isOOO {
+					oooSamples += appendedCount
+				}
+			}

 			sort.Slice(expSamples, func(i, j int) bool {
 				return expSamples[i].T() < expSamples[j].T()
--- a/tsdb/encoding/encoding.go
+++ b/tsdb/encoding/encoding.go
@ -201,8 +201,8 @@ func (d *Decbuf) UvarintStr() string {
 	return string(d.UvarintBytes())
 }

-// The return value becomes invalid if the byte slice goes away.
-// Compared to UvarintStr, this avoid allocations.
+// UvarintBytes returns a value that becomes invalid if the byte slice goes away.
+// Compared to UvarintStr, it avoids allocations.
 func (d *Decbuf) UvarintBytes() []byte {
 	l := d.Uvarint64()
 	if d.E != nil {
--- a/tsdb/head.go
+++ b/tsdb/head.go
@ -178,7 +178,6 @@ type HeadOptions struct {
 	WALReplayConcurrency int

 	// EnableSharding enables ShardedPostings() support in the Head.
-	// EnableSharding is temporarily disabled during Init().
 	EnableSharding bool
 }

@ -610,7 +609,7 @@ const cardinalityCacheExpirationTime = time.Duration(30) * time.Second
 // Init loads data from the write ahead log and prepares the head for writes.
 // It should be called before using an appender so that it
 // limits the ingested samples to the head min valid time.
-func (h *Head) Init(minValidTime int64) (err error) {
+func (h *Head) Init(minValidTime int64) error {
 	h.minValidTime.Store(minValidTime)
 	defer func() {
 		h.postings.EnsureOrder(h.opts.WALReplayConcurrency)
@ -624,24 +623,6 @@ func (h *Head) Init(minValidTime int64) (err error) {
 		}
 	}()

-	// If sharding is enabled, disable it while initializing, and calculate the shards later.
-	// We're going to use that field for other purposes during WAL replay,
-	// so we don't want to waste time on calculating the shard that we're going to lose anyway.
-	if h.opts.EnableSharding {
-		h.opts.EnableSharding = false
-		defer func() {
-			h.opts.EnableSharding = true
-			if err == nil {
-				// No locking is needed here as nobody should be writing while we're in Init.
-				for _, stripe := range h.series.series {
-					for _, s := range stripe {
-						s.shardHashOrMemoryMappedMaxTime = labels.StableHash(s.lset)
-					}
-				}
-			}
-		}()
-	}
-
 	level.Info(h.logger).Log("msg", "Replaying on-disk memory mappable chunks if any")
 	start := time.Now()

@ -702,6 +683,7 @@ func (h *Head) Init(minValidTime int64) (err error) {
 		mmappedChunks    map[chunks.HeadSeriesRef][]*mmappedChunk
 		oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk
 		lastMmapRef      chunks.ChunkDiskMapperRef
+		err              error

 		mmapChunkReplayDuration time.Duration
 	)
@ -2086,11 +2068,9 @@ type memSeries struct {
 	ref  chunks.HeadSeriesRef
 	meta *metadata.Metadata

-	// Series labels hash to use for sharding purposes.
-	// The value is always 0 when sharding has not been explicitly enabled in TSDB.
-	// While the WAL replay the value stored here is the max time of any mmapped chunk,
-	// and the shard hash is re-calculated after WAL replay is complete.
-	shardHashOrMemoryMappedMaxTime uint64
+	// Series labels hash to use for sharding purposes. The value is always 0 when sharding has not
+	// been explicitly enabled in TSDB.
+	shardHash uint64

 	// Everything after here should only be accessed with the lock held.
 	sync.Mutex
@ -2115,6 +2095,8 @@ type memSeries struct {

 	ooo *memSeriesOOOFields

+	mmMaxTime int64 // Max time of any mmapped chunk, only used during WAL replay.
+
 	nextAt                           int64 // Timestamp at which to cut the next chunk.
 	histogramChunkHasComputedEndTime bool  // True if nextAt has been predicted for the current histograms chunk; false otherwise.
 	pendingCommit                    bool  // Whether there are samples waiting to be committed to this series.
@ -2148,7 +2130,7 @@ func newMemSeries(lset labels.Labels, id chunks.HeadSeriesRef, shardHash uint64,
 		lset:      lset,
 		ref:       id,
 		nextAt:    math.MinInt64,
-		shardHashOrMemoryMappedMaxTime: shardHash,
+		shardHash: shardHash,
 	}
 	if !isolationDisabled {
 		s.txs = newTxRing(0)
@ -2236,12 +2218,6 @@ func (s *memSeries) truncateChunksBefore(mint int64, minOOOMmapRef chunks.ChunkD
 	return removedInOrder + removedOOO
 }

-// shardHash returns the shard hash of the series, only available after WAL replay.
-func (s *memSeries) shardHash() uint64 { return s.shardHashOrMemoryMappedMaxTime }
-
-// mmMaxTime returns the max time of any mmapped chunk in the series, only available during WAL replay.
-func (s *memSeries) mmMaxTime() int64 { return int64(s.shardHashOrMemoryMappedMaxTime) }
-
 // cleanupAppendIDsBelow cleans up older appendIDs. Has to be called after
 // acquiring lock.
 func (s *memSeries) cleanupAppendIDsBelow(bound uint64) {
--- a/tsdb/head_other.go
+++ b/tsdb/head_other.go
@ -26,7 +26,7 @@ func (s *memSeries) labels() labels.Labels {
 	return s.lset
 }

-// No-op when not using dedupelabels.
+// RebuildSymbolTable is a no-op when not using dedupelabels.
 func (h *Head) RebuildSymbolTable(logger log.Logger) *labels.SymbolTable {
 	return nil
 }
--- a/tsdb/head_read.go
+++ b/tsdb/head_read.go
@ -170,7 +170,7 @@ func (h *headIndexReader) ShardedPostings(p index.Postings, shardIndex, shardCou
 		}

 		// Check if the series belong to the shard.
-		if s.shardHash()%shardCount != shardIndex {
+		if s.shardHash%shardCount != shardIndex {
 			continue
 		}

@ -366,7 +366,7 @@ func (h *headChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Ch
 // If copyLastChunk is true, then it makes a copy of the head chunk if asked for it.
 // Also returns max time of the chunk.
 func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc.Chunk, int64, error) {
-	sid, cid := chunks.HeadChunkRef(meta.Ref).Unpack()
+	sid, cid, isOOO := unpackHeadChunkRef(meta.Ref)

 	s := h.head.series.getByID(sid)
 	// This means that the series has been garbage collected.
@ -376,12 +376,21 @@ func (h *headChunkReader) chunk(meta chunks.Meta, copyLastChunk bool) (chunkenc.

 	s.Lock()
 	defer s.Unlock()
-	return h.chunkFromSeries(s, cid, copyLastChunk)
+	return h.head.chunkFromSeries(s, cid, isOOO, h.mint, h.maxt, h.isoState, copyLastChunk)
+}
+
+// wrapOOOHeadChunk wraps the chunk returned for an OOO chunk ID; a dumb thing to defeat chunk pool.
+type wrapOOOHeadChunk struct {
+	chunkenc.Chunk
 }

 // Call with s locked.
-func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, copyLastChunk bool) (chunkenc.Chunk, int64, error) {
-	c, headChunk, isOpen, err := s.chunk(cid, h.head.chunkDiskMapper, &h.head.memChunkPool)
+func (h *Head) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID, isOOO bool, mint, maxt int64, isoState *isolationState, copyLastChunk bool) (chunkenc.Chunk, int64, error) {
+	if isOOO {
+		chk, maxTime, err := s.oooChunk(cid, h.chunkDiskMapper, &h.memChunkPool)
+		return wrapOOOHeadChunk{chk}, maxTime, err
+	}
+	c, headChunk, isOpen, err := s.chunk(cid, h.chunkDiskMapper, &h.memChunkPool)
 	if err != nil {
 		return nil, 0, err
 	}
@ -390,12 +399,12 @@ func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID,
 			// Set this to nil so that Go GC can collect it after it has been used.
 			c.chunk = nil
 			c.prev = nil
-			h.head.memChunkPool.Put(c)
+			h.memChunkPool.Put(c)
 		}
 	}()

 	// This means that the chunk is outside the specified range.
-	if !c.OverlapsClosedInterval(h.mint, h.maxt) {
+	if !c.OverlapsClosedInterval(mint, maxt) {
 		return nil, 0, storage.ErrNotFound
 	}

@ -407,7 +416,7 @@ func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID,
 		newB := make([]byte, len(b))
 		copy(newB, b) // TODO(codesome): Use bytes.Clone() when we upgrade to Go 1.20.
 		// TODO(codesome): Put back in the pool (non-trivial).
-		chk, err = h.head.opts.ChunkPool.Get(s.headChunks.chunk.Encoding(), newB)
+		chk, err = h.opts.ChunkPool.Get(s.headChunks.chunk.Encoding(), newB)
 		if err != nil {
 			return nil, 0, err
 		}
@ -417,7 +426,7 @@ func (h *headChunkReader) chunkFromSeries(s *memSeries, cid chunks.HeadChunkID,
 		Chunk:    chk,
 		s:        s,
 		cid:      cid,
-		isoState: h.isoState,
+		isoState: isoState,
 	}, maxTime, nil
 }

@ -430,7 +439,7 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi
 	// incremented by 1 when new chunk is created, hence (id - firstChunkID) gives the slice index.
 	// The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix
 	// is >= len(s.mmappedChunks), it represents one of the chunks on s.headChunks linked list.
-	// The order of elemens is different for slice and linked list.
+	// The order of elements is different for slice and linked list.
 	// For s.mmappedChunks slice newer chunks are appended to it.
 	// For s.headChunks list newer chunks are prepended to it.
 	//
@ -481,104 +490,19 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi
 	return elem, true, offset == 0, nil
 }

-// mergedChunks return an iterable over one or more OOO chunks for the given
-// chunks.Meta reference from memory or by m-mapping it from the disk. The
-// returned iterable will be a merge of all the overlapping chunks, if any,
-// amongst all the chunks in the OOOHead.
-// If hr is non-nil then in-order chunks are included.
-// This function is not thread safe unless the caller holds a lock.
-// The caller must ensure that s.ooo is not nil.
-func (s *memSeries) mergedChunks(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, hr *headChunkReader, mint, maxt int64, maxMmapRef chunks.ChunkDiskMapperRef) (chunkenc.Iterable, error) {
-	_, cid, _ := unpackHeadChunkRef(meta.Ref)
+// oooChunk returns the chunk for the HeadChunkID by m-mapping it from the disk.
+// It never returns the head OOO chunk.
+func (s *memSeries) oooChunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDiskMapper, memChunkPool *sync.Pool) (chunk chunkenc.Chunk, maxTime int64, err error) {
+	// ix represents the index of the chunk in the s.ooo.oooMmappedChunks slice. Chunk IDs are
+	// incremented by 1 when a new chunk is created, hence (id - firstOOOChunkID) gives the slice index.
+	ix := int(id) - int(s.ooo.firstOOOChunkID)

-	// ix represents the index of chunk in the s.mmappedChunks slice. The chunk meta's are
-	// incremented by 1 when new chunk is created, hence (meta - firstChunkID) gives the slice index.
-	// The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix
-	// is len(s.mmappedChunks), it represents the next chunk, which is the head chunk.
-	ix := int(cid) - int(s.ooo.firstOOOChunkID)
-	if ix < 0 || ix > len(s.ooo.oooMmappedChunks) {
-		return nil, storage.ErrNotFound
+	if ix < 0 || ix >= len(s.ooo.oooMmappedChunks) {
+		return nil, 0, storage.ErrNotFound
 	}

-	if ix == len(s.ooo.oooMmappedChunks) {
-		if s.ooo.oooHeadChunk == nil {
-			return nil, errors.New("invalid ooo head chunk")
-		}
-	}
-
-	// We create a temporary slice of chunk metas to hold the information of all
-	// possible chunks that may overlap with the requested chunk.
-	tmpChks := make([]chunkMetaAndChunkDiskMapperRef, 0, len(s.ooo.oooMmappedChunks)+1)
-
-	for i, c := range s.ooo.oooMmappedChunks {
-		if maxMmapRef != 0 && c.ref > maxMmapRef {
-			break
-		}
-		if c.OverlapsClosedInterval(mint, maxt) {
-			tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{
-				meta: chunks.Meta{
-					MinTime: c.minTime,
-					MaxTime: c.maxTime,
-					Ref:     chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(i))),
-				},
-				ref: c.ref,
-			})
-		}
-	}
-	// Add in data copied from the head OOO chunk.
-	if meta.Chunk != nil {
-		tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{meta: meta})
-	}
-
-	if hr != nil { // Include in-order chunks.
-		metas := appendSeriesChunks(s, max(meta.MinTime, mint), min(meta.MaxTime, maxt), nil)
-		for _, m := range metas {
-			tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{
-				meta: m,
-				ref:  0, // This tells the loop below it's an in-order head chunk.
-			})
-		}
-	}
-
-	// Next we want to sort all the collected chunks by min time so we can find
-	// those that overlap and stop when we know the rest don't.
-	slices.SortFunc(tmpChks, refLessByMinTimeAndMinRef)
-
-	mc := &mergedOOOChunks{}
-	absoluteMax := int64(math.MinInt64)
-	for _, c := range tmpChks {
-		if c.meta.Ref != meta.Ref && (len(mc.chunkIterables) == 0 || c.meta.MinTime > absoluteMax) {
-			continue
-		}
-		var iterable chunkenc.Iterable
-		switch {
-		case c.meta.Chunk != nil:
-			iterable = c.meta.Chunk
-		case c.ref == 0: // This is an in-order head chunk.
-			_, cid := chunks.HeadChunkRef(c.meta.Ref).Unpack()
-			var err error
-			iterable, _, err = hr.chunkFromSeries(s, cid, false)
-			if err != nil {
-				return nil, fmt.Errorf("invalid head chunk: %w", err)
-			}
-		default:
-			chk, err := cdm.Chunk(c.ref)
-			if err != nil {
-				var cerr *chunks.CorruptionErr
-				if errors.As(err, &cerr) {
-					return nil, fmt.Errorf("invalid ooo mmapped chunk: %w", err)
-				}
-				return nil, err
-			}
-			iterable = chk
-		}
-		mc.chunkIterables = append(mc.chunkIterables, iterable)
-		if c.meta.MaxTime > absoluteMax {
-			absoluteMax = c.meta.MaxTime
-		}
-	}
-
-	return mc, nil
+	chk, err := chunkDiskMapper.Chunk(s.ooo.oooMmappedChunks[ix].ref)
+	return chk, s.ooo.oooMmappedChunks[ix].maxTime, err
 }

 // safeHeadChunk makes sure that the chunk can be accessed without a race condition.
--- a/tsdb/head_test.go
+++ b/tsdb/head_test.go
@ -23,7 +23,6 @@ import (
 	"path"
 	"path/filepath"
 	"reflect"
-	"runtime/pprof"
 	"sort"
 	"strconv"
 	"strings"
@ -90,43 +89,6 @@ func newTestHeadWithOptions(t testing.TB, compressWAL wlog.CompressionType, opts
 	return h, wal
 }

-// BenchmarkLoadRealWLs will be skipped unless the BENCHMARK_LOAD_REAL_WLS_DIR environment variable is set.
-// BENCHMARK_LOAD_REAL_WLS_DIR should be the folder where `wal` and `chunks_head` are located.
-// Optionally, BENCHMARK_LOAD_REAL_WLS_PROFILE can be set to a file path to write a CPU profile.
-func BenchmarkLoadRealWLs(b *testing.B) {
-	dir := os.Getenv("BENCHMARK_LOAD_REAL_WLS_DIR")
-	if dir == "" {
-		b.Skipped()
-	}
-
-	profileFile := os.Getenv("BENCHMARK_LOAD_REAL_WLS_PROFILE")
-	if profileFile != "" {
-		b.Logf("Will profile in %s", profileFile)
-		f, err := os.Create(profileFile)
-		require.NoError(b, err)
-		b.Cleanup(func() { f.Close() })
-		require.NoError(b, pprof.StartCPUProfile(f))
-		b.Cleanup(pprof.StopCPUProfile)
-	}
-
-	wal, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), wlog.CompressionNone)
-	require.NoError(b, err)
-	b.Cleanup(func() { wal.Close() })
-
-	wbl, err := wlog.New(nil, nil, filepath.Join(dir, "wbl"), wlog.CompressionNone)
-	require.NoError(b, err)
-	b.Cleanup(func() { wbl.Close() })
-
-	// Load the WAL.
-	for i := 0; i < b.N; i++ {
-		opts := DefaultHeadOptions()
-		opts.ChunkDirRoot = dir
-		h, err := NewHead(nil, nil, wal, wbl, opts, nil)
-		require.NoError(b, err)
-		h.Init(0)
-	}
-}
-
 func BenchmarkCreateSeries(b *testing.B) {
 	series := genSeries(b.N, 10, 0, 0)
 	h, _ := newTestHead(b, 10000, wlog.CompressionNone, false)
--- a/tsdb/head_wal.go
+++ b/tsdb/head_wal.go
@ -435,8 +435,6 @@ Outer:
 	return nil
 }

-func minInt64() int64 { return math.MinInt64 }
-
 // resetSeriesWithMMappedChunks is only used during the WAL replay.
 func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*mmappedChunk, walSeriesRef chunks.HeadSeriesRef) (overlapped bool) {
 	if mSeries.ref != walSeriesRef {
@ -483,11 +481,10 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*m
 	}
 	// Cache the last mmapped chunk time, so we can skip calling append() for samples it will reject.
 	if len(mmc) == 0 {
-		mSeries.shardHashOrMemoryMappedMaxTime = uint64(minInt64())
+		mSeries.mmMaxTime = math.MinInt64
 	} else {
-		mmMaxTime := mmc[len(mmc)-1].maxTime
-		mSeries.shardHashOrMemoryMappedMaxTime = uint64(mmMaxTime)
-		h.updateMinMaxTime(mmc[0].minTime, mmMaxTime)
+		mSeries.mmMaxTime = mmc[len(mmc)-1].maxTime
+		h.updateMinMaxTime(mmc[0].minTime, mSeries.mmMaxTime)
 	}
 	if len(oooMmc) != 0 {
 		// Mint and maxt can be in any chunk, they are not sorted.
@ -588,7 +585,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
 				unknownRefs++
 				continue
 			}
-			if s.T <= ms.mmMaxTime() {
+			if s.T <= ms.mmMaxTime {
 				continue
 			}
 			if _, chunkCreated := ms.append(s.T, s.V, 0, appendChunkOpts); chunkCreated {
@ -617,7 +614,7 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmapp
 				unknownHistogramRefs++
 				continue
 			}
-			if s.t <= ms.mmMaxTime() {
+			if s.t <= ms.mmMaxTime {
 				continue
 			}
 			var chunkCreated bool
--- a/tsdb/index/index.go
+++ b/tsdb/index/index.go
@ -196,8 +196,9 @@ func NewTOCFromByteSlice(bs ByteSlice) (*TOC, error) {
 	return toc, d.Err()
 }

-// NewWriter returns a new Writer to the given filename. It serializes data in format version 2.
-// It uses the given encoder to encode each postings list.
+// NewWriterWithEncoder returns a new Writer to the given filename. It
+// serializes data in format version 2. It uses the given encoder to encode each
+// postings list.
 func NewWriterWithEncoder(ctx context.Context, fn string, encoder PostingsEncoder) (*Writer, error) {
 	dir := filepath.Dir(fn)

--- a/tsdb/ooo_head_read.go
+++ b/tsdb/ooo_head_read.go
@ -16,6 +16,7 @@ package tsdb
 import (
 	"context"
 	"errors"
+	"fmt"
 	"math"
 	"slices"

@ -139,33 +140,39 @@ func getOOOSeriesChunks(s *memSeries, mint, maxt int64, lastGarbageCollectedMmap
 	// those that overlap.
 	slices.SortFunc(tmpChks, lessByMinTimeAndMinRef)

-	// Next we want to iterate the sorted collected chunks and only return the
-	// chunks Meta the first chunk that overlaps with others.
+	// Next we want to iterate the sorted collected chunks and return composites for chunks that overlap with others.
 	// Example chunks of a series: 5:(100, 200) 6:(500, 600) 7:(150, 250) 8:(550, 650)
-	// In the example 5 overlaps with 7 and 6 overlaps with 8 so we only want to
-	// return chunk Metas for chunk 5 and chunk 6e
-	*chks = append(*chks, tmpChks[0])
-	maxTime := tmpChks[0].MaxTime // Tracks the maxTime of the previous "to be merged chunk".
+	// In the example 5 overlaps with 7 and 6 overlaps with 8 so we will return
+	// [5,7], [6,8].
+	toBeMerged := tmpChks[0]
 	for _, c := range tmpChks[1:] {
-		switch {
-		case c.MinTime > maxTime:
-			*chks = append(*chks, c)
-			maxTime = c.MaxTime
-		case c.MaxTime > maxTime:
-			maxTime = c.MaxTime
-			(*chks)[len(*chks)-1].MaxTime = c.MaxTime
-			fallthrough
-		default:
-			// If the head OOO chunk is part of an output chunk, copy the chunk pointer.
-			if c.Chunk != nil {
-				(*chks)[len(*chks)-1].Chunk = c.Chunk
+		if c.MinTime > toBeMerged.MaxTime {
+			// This chunk doesn't overlap. Send current toBeMerged to output and start a new one.
+			*chks = append(*chks, toBeMerged)
+			toBeMerged = c
+		} else {
+			// Merge this chunk with existing toBeMerged.
+			if mm, ok := toBeMerged.Chunk.(*multiMeta); ok {
+				mm.metas = append(mm.metas, c)
+			} else {
+				toBeMerged.Chunk = &multiMeta{metas: []chunks.Meta{toBeMerged, c}}
+			}
+			if toBeMerged.MaxTime < c.MaxTime {
+				toBeMerged.MaxTime = c.MaxTime
 			}
 		}
 	}
+	*chks = append(*chks, toBeMerged)

 	return nil
 }

+// multiMeta is a fake Chunk object used to pass a set of overlapping Metas inside Meta.Chunk.
+type multiMeta struct {
+	chunkenc.Chunk // We don't expect any of the methods to be called.
+	metas          []chunks.Meta
+}
+
 // LabelValues needs to be overridden from the headIndexReader implementation
 // so we can return labels within either in-order range or ooo range.
 func (oh *HeadAndOOOIndexReader) LabelValues(ctx context.Context, name string, matchers ...*labels.Matcher) ([]string, error) {
@ -180,29 +187,6 @@ func (oh *HeadAndOOOIndexReader) LabelValues(ctx context.Context, name string, m
 	return labelValuesWithMatchers(ctx, oh, name, matchers...)
 }

-type chunkMetaAndChunkDiskMapperRef struct {
-	meta chunks.Meta
-	ref  chunks.ChunkDiskMapperRef
-}
-
-func refLessByMinTimeAndMinRef(a, b chunkMetaAndChunkDiskMapperRef) int {
-	switch {
-	case a.meta.MinTime < b.meta.MinTime:
-		return -1
-	case a.meta.MinTime > b.meta.MinTime:
-		return 1
-	}
-
-	switch {
-	case a.meta.Ref < b.meta.Ref:
-		return -1
-	case a.meta.Ref > b.meta.Ref:
-		return 1
-	default:
-		return 0
-	}
-}
-
 func lessByMinTimeAndMinRef(a, b chunks.Meta) int {
 	switch {
 	case a.MinTime < b.MinTime:
@ -241,33 +225,55 @@ func NewHeadAndOOOChunkReader(head *Head, mint, maxt int64, cr *headChunkReader,
 }

 func (cr *HeadAndOOOChunkReader) ChunkOrIterable(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, error) {
-	sid, _, isOOO := unpackHeadChunkRef(meta.Ref)
-	if !isOOO {
-		return cr.cr.ChunkOrIterable(meta)
+	c, it, _, err := cr.chunkOrIterable(meta, false)
+	return c, it, err
 }

+// ChunkOrIterableWithCopy implements ChunkReaderWithCopy. The special Copy
+// behaviour is only implemented for the in-order head chunk.
+func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
+	return cr.chunkOrIterable(meta, true)
+}
+
+func (cr *HeadAndOOOChunkReader) chunkOrIterable(meta chunks.Meta, copyLastChunk bool) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
+	sid, cid, isOOO := unpackHeadChunkRef(meta.Ref)
 	s := cr.head.series.getByID(sid)
 	// This means that the series has been garbage collected.
 	if s == nil {
-		return nil, nil, storage.ErrNotFound
+		return nil, nil, 0, storage.ErrNotFound
+	}
+	var isoState *isolationState
+	if cr.cr != nil {
+		isoState = cr.cr.isoState
 	}

 	s.Lock()
-	mc, err := s.mergedChunks(meta, cr.head.chunkDiskMapper, cr.cr, cr.mint, cr.maxt, cr.maxMmapRef)
-	s.Unlock()
+	defer s.Unlock()

-	return nil, mc, err
+	if meta.Chunk == nil {
+		c, maxt, err := cr.head.chunkFromSeries(s, cid, isOOO, meta.MinTime, meta.MaxTime, isoState, copyLastChunk)
+		return c, nil, maxt, err
 	}
-
-// ChunkOrIterableWithCopy: implements ChunkReaderWithCopy. The special Copy behaviour
-// is only implemented for the in-order head chunk.
-func (cr *HeadAndOOOChunkReader) ChunkOrIterableWithCopy(meta chunks.Meta) (chunkenc.Chunk, chunkenc.Iterable, int64, error) {
-	_, _, isOOO := unpackHeadChunkRef(meta.Ref)
-	if !isOOO {
-		return cr.cr.ChunkOrIterableWithCopy(meta)
+	mm, ok := meta.Chunk.(*multiMeta)
+	if !ok { // Complete chunk was supplied.
+		return meta.Chunk, nil, meta.MaxTime, nil
 	}
-	chk, iter, err := cr.ChunkOrIterable(meta)
-	return chk, iter, 0, err
+	// We have a composite meta: construct a composite iterable.
+	mc := &mergedOOOChunks{}
+	for _, m := range mm.metas {
+		switch {
+		case m.Chunk != nil:
+			mc.chunkIterables = append(mc.chunkIterables, m.Chunk)
+		default:
+			_, cid, isOOO := unpackHeadChunkRef(m.Ref)
+			iterable, _, err := cr.head.chunkFromSeries(s, cid, isOOO, m.MinTime, m.MaxTime, isoState, copyLastChunk)
+			if err != nil {
+				return nil, nil, 0, fmt.Errorf("invalid head chunk: %w", err)
+			}
+			mc.chunkIterables = append(mc.chunkIterables, iterable)
+		}
+	}
+	return nil, mc, meta.MaxTime, nil
 }

 func (cr *HeadAndOOOChunkReader) Close() error {
--- a/tsdb/ooo_head_read_test.go
+++ b/tsdb/ooo_head_read_test.go
@ -39,6 +39,11 @@ type chunkInterval struct {
 	maxt int64
 }

+type expChunk struct {
+	c chunkInterval
+	m []chunkInterval
+}
+
 // permutateChunkIntervals returns all possible orders of the given chunkIntervals.
 func permutateChunkIntervals(in []chunkInterval, out [][]chunkInterval, left, right int) [][]chunkInterval {
 	if left == right {
@ -65,7 +70,7 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 		queryMinT           int64
 		queryMaxT           int64
 		inputChunkIntervals []chunkInterval
-		expChunks           []chunkInterval
+		expChunks           []expChunk
 	}{
 		{
 			name:      "Empty result and no error when head is empty",
@ -107,8 +112,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// ts                    0       100       150       200       250       300       350       400       450       500       550       600       650       700
 			// Query Interval                [-----------------------------------------------------------]
 			// Chunk 0:                                 [---------------------------------------]
-			expChunks: []chunkInterval{
-				{0, 150, 350},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 150, 350}},
 			},
 		},
 		{
@ -121,8 +126,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// ts                    0       100       150       200       250       300       350       400       450       500       550       600       650       700
 			// Query Interval:                          [---------------------------------------]
 			// Chunk 0:                       [-----------------------------------------------------------]
-			expChunks: []chunkInterval{
-				{0, 100, 400},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 100, 400}},
 			},
 		},
 		{
@ -142,9 +147,9 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// Chunk 2:                                  [-------------------]
 			// Chunk 3:                                                                                                                  [-------------------]
 			// Output Graphically              [-----------------------------]                                                 [-----------------------------]
-			expChunks: []chunkInterval{
-				{0, 100, 250},
-				{1, 500, 650},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 100, 250}, m: []chunkInterval{{0, 100, 200}, {2, 150, 250}}},
+				{c: chunkInterval{1, 500, 650}, m: []chunkInterval{{1, 500, 600}, {3, 550, 650}}},
 			},
 		},
 		{
@ -164,8 +169,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// Chunk 2:                                                                [-------------------]
 			// Chunk 3:                                                                                    [------------------]
 			// Output Graphically              [------------------------------------------------------------------------------]
-			expChunks: []chunkInterval{
-				{0, 100, 500},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 100, 500}, m: []chunkInterval{{0, 100, 200}, {1, 200, 300}, {2, 300, 400}, {3, 400, 500}}},
 			},
 		},
 		{
@ -185,11 +190,11 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// Chunk 2:                                                                [------------------]
 			// Chunk 3:                                                                                    [------------------]
 			// Output Graphically              [------------------][------------------][------------------][------------------]
-			expChunks: []chunkInterval{
-				{0, 100, 199},
-				{1, 200, 299},
-				{2, 300, 399},
-				{3, 400, 499},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 100, 199}},
+				{c: chunkInterval{1, 200, 299}},
+				{c: chunkInterval{2, 300, 399}},
+				{c: chunkInterval{3, 400, 499}},
 			},
 		},
 		{
@ -209,8 +214,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// Chunk 2:                                                     [------------------]
 			// Chunk 3:                                                                                             [------------------]
 			// Output Graphically              [-----------------------------------------------]
-			expChunks: []chunkInterval{
-				{0, 100, 350},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 100, 350}, m: []chunkInterval{{0, 100, 200}, {1, 150, 300}, {2, 250, 350}}},
 			},
 		},
 		{
@ -228,8 +233,8 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// Chunk 1:             [-----------------------------]
 			// Chunk 2:                                [------------------------------]
 			// Output Graphically   [-----------------------------------------------------------------------------------------]
-			expChunks: []chunkInterval{
-				{1, 0, 500},
+			expChunks: []expChunk{
+				{c: chunkInterval{1, 0, 500}, m: []chunkInterval{{1, 0, 200}, {2, 150, 300}, {0, 250, 500}}},
 			},
 		},
 		{
@ -251,9 +256,9 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// Chunk 3:                                                                                                                                      [-------------------]
 			// Chunk 4:                                                                                                                             [---------------------------------------]
 			// Output Graphically              [---------------------------------------]                                                            [------------------------------------------------]
-			expChunks: []chunkInterval{
-				{0, 100, 300},
-				{4, 600, 850},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 100, 300}, m: []chunkInterval{{0, 100, 300}, {2, 150, 250}}},
+				{c: chunkInterval{4, 600, 850}, m: []chunkInterval{{4, 600, 800}, {3, 650, 750}, {1, 770, 850}}},
 			},
 		},
 		{
@ -271,10 +276,10 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 			// Chunk 1:                                                              [----------]
 			// Chunk 2:                                           [--------]
 			// Output Graphically              [-------]          [--------]         [----------]
-			expChunks: []chunkInterval{
-				{0, 100, 150},
-				{1, 300, 350},
-				{2, 200, 250},
+			expChunks: []expChunk{
+				{c: chunkInterval{0, 100, 150}},
+				{c: chunkInterval{2, 200, 250}},
+				{c: chunkInterval{1, 300, 350}},
 			},
 		},
 	}
@ -305,24 +310,38 @@ func TestOOOHeadIndexReader_Series(t *testing.T) {
 					s1.ooo = &memSeriesOOOFields{}

 					// define our expected chunks, by looking at the expected ChunkIntervals and setting...
+					// Ref to whatever Ref the chunk has, that we refer to by ID
+					findID := func(id int) chunks.ChunkRef {
+						for ref, c := range intervals {
+							if c.ID == id {
+								return chunks.ChunkRef(chunks.NewHeadChunkRef(chunks.HeadSeriesRef(s1ID), s1.oooHeadChunkID(ref)))
+							}
+						}
+						return 0
+					}
 					var expChunks []chunks.Meta
 					for _, e := range tc.expChunks {
+						var chunk chunkenc.Chunk
+						if len(e.m) > 0 {
+							mm := &multiMeta{}
+							for _, x := range e.m {
 								meta := chunks.Meta{
-							Chunk:   chunkenc.Chunk(nil),
-							MinTime: e.mint,
-							MaxTime: e.maxt,
+									MinTime: x.mint,
+									MaxTime: x.maxt,
+									Ref:     findID(x.ID),
 								}
-
-						// Ref to whatever Ref the chunk has, that we refer to by ID
-						for ref, c := range intervals {
-							if c.ID == e.ID {
-								meta.Ref = chunks.ChunkRef(chunks.NewHeadChunkRef(chunks.HeadSeriesRef(s1ID), s1.oooHeadChunkID(ref)))
-								break
+								mm.metas = append(mm.metas, meta)
 							}
+							chunk = mm
+						}
+						meta := chunks.Meta{
+							Chunk:   chunk,
+							MinTime: e.c.mint,
+							MaxTime: e.c.maxt,
+							Ref:     findID(e.c.ID),
 						}
 						expChunks = append(expChunks, meta)
 					}
-					slices.SortFunc(expChunks, lessByMinTimeAndMinRef) // We always want the chunks to come back sorted by minTime asc.

 					if headChunk && len(intervals) > 0 {
 						// Put the last interval in the head chunk
@ -497,6 +516,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 		queryMaxT            int64
 		firstInOrderSampleAt int64
 		inputSamples         []testValue
+		expSingleChunks      bool
 		expChunkError        bool
 		expChunksSamples     []chunks.SampleSlice
 	}{
@ -510,6 +530,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 				{Ts: minutes(40), V: 0},
 			},
 			expChunkError:   false,
+			expSingleChunks: true,
 			// ts (in minutes)         0       10       20       30       40       50       60       70       80       90       100
 			// Query Interval          [------------------------------------------------------------------------------------------]
 			// Chunk 0: Current Head                              [--------] (With 2 samples)
@ -690,6 +711,7 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 				{Ts: minutes(42), V: 3},
 			},
 			expChunkError:   false,
+			expSingleChunks: true,
 			// ts (in minutes)         0       10       20       30       40       50       60       70       80       90       100
 			// Query Interval          [------------------------------------------------------------------------------------------]
 			// Chunk 0                          [-------]
@ -844,9 +866,13 @@ func testOOOHeadChunkReader_Chunk(t *testing.T, scenario sampleTypeScenario) {
 			for i := 0; i < len(chks); i++ {
 				c, iterable, err := cr.ChunkOrIterable(chks[i])
 				require.NoError(t, err)
+				var it chunkenc.Iterator
+				if tc.expSingleChunks {
+					it = c.Iterator(nil)
+				} else {
 					require.Nil(t, c)
-
-				it := iterable.Iterator(nil)
+					it = iterable.Iterator(nil)
+				}
 				resultSamples, err := storage.ExpandSamples(it, nil)
 				require.NoError(t, err)
 				requireEqualSamples(t, s1.String(), tc.expChunksSamples[i], resultSamples, true)
@ -1029,94 +1055,6 @@ func testOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding(
 	}
 }

-// TestSortByMinTimeAndMinRef tests that the sort function for chunk metas does sort
-// by chunk meta MinTime and in case of same references by the lower reference.
-func TestSortByMinTimeAndMinRef(t *testing.T) {
-	tests := []struct {
-		name  string
-		input []chunkMetaAndChunkDiskMapperRef
-		exp   []chunkMetaAndChunkDiskMapperRef
-	}{
-		{
-			name: "chunks are ordered by min time",
-			input: []chunkMetaAndChunkDiskMapperRef{
-				{
-					meta: chunks.Meta{
-						Ref:     0,
-						MinTime: 0,
-					},
-					ref: chunks.ChunkDiskMapperRef(0),
-				},
-				{
-					meta: chunks.Meta{
-						Ref:     1,
-						MinTime: 1,
-					},
-					ref: chunks.ChunkDiskMapperRef(1),
-				},
-			},
-			exp: []chunkMetaAndChunkDiskMapperRef{
-				{
-					meta: chunks.Meta{
-						Ref:     0,
-						MinTime: 0,
-					},
-					ref: chunks.ChunkDiskMapperRef(0),
-				},
-				{
-					meta: chunks.Meta{
-						Ref:     1,
-						MinTime: 1,
-					},
-					ref: chunks.ChunkDiskMapperRef(1),
-				},
-			},
-		},
-		{
-			name: "if same mintime, lower reference goes first",
-			input: []chunkMetaAndChunkDiskMapperRef{
-				{
-					meta: chunks.Meta{
-						Ref:     10,
-						MinTime: 0,
-					},
-					ref: chunks.ChunkDiskMapperRef(0),
-				},
-				{
-					meta: chunks.Meta{
-						Ref:     5,
-						MinTime: 0,
-					},
-					ref: chunks.ChunkDiskMapperRef(1),
-				},
-			},
-			exp: []chunkMetaAndChunkDiskMapperRef{
-				{
-					meta: chunks.Meta{
-						Ref:     5,
-						MinTime: 0,
-					},
-					ref: chunks.ChunkDiskMapperRef(1),
-				},
-				{
-					meta: chunks.Meta{
-						Ref:     10,
-						MinTime: 0,
-					},
-					ref: chunks.ChunkDiskMapperRef(0),
-				},
-			},
-		},
-	}
-
-	for _, tc := range tests {
-		t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) {
-			slices.SortFunc(tc.input, refLessByMinTimeAndMinRef)
-			require.Equal(t, tc.exp, tc.input)
-		})
-	}
-}
-
 // TestSortMetaByMinTimeAndMinRef tests that the sort function for chunk metas does sort
 // by chunk meta MinTime and, in case of equal MinTime, by the lower reference.
 func TestSortMetaByMinTimeAndMinRef(t *testing.T) {
--- a/tsdb/querier.go
+++ b/tsdb/querier.go
@ -972,7 +972,7 @@ func (p *populateWithDelChunkSeriesIterator) populateChunksFromIterable() bool {
 		// Check if the encoding has changed (i.e. we need to create a new
 		// chunk as chunks can't have multiple encoding types).
 		// For the first sample, the following condition will always be true as
-		// ValNoneNone != ValFloat | ValHistogram | ValFloatHistogram.
+		// ValNone != ValFloat | ValHistogram | ValFloatHistogram.
 		if currentValueType != prevValueType {
 			if prevValueType != chunkenc.ValNone {
 				p.chunksFromIterable = append(p.chunksFromIterable, chunks.Meta{Chunk: currentChunk, MinTime: cmint, MaxTime: cmaxt})
--- a/tsdb/wlog/watcher.go
+++ b/tsdb/wlog/watcher.go
@ -58,15 +58,16 @@ type WriteTo interface {
 	StoreSeries([]record.RefSeries, int)
 	StoreMetadata([]record.RefMetadata)

-	// Next two methods are intended for garbage-collection: first we call
-	// UpdateSeriesSegment on all current series
+	// UpdateSeriesSegment and SeriesReset are intended for
+	// garbage-collection:
+	// First we call UpdateSeriesSegment on all current series.
 	UpdateSeriesSegment([]record.RefSeries, int)
-	// Then SeriesReset is called to allow the deletion
-	// of all series created in a segment lower than the argument.
+	// Then SeriesReset is called to allow the deletion of all series
+	// created in a segment lower than the argument.
 	SeriesReset(int)
 }

-// Used to notify the watcher that data has been written so that it can read.
+// WriteNotified notifies the watcher that data has been written so that it can read.
 type WriteNotified interface {
 	Notify()
 }
@ -602,7 +603,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error {
 			}

 		case record.Metadata:
-			if !w.sendMetadata || !tail {
+			if !w.sendMetadata {
 				break
 			}
 			meta, err := dec.Metadata(rec, metadata[:0])
--- a/tsdb/wlog/wlog.go
+++ b/tsdb/wlog/wlog.go
@ -38,8 +38,8 @@ import (
 )

 const (
-	DefaultSegmentSize = 128 * 1024 * 1024 // 128 MB
-	pageSize           = 32 * 1024         // 32KB
+	DefaultSegmentSize = 128 * 1024 * 1024 // DefaultSegmentSize is 128 MB.
+	pageSize           = 32 * 1024         // pageSize is 32KB.
 	recordHeaderSize   = 7
 	WblDirName         = "wbl"
 )
--- a/Show more
+++ b/Show more