From c1b669bf9b0b1286ece53c977262089119783105 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Tue, 20 Sep 2022 19:05:50 +0200 Subject: [PATCH] Add out-of-order sample support to the TSDB (#11075) * Introduce out-of-order TSDB support This implementation is based on this design doc: https://docs.google.com/document/d/1Kppm7qL9C-BJB1j6yb6-9ObG3AbdZnFUBYPNNWwDBYM/edit?usp=sharing This commit adds support to accept out-of-order ("OOO") samples into the TSDB up to a configurable time allowance. If OOO is enabled, overlapping queries are automatically enabled. Most of the additions have been borrowed from https://github.com/grafana/mimir-prometheus/ Here is the list of the original commits cherry picked from mimir-prometheus into this branch: - 4b2198d7ec47d50989b7c2df66b7b207c32f7f6e - 2836e5513f1bc591535a859f5d41154a75e7c6bc - 00b379c3a5b1ec3799699b6242f300a2b3ea30f0 - ff0dc757587cada63ca948d2d5eb00bf090d63e0 - a632c73352a7e39d60b445700beb47d691549c3e - c6f3d4ab339ab80bbbce74c9946237ced01f0509 - 5e8406a1d4a50d0052bbee83e28ca3b3371408aa - abde1e0ba128936b9eb0224ee1551e56216ebd4a - e70e7698897bb03860bee0467c733fa44e14c9bd - df59320886e03a555d379ac4b0b3130f661407e0 Co-authored-by: Jesus Vazquez Co-authored-by: Ganesh Vernekar Co-authored-by: Dieter Plaetinck Signed-off-by: Jesus Vazquez * gofumpt files Signed-off-by: Jesus Vazquez * Add license header to missing files Signed-off-by: Jesus Vazquez * Fix OOO tests due to existing chunk disk mapper implementation Signed-off-by: Jesus Vazquez * Fix truncate int overflow Signed-off-by: Jesus Vazquez * Add Sync method to the WAL and update tests Signed-off-by: Jesus Vazquez * remove useless sync Signed-off-by: Jesus Vazquez * Update minOOOTime after truncating Head * Update minOOOTime after truncating Head Signed-off-by: Ganesh Vernekar * Fix lint Signed-off-by: Ganesh Vernekar * Add a unit test Signed-off-by: Ganesh Vernekar Signed-off-by: Jesus Vazquez * Load OutOfOrderTimeWindow only once per appender Signed-off-by: 
Jesus Vazquez * Fix OOO Head LabelValues and PostingsForMatchers Signed-off-by: Jesus Vazquez * Fix replay of OOO mmap chunks Signed-off-by: Ganesh Vernekar * Remove unnecessary err check Signed-off-by: Jesus Vazquez * Prevent panic with ApplyConfig Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez * Run OOO compaction after restart if there is OOO data from WBL Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Signed-off-by: Jesus Vazquez * Apply Bartek's suggestions Co-authored-by: Bartlomiej Plotka Signed-off-by: Jesus Vazquez * Refactor OOO compaction Signed-off-by: Ganesh Vernekar * Address comments and TODOs - Added a comment explaining why we need the allow overlapping compaction toggle - Clarified TSDBConfig OutOfOrderTimeWindow doc - Added an owner to all the TODOs in the code Signed-off-by: Jesus Vazquez * Run go format Signed-off-by: Jesus Vazquez * Fix remaining review comments Signed-off-by: Ganesh Vernekar * Fix tests Signed-off-by: Ganesh Vernekar * Change wbl reference when truncating ooo in TestHeadMinOOOTimeUpdate Signed-off-by: Jesus Vazquez * Fix TestWBLAndMmapReplay test failure on windows Signed-off-by: Ganesh Vernekar * Address most of the feedback Signed-off-by: Ganesh Vernekar * Refactor the block meta for out of order Signed-off-by: Ganesh Vernekar * Fix windows error Signed-off-by: Ganesh Vernekar * Fix review comments Signed-off-by: Ganesh Vernekar Signed-off-by: Jesus Vazquez Signed-off-by: Ganesh Vernekar Signed-off-by: Ganesh Vernekar 15064823+codesome@users.noreply.github.com Co-authored-by: Ganesh Vernekar <15064823+codesome@users.noreply.github.com> Co-authored-by: Ganesh Vernekar Co-authored-by: Dieter Plaetinck Co-authored-by: Oleg Zaytsev Co-authored-by: Bartlomiej Plotka --- cmd/prometheus/main.go | 8 +- cmd/promtool/rules_test.go | 6 +- cmd/promtool/tsdb.go | 2 +- config/config.go | 28 + storage/interface.go | 11 +- storage/merge.go | 53 + 
storage/merge_test.go | 134 ++ tsdb/agent/db.go | 3 +- tsdb/block.go | 29 +- tsdb/block_test.go | 63 +- tsdb/blockwriter.go | 4 +- tsdb/chunkenc/chunk.go | 21 +- tsdb/chunkenc/xor.go | 9 + tsdb/chunks/chunks.go | 13 +- tsdb/chunks/chunks_test.go | 2 +- tsdb/chunks/head_chunks.go | 33 +- tsdb/chunks/head_chunks_test.go | 50 +- tsdb/compact_test.go | 2 +- tsdb/db.go | 394 +++++- tsdb/db_test.go | 2020 ++++++++++++++++++++++++++++++- tsdb/head.go | 489 ++++++-- tsdb/head_append.go | 293 ++++- tsdb/head_bench_test.go | 6 +- tsdb/head_read.go | 269 +++- tsdb/head_read_test.go | 178 +++ tsdb/head_test.go | 510 +++++++- tsdb/head_wal.go | 315 ++++- tsdb/ooo_head.go | 159 +++ tsdb/ooo_head_read.go | 433 +++++++ tsdb/ooo_head_read_test.go | 1207 ++++++++++++++++++ tsdb/ooo_head_test.go | 93 ++ tsdb/querier.go | 4 +- tsdb/querier_bench_test.go | 4 +- tsdb/querier_test.go | 56 +- tsdb/record/record.go | 52 +- tsdb/wal/wal.go | 65 +- tsdb/wal/watcher_test.go | 15 +- web/api/v1/api_test.go | 2 +- 38 files changed, 6655 insertions(+), 380 deletions(-) create mode 100644 tsdb/head_read_test.go create mode 100644 tsdb/ooo_head.go create mode 100644 tsdb/ooo_head_read.go create mode 100644 tsdb/ooo_head_read_test.go create mode 100644 tsdb/ooo_head_test.go diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index ba267dca17..596e962fd2 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -463,6 +463,9 @@ func main() { } cfg.tsdb.MaxExemplars = int64(cfgFile.StorageConfig.ExemplarsConfig.MaxExemplars) } + if cfgFile.StorageConfig.TSDBConfig != nil { + cfg.tsdb.OutOfOrderTimeWindow = cfgFile.StorageConfig.TSDBConfig.OutOfOrderTimeWindow + } // Now that the validity of the config is established, set the config // success metrics accordingly, although the config isn't really loaded @@ -1537,6 +1540,7 @@ type tsdbOptions struct { StripeSize int MinBlockDuration model.Duration MaxBlockDuration model.Duration + OutOfOrderTimeWindow int64 EnableExemplarStorage bool 
MaxExemplars int64 EnableMemorySnapshotOnShutdown bool @@ -1549,7 +1553,8 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { RetentionDuration: int64(time.Duration(opts.RetentionDuration) / time.Millisecond), MaxBytes: int64(opts.MaxBytes), NoLockfile: opts.NoLockfile, - AllowOverlappingBlocks: opts.AllowOverlappingBlocks, + AllowOverlappingCompaction: opts.AllowOverlappingBlocks, + AllowOverlappingQueries: opts.AllowOverlappingBlocks, WALCompression: opts.WALCompression, HeadChunksWriteQueueSize: opts.HeadChunksWriteQueueSize, StripeSize: opts.StripeSize, @@ -1558,6 +1563,7 @@ func (opts tsdbOptions) ToTSDBOptions() tsdb.Options { EnableExemplarStorage: opts.EnableExemplarStorage, MaxExemplars: opts.MaxExemplars, EnableMemorySnapshotOnShutdown: opts.EnableMemorySnapshotOnShutdown, + OutOfOrderTimeWindow: opts.OutOfOrderTimeWindow, } } diff --git a/cmd/promtool/rules_test.go b/cmd/promtool/rules_test.go index 1248c26bb0..a184311e5a 100644 --- a/cmd/promtool/rules_test.go +++ b/cmd/promtool/rules_test.go @@ -117,7 +117,8 @@ func TestBackfillRuleIntegration(t *testing.T) { } opts := tsdb.DefaultOptions() - opts.AllowOverlappingBlocks = true + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true db, err := tsdb.Open(tmpDir, nil, nil, opts, nil) require.NoError(t, err) @@ -245,7 +246,8 @@ func TestBackfillLabels(t *testing.T) { } opts := tsdb.DefaultOptions() - opts.AllowOverlappingBlocks = true + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true db, err := tsdb.Open(tmpDir, nil, nil, opts, nil) require.NoError(t, err) diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 7707a99043..7c7c8f6ec0 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -597,7 +597,7 @@ func analyzeCompaction(block tsdb.BlockReader, indexr tsdb.IndexReader) (err err for _, chk := range chks { // Load the actual data of the chunk. 
- chk, err := chunkr.Chunk(chk.Ref) + chk, err := chunkr.Chunk(chk) if err != nil { return err } diff --git a/config/config.go b/config/config.go index 036faaeef7..a13f397f81 100644 --- a/config/config.go +++ b/config/config.go @@ -501,9 +501,37 @@ func (c *ScrapeConfig) MarshalYAML() (interface{}, error) { // StorageConfig configures runtime reloadable configuration options. type StorageConfig struct { + TSDBConfig *TSDBConfig `yaml:"tsdb,omitempty"` ExemplarsConfig *ExemplarsConfig `yaml:"exemplars,omitempty"` } +// TSDBConfig configures runtime reloadable configuration options. +type TSDBConfig struct { + // OutOfOrderTimeWindow sets how long back in time an out-of-order sample can be inserted + // into the TSDB. This flag is typically set while unmarshaling the configuration file and translating + // OutOfOrderTimeWindowFlag's duration. The unit of this flag is expected to be the same as any + // other timestamp in the TSDB. + OutOfOrderTimeWindow int64 + + // OutOfOrderTimeWindowFlag holds the parsed duration from the config file. + // During unmarshall, this is converted into milliseconds and stored in OutOfOrderTimeWindow. + // This should not be used directly and must be converted into OutOfOrderTimeWindow. + OutOfOrderTimeWindowFlag model.Duration `yaml:"out_of_order_time_window,omitempty"` +} + +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (t *TSDBConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + *t = TSDBConfig{} + type plain TSDBConfig + if err := unmarshal((*plain)(t)); err != nil { + return err + } + + t.OutOfOrderTimeWindow = time.Duration(t.OutOfOrderTimeWindowFlag).Milliseconds() + + return nil +} + type TracingClientType string const ( diff --git a/storage/interface.go b/storage/interface.go index f5af49eb73..d73ec72203 100644 --- a/storage/interface.go +++ b/storage/interface.go @@ -27,10 +27,15 @@ import ( // The errors exposed. 
var ( - ErrNotFound = errors.New("not found") - ErrOutOfOrderSample = errors.New("out of order sample") + ErrNotFound = errors.New("not found") + // ErrOutOfOrderSample is when out of order support is disabled and the sample is out of order. + ErrOutOfOrderSample = errors.New("out of order sample") + // ErrOutOfBounds is when out of order support is disabled and the sample is older than the min valid time for the append. + ErrOutOfBounds = errors.New("out of bounds") + // ErrTooOldSample is when out of order support is enabled but the sample is outside the time window allowed. + ErrTooOldSample = errors.New("too old sample") + // ErrDuplicateSampleForTimestamp is when the sample has same timestamp but different value. ErrDuplicateSampleForTimestamp = errors.New("duplicate sample for timestamp") - ErrOutOfBounds = errors.New("out of bounds") ErrOutOfOrderExemplar = errors.New("out of order exemplar") ErrDuplicateExemplar = errors.New("duplicate exemplar") ErrExemplarLabelLength = fmt.Errorf("label length for exemplar exceeds maximum of %d UTF-8 characters", exemplar.ExemplarMaxLabelSetLength) diff --git a/storage/merge.go b/storage/merge.go index 7726f9bdc9..2f175d3e7e 100644 --- a/storage/merge.go +++ b/storage/merge.go @@ -717,3 +717,56 @@ func (h *chunkIteratorHeap) Pop() interface{} { *h = old[0 : n-1] return x } + +// NewConcatenatingChunkSeriesMerger returns a VerticalChunkSeriesMergeFunc that simply concatenates the +// chunks from the series. The resultant stream of chunks for a series might be overlapping and unsorted. 
+func NewConcatenatingChunkSeriesMerger() VerticalChunkSeriesMergeFunc { + return func(series ...ChunkSeries) ChunkSeries { + if len(series) == 0 { + return nil + } + return &ChunkSeriesEntry{ + Lset: series[0].Labels(), + ChunkIteratorFn: func() chunks.Iterator { + iterators := make([]chunks.Iterator, 0, len(series)) + for _, s := range series { + iterators = append(iterators, s.Iterator()) + } + return &concatenatingChunkIterator{ + iterators: iterators, + } + }, + } + } +} + +type concatenatingChunkIterator struct { + iterators []chunks.Iterator + idx int + + curr chunks.Meta +} + +func (c *concatenatingChunkIterator) At() chunks.Meta { + return c.curr +} + +func (c *concatenatingChunkIterator) Next() bool { + if c.idx >= len(c.iterators) { + return false + } + if c.iterators[c.idx].Next() { + c.curr = c.iterators[c.idx].At() + return true + } + c.idx++ + return c.Next() +} + +func (c *concatenatingChunkIterator) Err() error { + errs := tsdb_errors.NewMulti() + for _, iter := range c.iterators { + errs.Add(iter.Err()) + } + return errs.Err() +} diff --git a/storage/merge_test.go b/storage/merge_test.go index 90bc1f9d0e..36ce726b1c 100644 --- a/storage/merge_test.go +++ b/storage/merge_test.go @@ -499,6 +499,140 @@ func TestCompactingChunkSeriesMerger(t *testing.T) { } } +func TestConcatenatingChunkSeriesMerger(t *testing.T) { + m := NewConcatenatingChunkSeriesMerger() + + for _, tc := range []struct { + name string + input []ChunkSeries + expected ChunkSeries + }{ + { + name: "single empty series", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), + }, + { + name: "single series", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}}), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), 
[]tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}}), + }, + { + name: "two empty series", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), nil, nil), + }, + { + name: "two non overlapping", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{5, 5}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{5, 5}}, []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}), + }, + { + name: "two overlapping", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{8, 8}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), + []tsdbutil.Sample{sample{1, 1}, sample{2, 2}}, []tsdbutil.Sample{sample{3, 3}, sample{8, 8}}, + []tsdbutil.Sample{sample{7, 7}, sample{9, 9}}, []tsdbutil.Sample{sample{10, 10}}, + ), + }, + { + name: "two duplicated", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{5, 5}}), + }, + expected: 
NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), + []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}, + []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{5, 5}}, + ), + }, + { + name: "three overlapping", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{6, 6}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{4, 4}}), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), + []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}, + []tsdbutil.Sample{sample{2, 2}, sample{3, 3}, sample{6, 6}}, + []tsdbutil.Sample{sample{0, 0}, sample{4, 4}}, + ), + }, + { + name: "three in chained overlap", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{4, 4}, sample{6, 66}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{6, 6}, sample{10, 10}}), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), + []tsdbutil.Sample{sample{1, 1}, sample{2, 2}, sample{3, 3}, sample{5, 5}}, + []tsdbutil.Sample{sample{4, 4}, sample{6, 66}}, + []tsdbutil.Sample{sample{6, 6}, sample{10, 10}}, + ), + }, + { + name: "three in chained overlap complex", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{0, 0}, sample{5, 5}}, []tsdbutil.Sample{sample{10, 10}, sample{15, 15}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{2, 2}, sample{20, 20}}, 
[]tsdbutil.Sample{sample{25, 25}, sample{30, 30}}), + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), []tsdbutil.Sample{sample{18, 18}, sample{26, 26}}, []tsdbutil.Sample{sample{31, 31}, sample{35, 35}}), + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), + []tsdbutil.Sample{sample{0, 0}, sample{5, 5}}, []tsdbutil.Sample{sample{10, 10}, sample{15, 15}}, + []tsdbutil.Sample{sample{2, 2}, sample{20, 20}}, []tsdbutil.Sample{sample{25, 25}, sample{30, 30}}, + []tsdbutil.Sample{sample{18, 18}, sample{26, 26}}, []tsdbutil.Sample{sample{31, 31}, sample{35, 35}}, + ), + }, + { + name: "110 overlapping", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(0, 110)), // [0 - 110) + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(60, 50)), // [60 - 110) + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), + tsdbutil.GenerateSamples(0, 110), + tsdbutil.GenerateSamples(60, 50), + ), + }, + { + name: "150 overlapping samples, simply concatenated and no splits", + input: []ChunkSeries{ + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(0, 90)), // [0 - 90) + NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), tsdbutil.GenerateSamples(60, 90)), // [90 - 150) + }, + expected: NewListChunkSeriesFromSamples(labels.FromStrings("bar", "baz"), + tsdbutil.GenerateSamples(0, 90), + tsdbutil.GenerateSamples(60, 90), + ), + }, + } { + t.Run(tc.name, func(t *testing.T) { + merged := m(tc.input...) 
+ require.Equal(t, tc.expected.Labels(), merged.Labels()) + actChks, actErr := ExpandChunks(merged.Iterator()) + expChks, expErr := ExpandChunks(tc.expected.Iterator()) + + require.Equal(t, expErr, actErr) + require.Equal(t, expChks, actChks) + }) + } +} + type mockQuerier struct { LabelQuerier diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 3feb55623a..e210cdc99e 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -567,8 +567,7 @@ func (db *DB) truncate(mint int64) error { // Start a new segment so low ingestion volume instances don't have more WAL // than needed. - err = db.wal.NextSegment() - if err != nil { + if _, err := db.wal.NextSegment(); err != nil { return errors.Wrap(err, "next segment") } diff --git a/tsdb/block.go b/tsdb/block.go index 6b8b65dda7..8fd1066ba2 100644 --- a/tsdb/block.go +++ b/tsdb/block.go @@ -116,7 +116,7 @@ type ChunkWriter interface { // ChunkReader provides reading access of serialized time series data. type ChunkReader interface { // Chunk returns the series data chunk with the given reference. - Chunk(ref chunks.ChunkRef) (chunkenc.Chunk, error) + Chunk(meta chunks.Meta) (chunkenc.Chunk, error) // Close releases all underlying resources of the reader. Close() error @@ -189,12 +189,39 @@ type BlockMetaCompaction struct { // this block. Parents []BlockDesc `json:"parents,omitempty"` Failed bool `json:"failed,omitempty"` + // Additional information about the compaction, for example, block created from out-of-order chunks. 
+ Hints []string `json:"hints,omitempty"` +} + +func (bm *BlockMetaCompaction) SetOutOfOrder() { + if bm.containsHint(CompactionHintFromOutOfOrder) { + return + } + bm.Hints = append(bm.Hints, CompactionHintFromOutOfOrder) + sort.Strings(bm.Hints) +} + +func (bm *BlockMetaCompaction) FromOutOfOrder() bool { + return bm.containsHint(CompactionHintFromOutOfOrder) +} + +func (bm *BlockMetaCompaction) containsHint(hint string) bool { + for _, h := range bm.Hints { + if h == hint { + return true + } + } + return false } const ( indexFilename = "index" metaFilename = "meta.json" metaVersion1 = 1 + + // CompactionHintFromOutOfOrder is a hint noting that the block + // was created from out-of-order chunks. + CompactionHintFromOutOfOrder = "from-out-of-order" ) func chunkDir(dir string) string { return filepath.Join(dir, "chunks") } diff --git a/tsdb/block_test.go b/tsdb/block_test.go index cf208caf1b..9ebd823d31 100644 --- a/tsdb/block_test.go +++ b/tsdb/block_test.go @@ -27,6 +27,7 @@ import ( "testing" "github.com/go-kit/log" + prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" @@ -487,7 +488,7 @@ func createBlockFromHead(tb testing.TB, dir string, head *Head) string { func createHead(tb testing.TB, w *wal.WAL, series []storage.Series, chunkDir string) *Head { opts := DefaultHeadOptions() opts.ChunkDirRoot = chunkDir - head, err := NewHead(nil, nil, w, opts, nil) + head, err := NewHead(nil, nil, w, nil, opts, nil) require.NoError(tb, err) app := head.Appender(context.Background()) @@ -506,6 +507,66 @@ func createHead(tb testing.TB, w *wal.WAL, series []storage.Series, chunkDir str return head } +func createHeadWithOOOSamples(tb testing.TB, w *wal.WAL, series []storage.Series, chunkDir string, oooSampleFrequency int) *Head { + opts := DefaultHeadOptions() + opts.ChunkDirRoot = chunkDir + opts.OutOfOrderTimeWindow.Store(10000000000) + head, err := NewHead(nil, nil, w, 
nil, opts, nil) + require.NoError(tb, err) + + oooSampleLabels := make([]labels.Labels, 0, len(series)) + oooSamples := make([]tsdbutil.SampleSlice, 0, len(series)) + + totalSamples := 0 + app := head.Appender(context.Background()) + for _, s := range series { + ref := storage.SeriesRef(0) + it := s.Iterator() + lset := s.Labels() + os := tsdbutil.SampleSlice{} + count := 0 + for it.Next() { + totalSamples++ + count++ + t, v := it.At() + if count%oooSampleFrequency == 0 { + os = append(os, sample{t: t, v: v}) + continue + } + ref, err = app.Append(ref, lset, t, v) + require.NoError(tb, err) + } + require.NoError(tb, it.Err()) + if len(os) > 0 { + oooSampleLabels = append(oooSampleLabels, lset) + oooSamples = append(oooSamples, os) + } + } + require.NoError(tb, app.Commit()) + + oooSamplesAppended := 0 + require.Equal(tb, float64(0), prom_testutil.ToFloat64(head.metrics.outOfOrderSamplesAppended)) + + app = head.Appender(context.Background()) + for i, lset := range oooSampleLabels { + ref := storage.SeriesRef(0) + for _, sample := range oooSamples[i] { + ref, err = app.Append(ref, lset, sample.T(), sample.V()) + require.NoError(tb, err) + oooSamplesAppended++ + } + } + require.NoError(tb, app.Commit()) + + actOOOAppended := prom_testutil.ToFloat64(head.metrics.outOfOrderSamplesAppended) + require.GreaterOrEqual(tb, actOOOAppended, float64(oooSamplesAppended-len(series))) + require.LessOrEqual(tb, actOOOAppended, float64(oooSamplesAppended)) + + require.Equal(tb, float64(totalSamples), prom_testutil.ToFloat64(head.metrics.samplesAppended)) + + return head +} + const ( defaultLabelName = "labelName" defaultLabelValue = "labelValue" diff --git a/tsdb/blockwriter.go b/tsdb/blockwriter.go index 09b355368d..4db3079975 100644 --- a/tsdb/blockwriter.go +++ b/tsdb/blockwriter.go @@ -39,7 +39,7 @@ type BlockWriter struct { } // ErrNoSeriesAppended is returned if the series count is zero while flushing blocks. 
-var ErrNoSeriesAppended error = errors.New("no series appended, aborting") +var ErrNoSeriesAppended = errors.New("no series appended, aborting") // NewBlockWriter create a new block writer. // @@ -71,7 +71,7 @@ func (w *BlockWriter) initHead() error { opts := DefaultHeadOptions() opts.ChunkRange = w.blockSize opts.ChunkDirRoot = w.chunkDir - h, err := NewHead(nil, w.logger, nil, opts, NewHeadStats()) + h, err := NewHead(nil, w.logger, nil, nil, opts, NewHeadStats()) if err != nil { return errors.Wrap(err, "tsdb.NewHead") } diff --git a/tsdb/chunkenc/chunk.go b/tsdb/chunkenc/chunk.go index bffb7e75ab..c5f8036a71 100644 --- a/tsdb/chunkenc/chunk.go +++ b/tsdb/chunkenc/chunk.go @@ -39,6 +39,21 @@ const ( EncXOR ) +// Chunk encodings for out-of-order chunks. +// These encodings must be only used by the Head block for its internal bookkeeping. +const ( + OutOfOrderMask = 0b10000000 + EncOOOXOR = EncXOR | OutOfOrderMask +) + +func IsOutOfOrderChunk(e Encoding) bool { + return (e & OutOfOrderMask) != 0 +} + +func IsValidEncoding(e Encoding) bool { + return e == EncXOR || e == EncOOOXOR +} + // Chunk holds a sequence of sample pairs that can be iterated over and appended to. type Chunk interface { // Bytes returns the underlying byte slice of the chunk. @@ -155,7 +170,7 @@ func NewPool() Pool { func (p *pool) Get(e Encoding, b []byte) (Chunk, error) { switch e { - case EncXOR: + case EncXOR, EncOOOXOR: c := p.xor.Get().(*XORChunk) c.b.stream = b c.b.count = 0 @@ -166,7 +181,7 @@ func (p *pool) Get(e Encoding, b []byte) (Chunk, error) { func (p *pool) Put(c Chunk) error { switch c.Encoding() { - case EncXOR: + case EncXOR, EncOOOXOR: xc, ok := c.(*XORChunk) // This may happen often with wrapped chunks. Nothing we can really do about // it but returning an error would cause a lot of allocations again. Thus, @@ -188,7 +203,7 @@ func (p *pool) Put(c Chunk) error { // bytes. 
func FromData(e Encoding, d []byte) (Chunk, error) { switch e { - case EncXOR: + case EncXOR, EncOOOXOR: return &XORChunk{b: bstream{count: 0, stream: d}}, nil } return nil, errors.Errorf("invalid chunk encoding %q", e) diff --git a/tsdb/chunkenc/xor.go b/tsdb/chunkenc/xor.go index ba00a6e811..716f0698f0 100644 --- a/tsdb/chunkenc/xor.go +++ b/tsdb/chunkenc/xor.go @@ -457,3 +457,12 @@ func (it *xorIterator) readValue() bool { it.numRead++ return true } + +// OOOXORChunk holds a XORChunk and overrides the Encoding() method. +type OOOXORChunk struct { + *XORChunk +} + +func (c *OOOXORChunk) Encoding() Encoding { + return EncOOOXOR +} diff --git a/tsdb/chunks/chunks.go b/tsdb/chunks/chunks.go index a88884a2e6..6d04998e80 100644 --- a/tsdb/chunks/chunks.go +++ b/tsdb/chunks/chunks.go @@ -121,6 +121,15 @@ type Meta struct { // Time range the data covers. // When MaxTime == math.MaxInt64 the chunk is still open and being appended to. MinTime, MaxTime int64 + + // OOOLastRef, OOOLastMinTime and OOOLastMaxTime are kept as markers for + // overlapping chunks. + // These fields point to the last created out of order Chunk (the head) that existed + // when Series() was called and was overlapping. + // Series() and Chunk() method responses should be consistent for the same + // query even if new data is added in between the calls. + OOOLastRef ChunkRef + OOOLastMinTime, OOOLastMaxTime int64 } // Iterator iterates over the chunks of a single time series. @@ -556,8 +565,8 @@ func (s *Reader) Size() int64 { } // Chunk returns a chunk from a given reference. 
-func (s *Reader) Chunk(ref ChunkRef) (chunkenc.Chunk, error) { - sgmIndex, chkStart := BlockChunkRef(ref).Unpack() +func (s *Reader) Chunk(meta Meta) (chunkenc.Chunk, error) { + sgmIndex, chkStart := BlockChunkRef(meta.Ref).Unpack() if sgmIndex >= len(s.bs) { return nil, errors.Errorf("segment index %d out of range", sgmIndex) diff --git a/tsdb/chunks/chunks_test.go b/tsdb/chunks/chunks_test.go index 6a4d13db82..affaa4b9f1 100644 --- a/tsdb/chunks/chunks_test.go +++ b/tsdb/chunks/chunks_test.go @@ -23,6 +23,6 @@ func TestReaderWithInvalidBuffer(t *testing.T) { b := realByteSlice([]byte{0x81, 0x81, 0x81, 0x81, 0x81, 0x81}) r := &Reader{bs: []ByteSlice{b}} - _, err := r.Chunk(0) + _, err := r.Chunk(Meta{Ref: 0}) require.Error(t, err) } diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go index edd7dd5419..dce874a35f 100644 --- a/tsdb/chunks/head_chunks.go +++ b/tsdb/chunks/head_chunks.go @@ -87,6 +87,18 @@ func (ref ChunkDiskMapperRef) Unpack() (seq, offset int) { return seq, offset } +func (ref ChunkDiskMapperRef) GreaterThanOrEqualTo(r ChunkDiskMapperRef) bool { + s1, o1 := ref.Unpack() + s2, o2 := r.Unpack() + return s1 > s2 || (s1 == s2 && o1 >= o2) +} + +func (ref ChunkDiskMapperRef) GreaterThan(r ChunkDiskMapperRef) bool { + s1, o1 := ref.Unpack() + s2, o2 := r.Unpack() + return s1 > s2 || (s1 == s2 && o1 > o2) +} + // CorruptionErr is an error that's returned when corruption is encountered. type CorruptionErr struct { Dir string @@ -736,7 +748,7 @@ func (cdm *ChunkDiskMapper) Chunk(ref ChunkDiskMapperRef) (chunkenc.Chunk, error // and runs the provided function with information about each chunk. It returns on the first error encountered. // NOTE: This method needs to be called at least once after creating ChunkDiskMapper // to set the maxt of all the file. 
-func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error) (err error) { +func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16, encoding chunkenc.Encoding) error) (err error) { cdm.writePathMtx.Lock() defer cdm.writePathMtx.Unlock() @@ -799,7 +811,8 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu break } - idx += ChunkEncodingSize // Skip encoding. + chkEnc := chunkenc.Encoding(mmapFile.byteSlice.Range(idx, idx+ChunkEncodingSize)[0]) + idx += ChunkEncodingSize dataLen, n := binary.Uvarint(mmapFile.byteSlice.Range(idx, idx+MaxChunkLengthFieldSize)) idx += n @@ -834,7 +847,7 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu mmapFile.maxt = maxt } - if err := f(seriesRef, chunkRef, mint, maxt, numSamples); err != nil { + if err := f(seriesRef, chunkRef, mint, maxt, numSamples, chkEnc); err != nil { if cerr, ok := err.(*CorruptionErr); ok { cerr.Dir = cdm.dir.Name() cerr.FileIndex = segID @@ -857,12 +870,8 @@ func (cdm *ChunkDiskMapper) IterateAllChunks(f func(seriesRef HeadSeriesRef, chu return nil } -// Truncate deletes the head chunk files which are strictly below the mint. -// mint should be in milliseconds. -func (cdm *ChunkDiskMapper) Truncate(mint int64) error { - if !cdm.fileMaxtSet { - return errors.New("maxt of the files are not set") - } +// Truncate deletes the head chunk files whose file number is less than given fileNo. 
+func (cdm *ChunkDiskMapper) Truncate(fileNo uint32) error { cdm.readPathMtx.RLock() // Sort the file indices, else if files deletion fails in between, @@ -875,12 +884,10 @@ func (cdm *ChunkDiskMapper) Truncate(mint int64) error { var removedFiles []int for _, seq := range chkFileIndices { - if seq == cdm.curFileSequence || cdm.mmappedChunkFiles[seq].maxt >= mint { + if seq == cdm.curFileSequence || uint32(seq) >= fileNo { break } - if cdm.mmappedChunkFiles[seq].maxt < mint { - removedFiles = append(removedFiles, seq) - } + removedFiles = append(removedFiles, seq) } cdm.readPathMtx.RUnlock() diff --git a/tsdb/chunks/head_chunks_test.go b/tsdb/chunks/head_chunks_test.go index cc4fc2c09f..68a44479a8 100644 --- a/tsdb/chunks/head_chunks_test.go +++ b/tsdb/chunks/head_chunks_test.go @@ -58,6 +58,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) { mint, maxt int64 numSamples uint16 chunk chunkenc.Chunk + isOOO bool } expectedData := []expectedDataType{} @@ -67,7 +68,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) { for hrw.curFileSequence < 3 || hrw.chkWriter.Buffered() == 0 { addChunks := func(numChunks int) { for i := 0; i < numChunks; i++ { - seriesRef, chkRef, mint, maxt, chunk := createChunk(t, totalChunks, hrw) + seriesRef, chkRef, mint, maxt, chunk, isOOO := createChunk(t, totalChunks, hrw) totalChunks++ expectedData = append(expectedData, expectedDataType{ seriesRef: seriesRef, @@ -76,6 +77,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) { chunkRef: chkRef, chunk: chunk, numSamples: uint16(chunk.NumSamples()), + isOOO: isOOO, }) if hrw.curFileSequence != 1 { @@ -147,7 +149,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) { hrw = createChunkDiskMapper(t, dir) idx := 0 - require.NoError(t, hrw.IterateAllChunks(func(seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error { + require.NoError(t, 
hrw.IterateAllChunks(func(seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, numSamples uint16, encoding chunkenc.Encoding) error { t.Helper() expData := expectedData[idx] @@ -156,6 +158,7 @@ func TestChunkDiskMapper_WriteChunk_Chunk_IterateChunks(t *testing.T) { require.Equal(t, expData.maxt, maxt) require.Equal(t, expData.maxt, maxt) require.Equal(t, expData.numSamples, numSamples) + require.Equal(t, expData.isOOO, chunkenc.IsOutOfOrderChunk(encoding)) actChunk, err := hrw.Chunk(expData.chunkRef) require.NoError(t, err) @@ -178,9 +181,7 @@ func TestChunkDiskMapper_Truncate(t *testing.T) { }() timeRange := 0 - fileTimeStep := 100 - var thirdFileMinT, sixthFileMinT int64 - addChunk := func() int { + addChunk := func() { t.Helper() step := 100 @@ -194,8 +195,6 @@ func TestChunkDiskMapper_Truncate(t *testing.T) { <-awaitCb require.NoError(t, err) timeRange += step - - return mint } verifyFiles := func(remainingFiles []int) { @@ -216,17 +215,12 @@ func TestChunkDiskMapper_Truncate(t *testing.T) { // Create segments 1 to 7. for i := 1; i <= 7; i++ { hrw.CutNewFile() - mint := int64(addChunk()) - if i == 3 { - thirdFileMinT = mint - } else if i == 6 { - sixthFileMinT = mint - } + addChunk() } verifyFiles([]int{1, 2, 3, 4, 5, 6, 7}) // Truncating files. - require.NoError(t, hrw.Truncate(thirdFileMinT)) + require.NoError(t, hrw.Truncate(3)) // Add a chunk to trigger cutting of new file. addChunk() @@ -245,11 +239,11 @@ func TestChunkDiskMapper_Truncate(t *testing.T) { verifyFiles([]int{3, 4, 5, 6, 7, 8, 9}) // Truncating files after restart. - require.NoError(t, hrw.Truncate(sixthFileMinT)) + require.NoError(t, hrw.Truncate(6)) verifyFiles([]int{6, 7, 8, 9}) // Truncating a second time without adding a chunk shouldn't create a new file. - require.NoError(t, hrw.Truncate(sixthFileMinT+1)) + require.NoError(t, hrw.Truncate(6)) verifyFiles([]int{6, 7, 8, 9}) // Add a chunk to trigger cutting of new file. 
@@ -257,8 +251,12 @@ func TestChunkDiskMapper_Truncate(t *testing.T) { verifyFiles([]int{6, 7, 8, 9, 10}) + // Truncation by file number. + require.NoError(t, hrw.Truncate(8)) + verifyFiles([]int{8, 9, 10}) + // Truncating till current time should not delete the current active file. - require.NoError(t, hrw.Truncate(int64(timeRange+(2*fileTimeStep)))) + require.NoError(t, hrw.Truncate(10)) // Add a chunk to trigger cutting of new file. addChunk() @@ -335,8 +333,7 @@ func TestChunkDiskMapper_Truncate_PreservesFileSequence(t *testing.T) { // Truncating files till 2. It should not delete anything after 3 (inclusive) // though files 4 and 6 are empty. - file2Maxt := hrw.mmappedChunkFiles[2].maxt - require.NoError(t, hrw.Truncate(file2Maxt+1)) + require.NoError(t, hrw.Truncate(3)) verifyFiles([]int{3, 4, 5, 6}) // Add chunk, so file 6 is not empty anymore. @@ -344,8 +341,7 @@ func TestChunkDiskMapper_Truncate_PreservesFileSequence(t *testing.T) { verifyFiles([]int{3, 4, 5, 6}) // Truncating till file 3 should also delete file 4, because it is empty. - file3Maxt := hrw.mmappedChunkFiles[3].maxt - require.NoError(t, hrw.Truncate(file3Maxt+1)) + require.NoError(t, hrw.Truncate(5)) addChunk() verifyFiles([]int{5, 6, 7}) @@ -381,7 +377,7 @@ func TestHeadReadWriter_TruncateAfterFailedIterateChunks(t *testing.T) { hrw = createChunkDiskMapper(t, dir) // Forcefully failing IterateAllChunks. 
- require.Error(t, hrw.IterateAllChunks(func(_ HeadSeriesRef, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { + require.Error(t, hrw.IterateAllChunks(func(_ HeadSeriesRef, _ ChunkDiskMapperRef, _, _ int64, _ uint16, _ chunkenc.Encoding) error { return errors.New("random error") })) @@ -471,7 +467,9 @@ func createChunkDiskMapper(t *testing.T, dir string) *ChunkDiskMapper { hrw, err := NewChunkDiskMapper(nil, dir, chunkenc.NewPool(), DefaultWriteBufferSize, writeQueueSize) require.NoError(t, err) require.False(t, hrw.fileMaxtSet) - require.NoError(t, hrw.IterateAllChunks(func(_ HeadSeriesRef, _ ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil })) + require.NoError(t, hrw.IterateAllChunks(func(_ HeadSeriesRef, _ ChunkDiskMapperRef, _, _ int64, _ uint16, _ chunkenc.Encoding) error { + return nil + })) require.True(t, hrw.fileMaxtSet) return hrw @@ -488,13 +486,17 @@ func randomChunk(t *testing.T) chunkenc.Chunk { return chunk } -func createChunk(t *testing.T, idx int, hrw *ChunkDiskMapper) (seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, chunk chunkenc.Chunk) { +func createChunk(t *testing.T, idx int, hrw *ChunkDiskMapper) (seriesRef HeadSeriesRef, chunkRef ChunkDiskMapperRef, mint, maxt int64, chunk chunkenc.Chunk, isOOO bool) { var err error seriesRef = HeadSeriesRef(rand.Int63()) mint = int64((idx)*1000 + 1) maxt = int64((idx + 1) * 1000) chunk = randomChunk(t) awaitCb := make(chan struct{}) + if rand.Intn(2) == 0 { + isOOO = true + chunk = &chunkenc.OOOXORChunk{XORChunk: chunk.(*chunkenc.XORChunk)} + } chunkRef = hrw.WriteChunk(seriesRef, mint, maxt, chunk, func(cbErr error) { require.NoError(t, err) close(awaitCb) diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index 9b55131bef..9f24a81428 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -1080,7 +1080,7 @@ func BenchmarkCompactionFromHead(b *testing.B) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = chunkDir - h, err := 
NewHead(nil, nil, nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(b, err) for ln := 0; ln < labelNames; ln++ { app := h.Appender(context.Background()) diff --git a/tsdb/db.go b/tsdb/db.go index 00c1bceedf..7cf70bcc2d 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -33,6 +33,7 @@ import ( "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "go.uber.org/atomic" "golang.org/x/sync/errgroup" "github.com/prometheus/prometheus/config" @@ -69,18 +70,19 @@ var ErrNotReady = errors.New("TSDB not ready") // millisecond precision timestamps. func DefaultOptions() *Options { return &Options{ - WALSegmentSize: wal.DefaultSegmentSize, - MaxBlockChunkSegmentSize: chunks.DefaultChunkSegmentSize, - RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond), - MinBlockDuration: DefaultBlockDuration, - MaxBlockDuration: DefaultBlockDuration, - NoLockfile: false, - AllowOverlappingBlocks: false, - WALCompression: false, - StripeSize: DefaultStripeSize, - HeadChunksWriteBufferSize: chunks.DefaultWriteBufferSize, - IsolationDisabled: defaultIsolationDisabled, - HeadChunksWriteQueueSize: chunks.DefaultWriteQueueSize, + WALSegmentSize: wal.DefaultSegmentSize, + MaxBlockChunkSegmentSize: chunks.DefaultChunkSegmentSize, + RetentionDuration: int64(15 * 24 * time.Hour / time.Millisecond), + MinBlockDuration: DefaultBlockDuration, + MaxBlockDuration: DefaultBlockDuration, + NoLockfile: false, + AllowOverlappingCompaction: false, + AllowOverlappingQueries: false, + WALCompression: false, + StripeSize: DefaultStripeSize, + HeadChunksWriteBufferSize: chunks.DefaultWriteBufferSize, + IsolationDisabled: defaultIsolationDisabled, + OutOfOrderCapMax: DefaultOutOfOrderCapMax, } } @@ -112,9 +114,19 @@ type Options struct { // NoLockfile disables creation and consideration of a lock file. NoLockfile bool - // Overlapping blocks are allowed if AllowOverlappingBlocks is true. 
- // This in-turn enables vertical compaction and vertical query merge. - AllowOverlappingBlocks bool + // Querying on overlapping blocks are allowed if AllowOverlappingQueries is true. + // Since querying is a required operation for TSDB, if there are going to be + // overlapping blocks, then this should be set to true. + // NOTE: Do not use this directly in DB. Use it via DB.AllowOverlappingQueries(). + AllowOverlappingQueries bool + + // Compaction of overlapping blocks are allowed if AllowOverlappingCompaction is true. + // This is an optional flag for overlapping blocks. + // The reason why this flag exists is because there are various users of the TSDB + // that do not want vertical compaction happening on ingest time. Instead, + // they'd rather keep overlapping blocks and let another component do the overlapping compaction later. + // For Prometheus, this will always be enabled if overlapping queries is enabled. + AllowOverlappingCompaction bool // WALCompression will turn on Snappy compression for records on the WAL. WALCompression bool @@ -160,6 +172,15 @@ type Options struct { // Disables isolation between reads and in-flight appends. IsolationDisabled bool + + // OutOfOrderTimeWindow specifies how much out of order is allowed, if any. + // This can change during run-time, so this value from here should only be used + // while initialising. + OutOfOrderTimeWindow int64 + + // OutOfOrderCapMax is maximum capacity for OOO chunks (in samples). + // If it is <=0, the default value is assumed. + OutOfOrderCapMax int64 } type BlocksToDeleteFunc func(blocks []*Block) map[ulid.ULID]struct{} @@ -197,6 +218,13 @@ type DB struct { // Cancel a running compaction when a shutdown is initiated. compactCancel context.CancelFunc + + // oooWasEnabled is true if out of order support was enabled at least one time + // during the time TSDB was up. In which case we need to keep supporting + // out-of-order compaction and vertical queries. 
+ oooWasEnabled atomic.Bool + + registerer prometheus.Registerer } type dbMetrics struct { @@ -372,9 +400,17 @@ func (db *DBReadOnly) FlushWAL(dir string) (returnErr error) { if err != nil { return err } + var wbl *wal.WAL + wblDir := filepath.Join(db.dir, wal.WblDirName) + if _, err := os.Stat(wblDir); !os.IsNotExist(err) { + wbl, err = wal.Open(db.logger, wblDir) + if err != nil { + return err + } + } opts := DefaultHeadOptions() opts.ChunkDirRoot = db.dir - head, err := NewHead(nil, db.logger, w, opts, NewHeadStats()) + head, err := NewHead(nil, db.logger, w, wbl, opts, NewHeadStats()) if err != nil { return err } @@ -430,7 +466,7 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue opts := DefaultHeadOptions() opts.ChunkDirRoot = db.dir - head, err := NewHead(nil, db.logger, nil, opts, NewHeadStats()) + head, err := NewHead(nil, db.logger, nil, nil, opts, NewHeadStats()) if err != nil { return nil, err } @@ -448,9 +484,17 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue if err != nil { return nil, err } + var wbl *wal.WAL + wblDir := filepath.Join(db.dir, wal.WblDirName) + if _, err := os.Stat(wblDir); !os.IsNotExist(err) { + wbl, err = wal.Open(db.logger, wblDir) + if err != nil { + return nil, err + } + } opts := DefaultHeadOptions() opts.ChunkDirRoot = db.dir - head, err = NewHead(nil, db.logger, w, opts, NewHeadStats()) + head, err = NewHead(nil, db.logger, w, wbl, opts, NewHeadStats()) if err != nil { return nil, err } @@ -598,6 +642,15 @@ func validateOpts(opts *Options, rngs []int64) (*Options, []int64) { if opts.MinBlockDuration > opts.MaxBlockDuration { opts.MaxBlockDuration = opts.MinBlockDuration } + if opts.OutOfOrderTimeWindow > 0 { + opts.AllowOverlappingQueries = true + } + if opts.OutOfOrderCapMax <= 0 { + opts.OutOfOrderCapMax = DefaultOutOfOrderCapMax + } + if opts.OutOfOrderTimeWindow < 0 { + opts.OutOfOrderTimeWindow = 0 + } if len(rngs) == 0 { // Start with smallest block 
duration and create exponential buckets until the exceed the @@ -634,6 +687,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs } walDir := filepath.Join(dir, "wal") + wblDir := filepath.Join(dir, wal.WblDirName) // Migrate old WAL if one exists. if err := MigrateWAL(l, walDir); err != nil { @@ -656,6 +710,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs autoCompact: true, chunkPool: chunkenc.NewPool(), blocksToDelete: opts.BlocksToDelete, + registerer: r, } defer func() { // Close files if startup fails somewhere. @@ -694,7 +749,7 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs } db.compactCancel = cancel - var wlog *wal.WAL + var wlog, wblog *wal.WAL segmentSize := wal.DefaultSegmentSize // Wal is enabled. if opts.WALSegmentSize >= 0 { @@ -706,8 +761,19 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs if err != nil { return nil, err } + // Check if there is a WBL on disk, in which case we should replay that data. 
+ wblSize, err := fileutil.DirSize(wblDir) + if err != nil && !os.IsNotExist(err) { + return nil, err + } + if opts.OutOfOrderTimeWindow > 0 || wblSize > 0 { + wblog, err = wal.NewSize(l, r, wblDir, segmentSize, opts.WALCompression) + if err != nil { + return nil, err + } + } } - + db.oooWasEnabled.Store(opts.OutOfOrderTimeWindow > 0) headOpts := DefaultHeadOptions() headOpts.ChunkRange = rngs[0] headOpts.ChunkDirRoot = dir @@ -719,11 +785,13 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs headOpts.EnableExemplarStorage = opts.EnableExemplarStorage headOpts.MaxExemplars.Store(opts.MaxExemplars) headOpts.EnableMemorySnapshotOnShutdown = opts.EnableMemorySnapshotOnShutdown + headOpts.OutOfOrderTimeWindow.Store(opts.OutOfOrderTimeWindow) + headOpts.OutOfOrderCapMax.Store(opts.OutOfOrderCapMax) if opts.IsolationDisabled { // We only override this flag if isolation is disabled at DB level. We use the default otherwise. headOpts.IsolationDisabled = opts.IsolationDisabled } - db.head, err = NewHead(r, l, wlog, headOpts, stats.Head) + db.head, err = NewHead(r, l, wlog, wblog, headOpts, stats.Head) if err != nil { return nil, err } @@ -741,20 +809,36 @@ func open(dir string, l log.Logger, r prometheus.Registerer, opts *Options, rngs } // Set the min valid time for the ingested samples // to be no lower than the maxt of the last block. - blocks := db.Blocks() minValidTime := int64(math.MinInt64) - if len(blocks) > 0 { - minValidTime = blocks[len(blocks)-1].Meta().MaxTime + // We do not consider blocks created from out-of-order samples for Head's minValidTime + // since minValidTime is only for the in-order data and we do not want to discard unnecessary + // samples from the Head. 
+ inOrderMaxTime, ok := db.inOrderBlocksMaxTime() + if ok { + minValidTime = inOrderMaxTime } if initErr := db.head.Init(minValidTime); initErr != nil { db.head.metrics.walCorruptionsTotal.Inc() - level.Warn(db.logger).Log("msg", "Encountered WAL read error, attempting repair", "err", initErr) - if err := wlog.Repair(initErr); err != nil { - return nil, errors.Wrap(err, "repair corrupted WAL") + isOOOErr := isErrLoadOOOWal(initErr) + if isOOOErr { + level.Warn(db.logger).Log("msg", "Encountered OOO WAL read error, attempting repair", "err", initErr) + if err := wblog.Repair(initErr); err != nil { + return nil, errors.Wrap(err, "repair corrupted OOO WAL") + } + } else { + level.Warn(db.logger).Log("msg", "Encountered WAL read error, attempting repair", "err", initErr) + if err := wlog.Repair(initErr); err != nil { + return nil, errors.Wrap(err, "repair corrupted WAL") + } } } + if db.head.MinOOOTime() != int64(math.MaxInt64) { + // Some OOO data was replayed from the disk that needs compaction and cleanup. + db.oooWasEnabled.Store(true) + } + go db.run() return db, nil @@ -846,8 +930,58 @@ func (db *DB) Appender(ctx context.Context) storage.Appender { return dbAppender{db: db, Appender: db.head.Appender(ctx)} } +// ApplyConfig applies a new config to the DB. +// Behaviour of 'OutOfOrderTimeWindow' is as follows: +// OOO enabled = oooTimeWindow > 0. OOO disabled = oooTimeWindow is 0. +// 1) Before: OOO disabled, Now: OOO enabled => +// - A new WBL is created for the head block. +// - OOO compaction is enabled. +// - Overlapping queries are enabled. +// +// 2) Before: OOO enabled, Now: OOO enabled => +// - Only the time window is updated. +// +// 3) Before: OOO enabled, Now: OOO disabled => +// - Time Window set to 0. So no new OOO samples will be allowed. +// - OOO WBL will stay and will be eventually cleaned up. +// - OOO Compaction and overlapping queries will remain enabled until a restart or until all OOO samples are compacted. 
+// +// 4) Before: OOO disabled, Now: OOO disabled => no-op. func (db *DB) ApplyConfig(conf *config.Config) error { - return db.head.ApplyConfig(conf) + oooTimeWindow := int64(0) + if conf.StorageConfig.TSDBConfig != nil { + oooTimeWindow = conf.StorageConfig.TSDBConfig.OutOfOrderTimeWindow + } + if oooTimeWindow < 0 { + oooTimeWindow = 0 + } + + // Create WBL if it was not present and if OOO is enabled with WAL enabled. + var wblog *wal.WAL + var err error + if db.head.wbl != nil { + // The existing WBL from the disk might have been replayed while OOO was disabled. + wblog = db.head.wbl + } else if !db.oooWasEnabled.Load() && oooTimeWindow > 0 && db.opts.WALSegmentSize >= 0 { + segmentSize := wal.DefaultSegmentSize + // Wal is set to a custom size. + if db.opts.WALSegmentSize > 0 { + segmentSize = db.opts.WALSegmentSize + } + oooWalDir := filepath.Join(db.dir, wal.WblDirName) + wblog, err = wal.NewSize(db.logger, db.registerer, oooWalDir, segmentSize, db.opts.WALCompression) + if err != nil { + return err + } + } + + db.opts.OutOfOrderTimeWindow = oooTimeWindow + db.head.ApplyConfig(conf, wblog) + + if !db.oooWasEnabled.Load() { + db.oooWasEnabled.Store(oooTimeWindow > 0) + } + return nil } // dbAppender wraps the DB's head appender and triggers compactions on commit @@ -946,6 +1080,14 @@ func (db *DB) Compact() (returnErr error) { "block_range", db.head.chunkRange.Load(), ) } + + if lastBlockMaxt != math.MinInt64 { + // The head was compacted, so we compact OOO head as well. + if err := db.compactOOOHead(); err != nil { + return errors.Wrap(err, "compact ooo head") + } + } + return db.compactBlocks() } @@ -964,6 +1106,102 @@ func (db *DB) CompactHead(head *RangeHead) error { return nil } +// CompactOOOHead compacts the OOO Head. 
+func (db *DB) CompactOOOHead() error { + db.cmtx.Lock() + defer db.cmtx.Unlock() + + return db.compactOOOHead() +} + +func (db *DB) compactOOOHead() error { + if !db.oooWasEnabled.Load() { + return nil + } + oooHead, err := NewOOOCompactionHead(db.head) + if err != nil { + return errors.Wrap(err, "get ooo compaction head") + } + + ulids, err := db.compactOOO(db.dir, oooHead) + if err != nil { + return errors.Wrap(err, "compact ooo head") + } + if err := db.reloadBlocks(); err != nil { + errs := tsdb_errors.NewMulti(err) + for _, uid := range ulids { + if errRemoveAll := os.RemoveAll(filepath.Join(db.dir, uid.String())); errRemoveAll != nil { + errs.Add(errRemoveAll) + } + } + return errors.Wrap(errs.Err(), "reloadBlocks blocks after failed compact ooo head") + } + + lastWBLFile, minOOOMmapRef := oooHead.LastWBLFile(), oooHead.LastMmapRef() + if lastWBLFile != 0 || minOOOMmapRef != 0 { + if err := db.head.truncateOOO(lastWBLFile, minOOOMmapRef); err != nil { + return errors.Wrap(err, "truncate ooo wbl") + } + } + + return nil +} + +// compactOOO creates a new block per possible block range in the compactor's directory from the OOO Head given. +// Each ULID in the result corresponds to a block in a unique time range. +func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID, err error) { + start := time.Now() + + blockSize := oooHead.ChunkRange() + oooHeadMint, oooHeadMaxt := oooHead.MinTime(), oooHead.MaxTime() + ulids := make([]ulid.ULID, 0) + defer func() { + if err != nil { + // Best effort removal of created block on any error. + for _, uid := range ulids { + _ = os.RemoveAll(filepath.Join(db.dir, uid.String())) + } + } + }() + + for t := blockSize * (oooHeadMint / blockSize); t <= oooHeadMaxt; t = t + blockSize { + mint, maxt := t, t+blockSize + // Block intervals are half-open: [b.MinTime, b.MaxTime). Block intervals are always +1 than the total samples it includes. 
+ uid, err := db.compactor.Write(dest, oooHead.CloneForTimeRange(mint, maxt-1), mint, maxt, nil) + if err != nil { + return nil, err + } + if uid.Compare(ulid.ULID{}) != 0 { + ulids = append(ulids, uid) + blockDir := filepath.Join(dest, uid.String()) + meta, _, err := readMetaFile(blockDir) + if err != nil { + return ulids, errors.Wrap(err, "read meta") + } + meta.Compaction.SetOutOfOrder() + _, err = writeMetaFile(db.logger, blockDir, meta) + if err != nil { + return ulids, errors.Wrap(err, "write meta") + } + } + } + + if len(ulids) == 0 { + level.Info(db.logger).Log( + "msg", "compact ooo head resulted in no blocks", + "duration", time.Since(start), + ) + return nil, nil + } + + level.Info(db.logger).Log( + "msg", "out-of-order compaction completed", + "duration", time.Since(start), + "ulids", fmt.Sprintf("%v", ulids), + ) + return ulids, nil +} + // compactHead compacts the given RangeHead. // The compaction mutex should be held before calling this method. func (db *DB) compactHead(head *RangeHead) error { @@ -1038,10 +1276,11 @@ func (db *DB) reload() error { if err := db.reloadBlocks(); err != nil { return errors.Wrap(err, "reloadBlocks") } - if len(db.blocks) == 0 { + maxt, ok := db.inOrderBlocksMaxTime() + if !ok { return nil } - if err := db.head.Truncate(db.blocks[len(db.blocks)-1].MaxTime()); err != nil { + if err := db.head.Truncate(maxt); err != nil { return errors.Wrap(err, "head truncate") } return nil @@ -1121,7 +1360,7 @@ func (db *DB) reloadBlocks() (err error) { sort.Slice(toLoad, func(i, j int) bool { return toLoad[i].Meta().MinTime < toLoad[j].Meta().MinTime }) - if !db.opts.AllowOverlappingBlocks { + if !db.AllowOverlappingQueries() { if err := validateBlockSequence(toLoad); err != nil { return errors.Wrap(err, "invalid block sequence") } @@ -1151,6 +1390,10 @@ func (db *DB) reloadBlocks() (err error) { return nil } +func (db *DB) AllowOverlappingQueries() bool { + return db.opts.AllowOverlappingQueries || db.oooWasEnabled.Load() +} + func 
openBlocks(l log.Logger, dir string, loaded []*Block, chunkPool chunkenc.Pool) (blocks []*Block, corrupted map[ulid.ULID]error, err error) { bDirs, err := blockDirs(dir) if err != nil { @@ -1428,6 +1671,21 @@ func (db *DB) Blocks() []*Block { return db.blocks } +// inOrderBlocksMaxTime returns the max time among the blocks that were not totally created +// out of out-of-order data. If the returned boolean is true, it means there is at least +// one such block. +func (db *DB) inOrderBlocksMaxTime() (maxt int64, ok bool) { + maxt, ok = int64(math.MinInt64), false + // If blocks are overlapping, last block might not have the max time. So check all blocks. + for _, b := range db.Blocks() { + if !b.meta.Compaction.FromOutOfOrder() && b.meta.MaxTime > maxt { + ok = true + maxt = b.meta.MaxTime + } + } + return maxt, ok +} + // Head returns the databases's head. func (db *DB) Head() *Head { return db.head @@ -1526,13 +1784,13 @@ func (db *DB) Querier(_ context.Context, mint, maxt int64) (storage.Querier, err blocks = append(blocks, b) } } - var headQuerier storage.Querier + var inOrderHeadQuerier storage.Querier if maxt >= db.head.MinTime() { rh := NewRangeHead(db.head, mint, maxt) var err error - headQuerier, err = NewBlockQuerier(rh, mint, maxt) + inOrderHeadQuerier, err = NewBlockQuerier(rh, mint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open querier for head %s", rh) + return nil, errors.Wrapf(err, "open block querier for head %s", rh) } // Getting the querier above registers itself in the queue that the truncation waits on. @@ -1540,20 +1798,30 @@ func (db *DB) Querier(_ context.Context, mint, maxt int64) (storage.Querier, err // won't run into a race later since any truncation that comes after will wait on this querier if it overlaps. 
shouldClose, getNew, newMint := db.head.IsQuerierCollidingWithTruncation(mint, maxt) if shouldClose { - if err := headQuerier.Close(); err != nil { - return nil, errors.Wrapf(err, "closing head querier %s", rh) + if err := inOrderHeadQuerier.Close(); err != nil { + return nil, errors.Wrapf(err, "closing head block querier %s", rh) } - headQuerier = nil + inOrderHeadQuerier = nil } if getNew { rh := NewRangeHead(db.head, newMint, maxt) - headQuerier, err = NewBlockQuerier(rh, newMint, maxt) + inOrderHeadQuerier, err = NewBlockQuerier(rh, newMint, maxt) if err != nil { - return nil, errors.Wrapf(err, "open querier for head while getting new querier %s", rh) + return nil, errors.Wrapf(err, "open block querier for head while getting new querier %s", rh) } } } + var outOfOrderHeadQuerier storage.Querier + if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) { + rh := NewOOORangeHead(db.head, mint, maxt) + var err error + outOfOrderHeadQuerier, err = NewBlockQuerier(rh, mint, maxt) + if err != nil { + return nil, errors.Wrapf(err, "open block querier for ooo head %s", rh) + } + } + blockQueriers := make([]storage.Querier, 0, len(blocks)) for _, b := range blocks { q, err := NewBlockQuerier(b, mint, maxt) @@ -1568,14 +1836,18 @@ func (db *DB) Querier(_ context.Context, mint, maxt int64) (storage.Querier, err } return nil, errors.Wrapf(err, "open querier for block %s", b) } - if headQuerier != nil { - blockQueriers = append(blockQueriers, headQuerier) + if inOrderHeadQuerier != nil { + blockQueriers = append(blockQueriers, inOrderHeadQuerier) + } + if outOfOrderHeadQuerier != nil { + blockQueriers = append(blockQueriers, outOfOrderHeadQuerier) } return storage.NewMergeQuerier(blockQueriers, nil, storage.ChainedSeriesMerge), nil } -// ChunkQuerier returns a new chunk querier over the data partition for the given time range. 
-func (db *DB) ChunkQuerier(_ context.Context, mint, maxt int64) (storage.ChunkQuerier, error) { +// blockQueriersForRange returns individual block chunk queriers from the persistent blocks, in-order head block, and the +// out-of-order head block, overlapping with the given time range. +func (db *DB) blockChunkQuerierForRange(mint, maxt int64) ([]storage.ChunkQuerier, error) { var blocks []BlockReader db.mtx.RLock() @@ -1586,11 +1858,11 @@ func (db *DB) ChunkQuerier(_ context.Context, mint, maxt int64) (storage.ChunkQu blocks = append(blocks, b) } } - var headQuerier storage.ChunkQuerier + var inOrderHeadQuerier storage.ChunkQuerier if maxt >= db.head.MinTime() { rh := NewRangeHead(db.head, mint, maxt) var err error - headQuerier, err = NewBlockChunkQuerier(rh, mint, maxt) + inOrderHeadQuerier, err = NewBlockChunkQuerier(rh, mint, maxt) if err != nil { return nil, errors.Wrapf(err, "open querier for head %s", rh) } @@ -1600,20 +1872,30 @@ func (db *DB) ChunkQuerier(_ context.Context, mint, maxt int64) (storage.ChunkQu // won't run into a race later since any truncation that comes after will wait on this querier if it overlaps. 
shouldClose, getNew, newMint := db.head.IsQuerierCollidingWithTruncation(mint, maxt) if shouldClose { - if err := headQuerier.Close(); err != nil { + if err := inOrderHeadQuerier.Close(); err != nil { return nil, errors.Wrapf(err, "closing head querier %s", rh) } - headQuerier = nil + inOrderHeadQuerier = nil } if getNew { rh := NewRangeHead(db.head, newMint, maxt) - headQuerier, err = NewBlockChunkQuerier(rh, newMint, maxt) + inOrderHeadQuerier, err = NewBlockChunkQuerier(rh, newMint, maxt) if err != nil { return nil, errors.Wrapf(err, "open querier for head while getting new querier %s", rh) } } } + var outOfOrderHeadQuerier storage.ChunkQuerier + if overlapsClosedInterval(mint, maxt, db.head.MinOOOTime(), db.head.MaxOOOTime()) { + rh := NewOOORangeHead(db.head, mint, maxt) + var err error + outOfOrderHeadQuerier, err = NewBlockChunkQuerier(rh, mint, maxt) + if err != nil { + return nil, errors.Wrapf(err, "open block chunk querier for ooo head %s", rh) + } + } + blockQueriers := make([]storage.ChunkQuerier, 0, len(blocks)) for _, b := range blocks { q, err := NewBlockChunkQuerier(b, mint, maxt) @@ -1628,10 +1910,22 @@ func (db *DB) ChunkQuerier(_ context.Context, mint, maxt int64) (storage.ChunkQu } return nil, errors.Wrapf(err, "open querier for block %s", b) } - if headQuerier != nil { - blockQueriers = append(blockQueriers, headQuerier) + if inOrderHeadQuerier != nil { + blockQueriers = append(blockQueriers, inOrderHeadQuerier) + } + if outOfOrderHeadQuerier != nil { + blockQueriers = append(blockQueriers, outOfOrderHeadQuerier) } + return blockQueriers, nil +} + +// ChunkQuerier returns a new chunk querier over the data partition for the given time range. 
+func (db *DB) ChunkQuerier(_ context.Context, mint, maxt int64) (storage.ChunkQuerier, error) { + blockQueriers, err := db.blockChunkQuerierForRange(mint, maxt) + if err != nil { + return nil, err + } return storage.NewMergeChunkQuerier(blockQueriers, nil, storage.NewCompactingChunkSeriesMerger(storage.ChainedSeriesMerge)), nil } diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 53e6e824af..f996c423f7 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -40,6 +40,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/goleak" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/metadata" "github.com/prometheus/prometheus/storage" @@ -1335,6 +1336,7 @@ func intersection(oldBlocks, actualBlocks []string) (intersection []string) { } // mockCompactorFailing creates a new empty block on every write and fails when reached the max allowed total. +// For CompactOOO, it always fails. type mockCompactorFailing struct { t *testing.T blocks []*Block @@ -1373,6 +1375,10 @@ func (*mockCompactorFailing) Compact(string, []string, []*Block) (ulid.ULID, err return ulid.ULID{}, nil } +func (*mockCompactorFailing) CompactOOO(dest string, oooHead *OOOCompactionHead) (result []ulid.ULID, err error) { + return nil, fmt.Errorf("mock compaction failing CompactOOO") +} + func TestTimeRetention(t *testing.T) { db := openTestDB(t, nil, []int64{1000}) defer func() { @@ -1405,7 +1411,9 @@ func TestTimeRetention(t *testing.T) { } func TestSizeRetention(t *testing.T) { - db := openTestDB(t, nil, []int64{100}) + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 100 + db := openTestDB(t, opts, []int64{100}) defer func() { require.NoError(t, db.Close()) }() @@ -1428,9 +1436,11 @@ func TestSizeRetention(t *testing.T) { // Add some data to the WAL. 
headApp := db.Head().Appender(context.Background()) + var aSeries labels.Labels for _, m := range headBlocks { series := genSeries(100, 10, m.MinTime, m.MaxTime+1) for _, s := range series { + aSeries = s.Labels() it := s.Iterator() for it.Next() { tim, v := it.At() @@ -1488,6 +1498,26 @@ func TestSizeRetention(t *testing.T) { require.NoError(t, err) require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + // Add some out of order samples to check the size of WBL. + headApp = db.Head().Appender(context.Background()) + for ts := int64(750); ts < 800; ts++ { + _, err := headApp.Append(0, aSeries, ts, float64(ts)) + require.NoError(t, err) + } + require.NoError(t, headApp.Commit()) + + walSize, err = db.Head().wal.Size() + require.NoError(t, err) + wblSize, err := db.Head().wbl.Size() + require.NoError(t, err) + require.NotZero(t, wblSize) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + expSize = blockSize + walSize + wblSize + cdmSize + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + // Decrease the max bytes limit so that a delete is triggered. // Check total size, total count and check that the oldest block was deleted. 
firstBlockSize := db.Blocks()[0].Size() @@ -1503,8 +1533,8 @@ func TestSizeRetention(t *testing.T) { cdmSize, err = db.Head().chunkDiskMapper.Size() require.NoError(t, err) require.NotZero(t, cdmSize) - // Expected size should take into account block size + WAL size - expSize = blockSize + walSize + cdmSize + // Expected size should take into account block size + WAL size + WBL size + expSize = blockSize + walSize + wblSize + cdmSize actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount)) actSize, err = fileutil.DirSize(db.Dir()) require.NoError(t, err) @@ -2753,7 +2783,7 @@ func TestChunkWriter_ReadAfterWrite(t *testing.T) { for _, chks := range test.chks { for _, chkExp := range chks { - chkAct, err := r.Chunk(chkExp.Ref) + chkAct, err := r.Chunk(chkExp) require.NoError(t, err) require.Equal(t, chkExp.Chunk.Bytes(), chkAct.Bytes()) } @@ -2813,7 +2843,7 @@ func TestChunkReader_ConcurrentReads(t *testing.T) { go func(chunk chunks.Meta) { defer wg.Done() - chkAct, err := r.Chunk(chunk.Ref) + chkAct, err := r.Chunk(chunk) require.NoError(t, err) require.Equal(t, chunk.Chunk.Bytes(), chkAct.Bytes()) }(chk) @@ -3053,7 +3083,8 @@ func TestOneCheckpointPerCompactCall(t *testing.T) { _, err = app.Append(0, lbls, (blockRange*i)+blockRange/2, rand.Float64()) require.NoError(t, err) // Rotate the WAL file so that there is >3 files for checkpoint to happen. 
- require.NoError(t, db.head.wal.NextSegment()) + _, err = db.head.wal.NextSegment() + require.NoError(t, err) } require.NoError(t, app.Commit()) @@ -3437,6 +3468,196 @@ func newTestDB(t *testing.T) *DB { return db } +func TestOOOWALWrite(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderCapMax = 2 + opts.OutOfOrderTimeWindow = 30 * time.Minute.Milliseconds() + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + s1, s2 := labels.FromStrings("l", "v1"), labels.FromStrings("l", "v2") + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + + appendSample := func(app storage.Appender, l labels.Labels, mins int64) { + _, err = app.Append(0, l, minutes(mins), float64(mins)) + require.NoError(t, err) + } + + // Ingest sample at 1h. + app := db.Appender(context.Background()) + appendSample(app, s1, 60) + appendSample(app, s2, 60) + require.NoError(t, app.Commit()) + + // OOO for s1. + app = db.Appender(context.Background()) + appendSample(app, s1, 40) + require.NoError(t, app.Commit()) + + // OOO for s2. + app = db.Appender(context.Background()) + appendSample(app, s2, 42) + require.NoError(t, app.Commit()) + + // OOO for both s1 and s2 in the same commit. + app = db.Appender(context.Background()) + appendSample(app, s2, 45) + appendSample(app, s1, 35) + appendSample(app, s1, 36) // m-maps. + appendSample(app, s1, 37) + require.NoError(t, app.Commit()) + + // OOO for s1 but not for s2 in the same commit. + app = db.Appender(context.Background()) + appendSample(app, s1, 50) // m-maps. + appendSample(app, s2, 65) + require.NoError(t, app.Commit()) + + // Single commit has 2 times m-mapping and more samples after m-map. + app = db.Appender(context.Background()) + appendSample(app, s2, 50) // m-maps. + appendSample(app, s2, 51) + appendSample(app, s2, 52) // m-maps. 
+ appendSample(app, s2, 53) + require.NoError(t, app.Commit()) + + // The MmapRef in this are not hand calculated, and instead taken from the test run. + // What is important here is the order of records, and that MmapRef increases for each record. + oooRecords := []interface{}{ + []record.RefMmapMarker{ + {Ref: 1}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(40), V: 40}, + }, + + []record.RefMmapMarker{ + {Ref: 2}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(42), V: 42}, + }, + + []record.RefSample{ + {Ref: 2, T: minutes(45), V: 45}, + {Ref: 1, T: minutes(35), V: 35}, + }, + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 4294967304}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(36), V: 36}, + {Ref: 1, T: minutes(37), V: 37}, + }, + + []record.RefMmapMarker{ // 3rd sample, hence m-mapped. + {Ref: 1, MmapRef: 4294967354}, + }, + []record.RefSample{ // Does not contain the in-order sample here. + {Ref: 1, T: minutes(50), V: 50}, + }, + + // Single commit but multiple OOO records. + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 4294967403}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(51), V: 51}, + }, + []record.RefMmapMarker{ + {Ref: 2, MmapRef: 4294967452}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(52), V: 52}, + {Ref: 2, T: minutes(53), V: 53}, + }, + } + + inOrderRecords := []interface{}{ + []record.RefSeries{ + {Ref: 1, Labels: s1}, + {Ref: 2, Labels: s2}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(60), V: 60}, + {Ref: 2, T: minutes(60), V: 60}, + }, + []record.RefSample{ + {Ref: 1, T: minutes(40), V: 40}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(42), V: 42}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(45), V: 45}, + {Ref: 1, T: minutes(35), V: 35}, + {Ref: 1, T: minutes(36), V: 36}, + {Ref: 1, T: minutes(37), V: 37}, + }, + []record.RefSample{ // Contains both in-order and ooo sample. 
+ {Ref: 1, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(65), V: 65}, + }, + []record.RefSample{ + {Ref: 2, T: minutes(50), V: 50}, + {Ref: 2, T: minutes(51), V: 51}, + {Ref: 2, T: minutes(52), V: 52}, + {Ref: 2, T: minutes(53), V: 53}, + }, + } + + getRecords := func(walDir string) []interface{} { + sr, err := wal.NewSegmentsReader(walDir) + require.NoError(t, err) + r := wal.NewReader(sr) + defer func() { + require.NoError(t, sr.Close()) + }() + + var ( + records []interface{} + dec record.Decoder + ) + for r.Next() { + rec := r.Record() + switch typ := dec.Type(rec); typ { + case record.Series: + series, err := dec.Series(rec, nil) + require.NoError(t, err) + records = append(records, series) + case record.Samples: + samples, err := dec.Samples(rec, nil) + require.NoError(t, err) + records = append(records, samples) + case record.MmapMarkers: + markers, err := dec.MmapMarkers(rec, nil) + require.NoError(t, err) + records = append(records, markers) + default: + t.Fatalf("got a WAL record that is not series or samples: %v", typ) + } + } + + return records + } + + // The normal WAL. + actRecs := getRecords(path.Join(dir, "wal")) + require.Equal(t, inOrderRecords, actRecs) + + // The OOO WAL. + actRecs = getRecords(path.Join(dir, wal.WblDirName)) + require.Equal(t, oooRecords, actRecs) +} + // Tests https://github.com/prometheus/prometheus/issues/10291#issuecomment-1044373110. func TestDBPanicOnMmappingHeadChunk(t *testing.T) { dir := t.TempDir() @@ -3568,7 +3789,7 @@ func TestMetadataCheckpointingOnlyKeepsLatestEntry(t *testing.T) { ctx := context.Background() numSamples := 10000 - hb, w := newTestHead(t, int64(numSamples)*10, false) + hb, w := newTestHead(t, int64(numSamples)*10, false, false) // Add some series so we can append metadata to them. 
app := hb.Appender(ctx) @@ -3745,3 +3966,1788 @@ func TestMetadataAssertInMemoryData(t *testing.T) { require.Equal(t, *reopenDB.head.series.getByHash(s3.Hash(), s3).meta, m3) require.Equal(t, *reopenDB.head.series.getByHash(s4.Hash(), s4).meta, m4) } + +// TODO(codesome): test more samples incoming once compaction has started. To verify new samples after the start +// +// are not included in this compaction. +func TestOOOCompaction(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() // We want to manually call it. + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + series2 := labels.FromStrings("foo", "bar2") + + addSample := func(fromMins, toMins int64) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + _, err = app.Append(0, series2, ts, float64(2*ts)) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + } + + // Add an in-order samples. + addSample(250, 350) + + // Verify that the in-memory ooo chunk is empty. + checkEmptyOOOChunk := func(lbls labels.Labels) { + ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.Nil(t, ms.oooHeadChunk) + require.Equal(t, 0, len(ms.oooMmappedChunks)) + } + checkEmptyOOOChunk(series1) + checkEmptyOOOChunk(series2) + + // Add ooo samples that creates multiple chunks. + // 90 to 300 spans across 3 block ranges: [0, 120), [120, 240), [240, 360) + addSample(90, 310) + // Adding same samples to create overlapping chunks. 
+ // Since the active chunk won't start at 90 again, all the new + // chunks will have different time ranges than the previous chunks. + addSample(90, 310) + + verifyDBSamples := func() { + var series1Samples, series2Samples []tsdbutil.Sample + for _, r := range [][2]int64{{90, 119}, {120, 239}, {240, 350}} { + fromMins, toMins := r[0], r[1] + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + series1Samples = append(series1Samples, sample{ts, float64(ts)}) + series2Samples = append(series2Samples, sample{ts, float64(2 * ts)}) + } + } + expRes := map[string][]tsdbutil.Sample{ + series1.String(): series1Samples, + series2.String(): series2Samples, + } + + q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + verifyDBSamples() // Before any compaction. + + // Verify that the in-memory ooo chunk is not empty. + checkNonEmptyOOOChunk := func(lbls labels.Labels) { + ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.Greater(t, ms.oooHeadChunk.chunk.NumSamples(), 0) + require.Equal(t, 14, len(ms.oooMmappedChunks)) // 7 original, 7 duplicate. + } + checkNonEmptyOOOChunk(series1) + checkNonEmptyOOOChunk(series2) + + // No blocks before compaction. + require.Equal(t, len(db.Blocks()), 0) + + // There is a 0th WBL file. + require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows + files, err := os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "00000000", files[0].Name()) + f, err := files[0].Info() + require.NoError(t, err) + require.Greater(t, f.Size(), int64(100)) + + // OOO compaction happens here. + require.NoError(t, db.CompactOOOHead()) + + // 3 blocks exist now. 
[0, 120), [120, 240), [240, 360) + require.Equal(t, len(db.Blocks()), 3) + + verifyDBSamples() // Blocks created out of OOO head now. + + // 0th WBL file will be deleted and 1st will be the only present. + files, err = os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "00000001", files[0].Name()) + f, err = files[0].Info() + require.NoError(t, err) + require.Equal(t, int64(0), f.Size()) + + // OOO stuff should not be present in the Head now. + checkEmptyOOOChunk(series1) + checkEmptyOOOChunk(series2) + + verifySamples := func(block *Block, fromMins, toMins int64) { + series1Samples := make([]tsdbutil.Sample, 0, toMins-fromMins+1) + series2Samples := make([]tsdbutil.Sample, 0, toMins-fromMins+1) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + series1Samples = append(series1Samples, sample{ts, float64(ts)}) + series2Samples = append(series2Samples, sample{ts, float64(2 * ts)}) + } + expRes := map[string][]tsdbutil.Sample{ + series1.String(): series1Samples, + series2.String(): series2Samples, + } + + q, err := NewBlockQuerier(block, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + // Checking for expected data in the blocks. + verifySamples(db.Blocks()[0], 90, 119) + verifySamples(db.Blocks()[1], 120, 239) + verifySamples(db.Blocks()[2], 240, 310) + + // There should be a single m-map file. + mmapDir := mmappedChunksDir(db.head.opts.ChunkDirRoot) + files, err = os.ReadDir(mmapDir) + require.NoError(t, err) + require.Len(t, files, 1) + + // Compact the in-order head and expect another block. + // Since this is a forced compaction, this block is not aligned with 2h. 
+ err = db.CompactHead(NewRangeHead(db.head, 250*time.Minute.Milliseconds(), 350*time.Minute.Milliseconds())) + require.NoError(t, err) + require.Equal(t, len(db.Blocks()), 4) // [0, 120), [120, 240), [240, 360), [250, 351) + verifySamples(db.Blocks()[3], 250, 350) + + verifyDBSamples() // Blocks created out of normal and OOO head now. But not merged. + + // The compaction also clears out the old m-map files. Including + // the file that has ooo chunks. + files, err = os.ReadDir(mmapDir) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "000001", files[0].Name()) + + // This will merge overlapping block. + require.NoError(t, db.Compact()) + + require.Equal(t, len(db.Blocks()), 3) // [0, 120), [120, 240), [240, 360) + verifySamples(db.Blocks()[0], 90, 119) + verifySamples(db.Blocks()[1], 120, 239) + verifySamples(db.Blocks()[2], 240, 350) // Merged block. + + verifyDBSamples() // Final state. Blocks from normal and OOO head are merged. +} + +// TestOOOCompactionWithNormalCompaction tests if OOO compaction is performed +// when the normal head's compaction is done. +func TestOOOCompactionWithNormalCompaction(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() // We want to manually call it. 
+ t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + series2 := labels.FromStrings("foo", "bar2") + + addSamples := func(fromMins, toMins int64) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + _, err = app.Append(0, series2, ts, float64(2*ts)) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + } + + // Add an in-order samples. + addSamples(250, 350) + + // Add ooo samples that will result into a single block. + addSamples(90, 110) + + // Checking that ooo chunk is not empty. + for _, lbls := range []labels.Labels{series1, series2} { + ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.Greater(t, ms.oooHeadChunk.chunk.NumSamples(), 0) + } + + // If the normal Head is not compacted, the OOO head compaction does not take place. + require.NoError(t, db.Compact()) + require.Equal(t, len(db.Blocks()), 0) + + // Add more in-order samples in future that would trigger the compaction. + addSamples(400, 450) + + // No blocks before compaction. + require.Equal(t, len(db.Blocks()), 0) + + // Compacts normal and OOO head. + require.NoError(t, db.Compact()) + + // 2 blocks exist now. [0, 120), [250, 360) + require.Equal(t, len(db.Blocks()), 2) + require.Equal(t, int64(0), db.Blocks()[0].MinTime()) + require.Equal(t, 120*time.Minute.Milliseconds(), db.Blocks()[0].MaxTime()) + require.Equal(t, 250*time.Minute.Milliseconds(), db.Blocks()[1].MinTime()) + require.Equal(t, 360*time.Minute.Milliseconds(), db.Blocks()[1].MaxTime()) + + // Checking that ooo chunk is empty. 
+ for _, lbls := range []labels.Labels{series1, series2} { + ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.Nil(t, ms.oooHeadChunk) + require.Equal(t, 0, len(ms.oooMmappedChunks)) + } + + verifySamples := func(block *Block, fromMins, toMins int64) { + series1Samples := make([]tsdbutil.Sample, 0, toMins-fromMins+1) + series2Samples := make([]tsdbutil.Sample, 0, toMins-fromMins+1) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + series1Samples = append(series1Samples, sample{ts, float64(ts)}) + series2Samples = append(series2Samples, sample{ts, float64(2 * ts)}) + } + expRes := map[string][]tsdbutil.Sample{ + series1.String(): series1Samples, + series2.String(): series2Samples, + } + + q, err := NewBlockQuerier(block, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + // Checking for expected data in the blocks. 
+ verifySamples(db.Blocks()[0], 90, 110) + verifySamples(db.Blocks()[1], 250, 350) +} + +func Test_Querier_OOOQuery(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = false + + series1 := labels.FromStrings("foo", "bar1") + + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []tsdbutil.Sample) ([]tsdbutil.Sample, int) { + app := db.Appender(context.Background()) + totalAppended := 0 + for min := fromMins; min <= toMins; min += time.Minute.Milliseconds() { + _, err := app.Append(0, series1, min, float64(min)) + if min >= queryMinT && min <= queryMaxT { + expSamples = append(expSamples, sample{t: min, v: float64(min)}) + } + require.NoError(t, err) + totalAppended++ + } + require.NoError(t, app.Commit()) + return expSamples, totalAppended + } + + tests := []struct { + name string + queryMinT int64 + queryMaxT int64 + inOrderMinT int64 + inOrderMaxT int64 + oooMinT int64 + oooMaxT int64 + }{ + { + name: "query interval covering ooomint and inordermaxt returns all ingested samples", + queryMinT: minutes(0), + queryMaxT: minutes(200), + inOrderMinT: minutes(100), + inOrderMaxT: minutes(200), + oooMinT: minutes(0), + oooMaxT: minutes(99), + }, + { + name: "partial query interval returns only samples within interval", + queryMinT: minutes(20), + queryMaxT: minutes(180), + inOrderMinT: minutes(100), + inOrderMaxT: minutes(200), + oooMinT: minutes(0), + oooMaxT: minutes(99), + }, + } + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + db := openTestDB(t, opts, nil) + db.DisableCompactions() + defer func() { + require.NoError(t, db.Close()) + }() + + var expSamples []tsdbutil.Sample + + // Add in-order samples. 
+ expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples) + + // Add out-of-order samples. + expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples) + + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + querier, err := db.Querier(context.TODO(), tc.queryMinT, tc.queryMaxT) + require.NoError(t, err) + defer querier.Close() + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.NotNil(t, seriesSet[series1.String()]) + require.Equal(t, 1, len(seriesSet)) + require.Equal(t, expSamples, seriesSet[series1.String()]) + require.GreaterOrEqual(t, float64(oooSamples), prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamplesAppended), "number of ooo appended samples mismatch") + }) + } +} + +func Test_ChunkQuerier_OOOQuery(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 24 * time.Hour.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = false + + series1 := labels.FromStrings("foo", "bar1") + + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + addSample := func(db *DB, fromMins, toMins, queryMinT, queryMaxT int64, expSamples []tsdbutil.Sample) ([]tsdbutil.Sample, int) { + app := db.Appender(context.Background()) + totalAppended := 0 + for min := fromMins; min <= toMins; min += time.Minute.Milliseconds() { + _, err := app.Append(0, series1, min, float64(min)) + if min >= queryMinT && min <= queryMaxT { + expSamples = append(expSamples, sample{t: min, v: float64(min)}) + } + require.NoError(t, err) + totalAppended++ + } + require.NoError(t, app.Commit()) + return expSamples, totalAppended + } + + tests := []struct { + name string + queryMinT int64 + queryMaxT int64 + inOrderMinT int64 + inOrderMaxT int64 + oooMinT int64 + oooMaxT int64 + }{ + { + name: "query interval 
covering ooomint and inordermaxt returns all ingested samples", + queryMinT: minutes(0), + queryMaxT: minutes(200), + inOrderMinT: minutes(100), + inOrderMaxT: minutes(200), + oooMinT: minutes(0), + oooMaxT: minutes(99), + }, + { + name: "partial query interval returns only samples within interval", + queryMinT: minutes(20), + queryMaxT: minutes(180), + inOrderMinT: minutes(100), + inOrderMaxT: minutes(200), + oooMinT: minutes(0), + oooMaxT: minutes(99), + }, + } + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + db := openTestDB(t, opts, nil) + db.DisableCompactions() + defer func() { + require.NoError(t, db.Close()) + }() + + var expSamples []tsdbutil.Sample + + // Add in-order samples. + expSamples, _ = addSample(db, tc.inOrderMinT, tc.inOrderMaxT, tc.queryMinT, tc.queryMaxT, expSamples) + + // Add out-of-order samples. + expSamples, oooSamples := addSample(db, tc.oooMinT, tc.oooMaxT, tc.queryMinT, tc.queryMaxT, expSamples) + + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + querier, err := db.ChunkQuerier(context.TODO(), tc.queryMinT, tc.queryMaxT) + require.NoError(t, err) + defer querier.Close() + + chks := queryChunks(t, querier, labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")) + require.NotNil(t, chks[series1.String()]) + require.Equal(t, 1, len(chks)) + require.Equal(t, float64(oooSamples), prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamplesAppended), "number of ooo appended samples mismatch") + var gotSamples []tsdbutil.Sample + for _, chunk := range chks[series1.String()] { + it := chunk.Chunk.Iterator(nil) + for it.Next() { + ts, v := it.At() + gotSamples = append(gotSamples, sample{t: ts, v: v}) + } + } + require.Equal(t, expSamples, gotSamples) + }) + } +} + +func TestOOOAppendAndQuery(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 4 * time.Hour.Milliseconds() + opts.AllowOverlappingQueries 
= true + + db := openTestDB(t, opts, nil) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + s1 := labels.FromStrings("foo", "bar1") + s2 := labels.FromStrings("foo", "bar2") + + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + appendedSamples := make(map[string][]tsdbutil.Sample) + totalSamples := 0 + addSample := func(lbls labels.Labels, fromMins, toMins int64, faceError bool) { + app := db.Appender(context.Background()) + key := lbls.String() + from, to := minutes(fromMins), minutes(toMins) + for min := from; min <= to; min += time.Minute.Milliseconds() { + val := rand.Float64() + _, err := app.Append(0, lbls, min, val) + if faceError { + require.Error(t, err) + } else { + require.NoError(t, err) + appendedSamples[key] = append(appendedSamples[key], sample{t: min, v: val}) + totalSamples++ + } + } + if faceError { + require.NoError(t, app.Rollback()) + } else { + require.NoError(t, app.Commit()) + } + } + + testQuery := func(from, to int64) { + querier, err := db.Querier(context.TODO(), from, to) + require.NoError(t, err) + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.")) + + for k, v := range appendedSamples { + sort.Slice(v, func(i, j int) bool { + return v[i].T() < v[j].T() + }) + appendedSamples[k] = v + } + + expSamples := make(map[string][]tsdbutil.Sample) + for k, samples := range appendedSamples { + for _, s := range samples { + if s.T() < from { + continue + } + if s.T() > to { + continue + } + expSamples[k] = append(expSamples[k], s) + } + } + require.Equal(t, expSamples, seriesSet) + require.Equal(t, float64(totalSamples-2), prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamplesAppended), "number of ooo appended samples mismatch") + } + + verifyOOOMinMaxTimes := func(expMin, expMax int64) { + require.Equal(t, minutes(expMin), db.head.MinOOOTime()) + require.Equal(t, minutes(expMax), db.head.MaxOOOTime()) + } + + // In-order samples. 
+ addSample(s1, 300, 300, false)
+ addSample(s2, 290, 290, false)
+ require.Equal(t, float64(2), prom_testutil.ToFloat64(db.head.metrics.chunksCreated))
+ testQuery(math.MinInt64, math.MaxInt64)
+
+ // Some ooo samples.
+ addSample(s1, 250, 260, false)
+ addSample(s2, 255, 265, false)
+ verifyOOOMinMaxTimes(250, 265)
+ testQuery(math.MinInt64, math.MaxInt64)
+ testQuery(minutes(250), minutes(265)) // Test querying ooo data time range
+ testQuery(minutes(290), minutes(300)) // Test querying in-order data time range
+ testQuery(minutes(250), minutes(300)) // Test querying the entire range
+
+ // Out of time window.
+ addSample(s1, 59, 59, true)
+ addSample(s2, 49, 49, true)
+ verifyOOOMinMaxTimes(250, 265)
+ testQuery(math.MinInt64, math.MaxInt64)
+
+ // At the edge of time window, also it would be "out of bound" without the ooo support.
+ addSample(s1, 60, 65, false)
+ verifyOOOMinMaxTimes(60, 265)
+ testQuery(math.MinInt64, math.MaxInt64)
+
+ // This sample is not within the time window w.r.t. the head's maxt, but it is within the window
+ // w.r.t. the series' maxt. But we consider only head's maxt.
+ addSample(s2, 59, 59, true)
+ verifyOOOMinMaxTimes(60, 265)
+ testQuery(math.MinInt64, math.MaxInt64)
+
+ // Now the sample is within time window w.r.t. the head's maxt.
+ addSample(s2, 60, 65, false)
+ verifyOOOMinMaxTimes(60, 265)
+ testQuery(math.MinInt64, math.MaxInt64)
+
+ // Out of time window again.
+ addSample(s1, 59, 59, true)
+ addSample(s2, 49, 49, true)
+ testQuery(math.MinInt64, math.MaxInt64)
+
+ // Generating some m-map chunks. The m-map chunks here are in such a way
+ // that when sorted w.r.t. mint, the last chunk's maxt is not the overall maxt
+ // of the merged chunk. This tests a bug fixed in https://github.com/grafana/mimir-prometheus/pull/238/.
+ require.Equal(t, float64(4), prom_testutil.ToFloat64(db.head.metrics.chunksCreated)) + addSample(s1, 180, 249, false) + require.Equal(t, float64(6), prom_testutil.ToFloat64(db.head.metrics.chunksCreated)) + verifyOOOMinMaxTimes(60, 265) + testQuery(math.MinInt64, math.MaxInt64) +} + +func TestOOODisabled(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 0 + db := openTestDB(t, opts, nil) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + s1 := labels.FromStrings("foo", "bar1") + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + expSamples := make(map[string][]tsdbutil.Sample) + totalSamples := 0 + failedSamples := 0 + addSample := func(lbls labels.Labels, fromMins, toMins int64, faceError bool) { + app := db.Appender(context.Background()) + key := lbls.String() + from, to := minutes(fromMins), minutes(toMins) + for min := from; min <= to; min += time.Minute.Milliseconds() { + val := rand.Float64() + _, err := app.Append(0, lbls, min, val) + if faceError { + require.Error(t, err) + failedSamples++ + } else { + require.NoError(t, err) + expSamples[key] = append(expSamples[key], sample{t: min, v: val}) + totalSamples++ + } + } + if faceError { + require.NoError(t, app.Rollback()) + } else { + require.NoError(t, app.Commit()) + } + } + + addSample(s1, 300, 300, false) // In-order samples. + addSample(s1, 250, 260, true) // Some ooo samples. + addSample(s1, 59, 59, true) // Out of time window. + addSample(s1, 60, 65, true) // At the edge of time window, also it would be "out of bound" without the ooo support. + addSample(s1, 59, 59, true) // Out of time window again. + addSample(s1, 301, 310, false) // More in-order samples. 
+ + querier, err := db.Querier(context.TODO(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.")) + require.Equal(t, expSamples, seriesSet) + require.Equal(t, float64(0), prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamplesAppended), "number of ooo appended samples mismatch") + require.Equal(t, float64(failedSamples), + prom_testutil.ToFloat64(db.head.metrics.outOfOrderSamples)+prom_testutil.ToFloat64(db.head.metrics.outOfBoundSamples), + "number of ooo/oob samples mismatch") + + // Verifying that no OOO artifacts were generated. + _, err = os.ReadDir(path.Join(db.Dir(), wal.WblDirName)) + require.True(t, os.IsNotExist(err)) + + ms, created, err := db.head.getOrCreate(s1.Hash(), s1) + require.NoError(t, err) + require.False(t, created) + require.NotNil(t, ms) + require.Nil(t, ms.oooHeadChunk) + require.Len(t, ms.oooMmappedChunks, 0) +} + +func TestWBLAndMmapReplay(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 4 * time.Hour.Milliseconds() + opts.AllowOverlappingQueries = true + + db := openTestDB(t, opts, nil) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + s1 := labels.FromStrings("foo", "bar1") + + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + expSamples := make(map[string][]tsdbutil.Sample) + totalSamples := 0 + addSample := func(lbls labels.Labels, fromMins, toMins int64) { + app := db.Appender(context.Background()) + key := lbls.String() + from, to := minutes(fromMins), minutes(toMins) + for min := from; min <= to; min += time.Minute.Milliseconds() { + val := rand.Float64() + _, err := app.Append(0, lbls, min, val) + require.NoError(t, err) + expSamples[key] = append(expSamples[key], sample{t: min, v: val}) + totalSamples++ + } + require.NoError(t, app.Commit()) + } + + testQuery := func(exp map[string][]tsdbutil.Sample) { + querier, 
err := db.Querier(context.TODO(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + seriesSet := query(t, querier, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.")) + + for k, v := range exp { + sort.Slice(v, func(i, j int) bool { + return v[i].T() < v[j].T() + }) + exp[k] = v + } + require.Equal(t, exp, seriesSet) + } + + // In-order samples. + addSample(s1, 300, 300) + require.Equal(t, float64(1), prom_testutil.ToFloat64(db.head.metrics.chunksCreated)) + + // Some ooo samples. + addSample(s1, 250, 260) + addSample(s1, 195, 249) // This creates some m-map chunks. + require.Equal(t, float64(4), prom_testutil.ToFloat64(db.head.metrics.chunksCreated)) + testQuery(expSamples) + oooMint, oooMaxt := minutes(195), minutes(260) + + // Collect the samples only present in the ooo m-map chunks. + ms, created, err := db.head.getOrCreate(s1.Hash(), s1) + require.False(t, created) + require.NoError(t, err) + var s1MmapSamples []tsdbutil.Sample + for _, mc := range ms.oooMmappedChunks { + chk, err := db.head.chunkDiskMapper.Chunk(mc.ref) + require.NoError(t, err) + it := chk.Iterator(nil) + for it.Next() { + ts, val := it.At() + s1MmapSamples = append(s1MmapSamples, sample{t: ts, v: val}) + } + } + require.Greater(t, len(s1MmapSamples), 0) + + require.NoError(t, db.Close()) + + // Making a copy of original state of WBL and Mmap files to use it later. 
+ mmapDir := mmappedChunksDir(db.head.opts.ChunkDirRoot) + wblDir := db.head.wbl.Dir() + originalWblDir := filepath.Join(t.TempDir(), "original_wbl") + originalMmapDir := filepath.Join(t.TempDir(), "original_mmap") + require.NoError(t, fileutil.CopyDirs(wblDir, originalWblDir)) + require.NoError(t, fileutil.CopyDirs(mmapDir, originalMmapDir)) + resetWBLToOriginal := func() { + require.NoError(t, os.RemoveAll(wblDir)) + require.NoError(t, fileutil.CopyDirs(originalWblDir, wblDir)) + } + resetMmapToOriginal := func() { + require.NoError(t, os.RemoveAll(mmapDir)) + require.NoError(t, fileutil.CopyDirs(originalMmapDir, mmapDir)) + } + + t.Run("Restart DB with both WBL and M-map files for ooo data", func(t *testing.T) { + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, oooMint, db.head.MinOOOTime()) + require.Equal(t, oooMaxt, db.head.MaxOOOTime()) + testQuery(expSamples) + require.NoError(t, db.Close()) + }) + + t.Run("Restart DB with only WBL for ooo data", func(t *testing.T) { + require.NoError(t, os.RemoveAll(mmapDir)) + + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, oooMint, db.head.MinOOOTime()) + require.Equal(t, oooMaxt, db.head.MaxOOOTime()) + testQuery(expSamples) + require.NoError(t, db.Close()) + }) + + t.Run("Restart DB with only M-map files for ooo data", func(t *testing.T) { + require.NoError(t, os.RemoveAll(wblDir)) + resetMmapToOriginal() + + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, oooMint, db.head.MinOOOTime()) + require.Equal(t, oooMaxt, db.head.MaxOOOTime()) + inOrderSample := expSamples[s1.String()][len(expSamples[s1.String()])-1] + testQuery(map[string][]tsdbutil.Sample{ + s1.String(): append(s1MmapSamples, inOrderSample), + }) + require.NoError(t, db.Close()) + }) + + t.Run("Restart DB with WBL+Mmap while increasing the OOOCapMax", func(t *testing.T) { + resetWBLToOriginal() + resetMmapToOriginal() + + 
opts.OutOfOrderCapMax = 60
+ db, err = Open(db.dir, nil, nil, opts, nil)
+ require.NoError(t, err)
+ require.Equal(t, oooMint, db.head.MinOOOTime())
+ require.Equal(t, oooMaxt, db.head.MaxOOOTime())
+ testQuery(expSamples)
+ require.NoError(t, db.Close())
+ })
+
+ t.Run("Restart DB with WBL+Mmap while decreasing the OOOCapMax", func(t *testing.T) {
+ resetMmapToOriginal() // We need to reset because new duplicate chunks can be written above.
+
+ opts.OutOfOrderCapMax = 10
+ db, err = Open(db.dir, nil, nil, opts, nil)
+ require.NoError(t, err)
+ require.Equal(t, oooMint, db.head.MinOOOTime())
+ require.Equal(t, oooMaxt, db.head.MaxOOOTime())
+ testQuery(expSamples)
+ require.NoError(t, db.Close())
+ })
+
+ t.Run("Restart DB with WBL+Mmap while having no m-map markers in WBL", func(t *testing.T) {
+ resetMmapToOriginal() // We need to reset because new duplicate chunks can be written above.
+
+ // Removing m-map markers in WBL by rewriting it.
+ newWbl, err := wal.New(log.NewNopLogger(), nil, filepath.Join(t.TempDir(), "new_wbl"), false)
+ require.NoError(t, err)
+ sr, err := wal.NewSegmentsReader(originalWblDir)
+ require.NoError(t, err)
+ var dec record.Decoder
+ r, markers, addedRecs := wal.NewReader(sr), 0, 0
+ for r.Next() {
+ rec := r.Record()
+ if dec.Type(rec) == record.MmapMarkers {
+ markers++
+ continue
+ }
+ addedRecs++
+ require.NoError(t, newWbl.Log(rec))
+ }
+ require.Greater(t, markers, 0)
+ require.Greater(t, addedRecs, 0)
+ require.NoError(t, newWbl.Close())
+ require.NoError(t, sr.Close())
+ require.NoError(t, os.RemoveAll(wblDir))
+ require.NoError(t, os.Rename(newWbl.Dir(), wblDir))
+
+ opts.OutOfOrderCapMax = 30
+ db, err = Open(db.dir, nil, nil, opts, nil)
+ require.NoError(t, err)
+ require.Equal(t, oooMint, db.head.MinOOOTime())
+ require.Equal(t, oooMaxt, db.head.MaxOOOTime())
+ testQuery(expSamples)
+ })
+}
+
+func TestOOOCompactionFailure(t *testing.T) {
+ dir := t.TempDir()
+
+ opts := DefaultOptions()
+ opts.OutOfOrderCapMax = 30
+ 
opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() // We want to manually call it. + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + + addSample := func(fromMins, toMins int64) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + } + + // Add an in-order samples. + addSample(250, 350) + + // Add ooo samples that creates multiple chunks. + addSample(90, 310) + + // No blocks before compaction. + require.Equal(t, len(db.Blocks()), 0) + + // There is a 0th WBL file. + verifyFirstWBLFileIs0 := func(count int) { + require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows + files, err := os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, count) + require.Equal(t, "00000000", files[0].Name()) + f, err := files[0].Info() + require.NoError(t, err) + require.Greater(t, f.Size(), int64(100)) + } + verifyFirstWBLFileIs0(1) + + verifyMmapFiles := func(exp ...string) { + mmapDir := mmappedChunksDir(db.head.opts.ChunkDirRoot) + files, err := os.ReadDir(mmapDir) + require.NoError(t, err) + require.Len(t, files, len(exp)) + for i, f := range files { + require.Equal(t, exp[i], f.Name()) + } + } + + verifyMmapFiles("000001") + + // OOO compaction fails 5 times. + originalCompactor := db.compactor + db.compactor = &mockCompactorFailing{t: t} + for i := 0; i < 5; i++ { + require.Error(t, db.CompactOOOHead()) + } + require.Equal(t, len(db.Blocks()), 0) + + // M-map files don't change after failed compaction. 
+ verifyMmapFiles("000001") + + // Because of 5 compaction attempts, there are 6 files now. + verifyFirstWBLFileIs0(6) + + db.compactor = originalCompactor + require.NoError(t, db.CompactOOOHead()) + oldBlocks := db.Blocks() + require.Equal(t, len(db.Blocks()), 3) + + // Check that the ooo chunks were removed. + ms, created, err := db.head.getOrCreate(series1.Hash(), series1) + require.NoError(t, err) + require.False(t, created) + require.Nil(t, ms.oooHeadChunk) + require.Len(t, ms.oooMmappedChunks, 0) + + // The failed compaction should not have left the ooo Head corrupted. + // Hence, expect no new blocks with another OOO compaction call. + require.NoError(t, db.CompactOOOHead()) + require.Equal(t, len(db.Blocks()), 3) + require.Equal(t, oldBlocks, db.Blocks()) + + // There should be a single m-map file + verifyMmapFiles("000001") + + // All but last WBL file will be deleted. + // 8 files in total (starting at 0) because of 7 compaction calls. + files, err := os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "00000007", files[0].Name()) + f, err := files[0].Info() + require.NoError(t, err) + require.Equal(t, int64(0), f.Size()) + + verifySamples := func(block *Block, fromMins, toMins int64) { + series1Samples := make([]tsdbutil.Sample, 0, toMins-fromMins+1) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + series1Samples = append(series1Samples, sample{ts, float64(ts)}) + } + expRes := map[string][]tsdbutil.Sample{ + series1.String(): series1Samples, + } + + q, err := NewBlockQuerier(block, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + // Checking for expected data in the blocks. 
+ verifySamples(db.Blocks()[0], 90, 119) + verifySamples(db.Blocks()[1], 120, 239) + verifySamples(db.Blocks()[2], 240, 310) + + // Compact the in-order head and expect another block. + // Since this is a forced compaction, this block is not aligned with 2h. + err = db.CompactHead(NewRangeHead(db.head, 250*time.Minute.Milliseconds(), 350*time.Minute.Milliseconds())) + require.NoError(t, err) + require.Equal(t, len(db.Blocks()), 4) // [0, 120), [120, 240), [240, 360), [250, 351) + verifySamples(db.Blocks()[3], 250, 350) + + // The compaction also clears out the old m-map files. Including + // the file that has ooo chunks. + verifyMmapFiles("000001") +} + +func TestWBLCorruption(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderCapMax = 30 + opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + var allSamples, expAfterRestart []tsdbutil.Sample + addSamples := func(fromMins, toMins int64, afterRestart bool) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + allSamples = append(allSamples, sample{t: ts, v: float64(ts)}) + if afterRestart { + expAfterRestart = append(expAfterRestart, sample{t: ts, v: float64(ts)}) + } + } + require.NoError(t, app.Commit()) + } + + // Add an in-order samples. + addSamples(340, 350, true) + + // OOO samples. + addSamples(90, 99, true) + addSamples(100, 119, true) + addSamples(120, 130, true) + + // Moving onto the second file. + _, err = db.head.wbl.NextSegment() + require.NoError(t, err) + + // More OOO samples. 
+ addSamples(200, 230, true) + addSamples(240, 255, true) + + // We corrupt WBL after the sample at 255. So everything added later + // should be deleted after replay. + + // Checking where we corrupt it. + require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows + files, err := os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 2) + f1, err := files[1].Info() + require.NoError(t, err) + corruptIndex := f1.Size() + corruptFilePath := path.Join(db.head.wbl.Dir(), files[1].Name()) + + // Corrupt the WBL by adding a malformed record. + require.NoError(t, db.head.wbl.Log([]byte{byte(record.Samples), 99, 9, 99, 9, 99, 9, 99})) + + // More samples after the corruption point. + addSamples(260, 280, false) + addSamples(290, 300, false) + + // Another file. + _, err = db.head.wbl.NextSegment() + require.NoError(t, err) + + addSamples(310, 320, false) + + // Verifying that we have data after corruption point. + require.NoError(t, db.head.wbl.Sync()) // syncing to make sure wbl is flushed in windows + files, err = os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 3) + f1, err = files[1].Info() + require.NoError(t, err) + require.Greater(t, f1.Size(), corruptIndex) + f0, err := files[0].Info() + require.NoError(t, err) + require.Greater(t, f0.Size(), int64(100)) + f2, err := files[2].Info() + require.NoError(t, err) + require.Greater(t, f2.Size(), int64(100)) + + verifySamples := func(expSamples []tsdbutil.Sample) { + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + expRes := map[string][]tsdbutil.Sample{ + series1.String(): expSamples, + } + + q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + verifySamples(allSamples) + + require.NoError(t, db.Close()) + + // We 
want everything to be replayed from the WBL. So we delete the m-map files. + require.NoError(t, os.RemoveAll(mmappedChunksDir(db.head.opts.ChunkDirRoot))) + + // Restart does the replay and repair. + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.walCorruptionsTotal)) + require.Less(t, len(expAfterRestart), len(allSamples)) + verifySamples(expAfterRestart) + + // Verify that it did the repair on disk. + files, err = os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 3) + f0, err = files[0].Info() + require.NoError(t, err) + require.Greater(t, f0.Size(), int64(100)) + f2, err = files[2].Info() + require.NoError(t, err) + require.Equal(t, int64(0), f2.Size()) + require.Equal(t, corruptFilePath, path.Join(db.head.wbl.Dir(), files[1].Name())) + + // Verifying that everything after the corruption point is set to 0. + b, err := os.ReadFile(corruptFilePath) + require.NoError(t, err) + sum := 0 + for _, val := range b[corruptIndex:] { + sum += int(val) + } + require.Equal(t, 0, sum) + + // Another restart, everything normal with no repair. 
+ require.NoError(t, db.Close()) + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.walCorruptionsTotal)) + verifySamples(expAfterRestart) +} + +func TestOOOMmapCorruption(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderCapMax = 10 + opts.OutOfOrderTimeWindow = 300 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + var allSamples, expInMmapChunks []tsdbutil.Sample + addSamples := func(fromMins, toMins int64, inMmapAfterCorruption bool) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + allSamples = append(allSamples, sample{t: ts, v: float64(ts)}) + if inMmapAfterCorruption { + expInMmapChunks = append(expInMmapChunks, sample{t: ts, v: float64(ts)}) + } + } + require.NoError(t, app.Commit()) + } + + // Add an in-order samples. + addSamples(340, 350, true) + + // OOO samples. + addSamples(90, 99, true) + addSamples(100, 109, true) + // This sample m-maps a chunk. But 120 goes into a new chunk. + addSamples(120, 120, false) + + // Second m-map file. We will corrupt this file. Sample 120 goes into this new file. + db.head.chunkDiskMapper.CutNewFile() + + // More OOO samples. 
+ addSamples(200, 230, false) + addSamples(240, 255, false) + + db.head.chunkDiskMapper.CutNewFile() + addSamples(260, 290, false) + + verifySamples := func(expSamples []tsdbutil.Sample) { + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + expRes := map[string][]tsdbutil.Sample{ + series1.String(): expSamples, + } + + q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + verifySamples(allSamples) + + // Verifying existing files. + mmapDir := mmappedChunksDir(db.head.opts.ChunkDirRoot) + files, err := os.ReadDir(mmapDir) + require.NoError(t, err) + require.Len(t, files, 3) + + // Corrupting the 2nd file. + f, err := os.OpenFile(path.Join(mmapDir, files[1].Name()), os.O_RDWR, 0o666) + require.NoError(t, err) + _, err = f.WriteAt([]byte{99, 9, 99, 9, 99}, 20) + require.NoError(t, err) + require.NoError(t, f.Close()) + firstFileName := files[0].Name() + + require.NoError(t, db.Close()) + + // Moving OOO WBL to use it later. + wblDir := db.head.wbl.Dir() + wblDirTmp := path.Join(t.TempDir(), "wbl_tmp") + require.NoError(t, os.Rename(wblDir, wblDirTmp)) + + // Restart does the replay and repair of m-map files. + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.mmapChunkCorruptionTotal)) + require.Less(t, len(expInMmapChunks), len(allSamples)) + + // Since there is no WBL, only samples from m-map chunks comes in the query. + verifySamples(expInMmapChunks) + + // Verify that it did the repair on disk. All files from the point of corruption + // should be deleted. 
+ files, err = os.ReadDir(mmapDir) + require.NoError(t, err) + require.Len(t, files, 1) + f0, err := files[0].Info() + require.NoError(t, err) + require.Greater(t, f0.Size(), int64(100)) + require.Equal(t, firstFileName, files[0].Name()) + + // Another restart, everything normal with no repair. + require.NoError(t, db.Close()) + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, 0.0, prom_testutil.ToFloat64(db.head.metrics.mmapChunkCorruptionTotal)) + verifySamples(expInMmapChunks) + + // Restart again with the WBL, all samples should be present now. + require.NoError(t, db.Close()) + require.NoError(t, os.RemoveAll(wblDir)) + require.NoError(t, os.Rename(wblDirTmp, wblDir)) + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + verifySamples(allSamples) +} + +func TestOutOfOrderRuntimeConfig(t *testing.T) { + getDB := func(oooTimeWindow int64) *DB { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = oooTimeWindow + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + return db + } + + makeConfig := func(oooTimeWindow int) *config.Config { + return &config.Config{ + StorageConfig: config.StorageConfig{ + TSDBConfig: &config.TSDBConfig{ + OutOfOrderTimeWindow: int64(oooTimeWindow) * time.Minute.Milliseconds(), + }, + }, + } + } + + series1 := labels.FromStrings("foo", "bar1") + addSamples := func(t *testing.T, db *DB, fromMins, toMins int64, success bool, allSamples []tsdbutil.Sample) []tsdbutil.Sample { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + if success { + require.NoError(t, err) + allSamples = append(allSamples, sample{t: ts, v: float64(ts)}) + } else { + require.Error(t, err) + } + } + require.NoError(t, app.Commit()) + return 
allSamples + } + + verifySamples := func(t *testing.T, db *DB, expSamples []tsdbutil.Sample) { + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + expRes := map[string][]tsdbutil.Sample{ + series1.String(): expSamples, + } + + q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + doOOOCompaction := func(t *testing.T, db *DB) { + // WBL is not empty. + size, err := db.head.wbl.Size() + require.NoError(t, err) + require.Greater(t, size, int64(0)) + + require.Len(t, db.Blocks(), 0) + require.NoError(t, db.compactOOOHead()) + require.Greater(t, len(db.Blocks()), 0) + + // WBL is empty. + size, err = db.head.wbl.Size() + require.NoError(t, err) + require.Equal(t, int64(0), size) + } + + t.Run("increase time window", func(t *testing.T) { + var allSamples []tsdbutil.Sample + db := getDB(30 * time.Minute.Milliseconds()) + + // In-order. + allSamples = addSamples(t, db, 300, 310, true, allSamples) + + // OOO upto 30m old is success. + allSamples = addSamples(t, db, 281, 290, true, allSamples) + + // OOO of 59m old fails. + s := addSamples(t, db, 251, 260, false, nil) + require.Len(t, s, 0) + verifySamples(t, db, allSamples) + + oldWblPtr := fmt.Sprintf("%p", db.head.wbl) + + // Increase time window and try adding again. + err := db.ApplyConfig(makeConfig(60)) + require.NoError(t, err) + allSamples = addSamples(t, db, 251, 260, true, allSamples) + + // WBL does not change. + newWblPtr := fmt.Sprintf("%p", db.head.wbl) + require.Equal(t, oldWblPtr, newWblPtr) + + doOOOCompaction(t, db) + verifySamples(t, db, allSamples) + }) + + t.Run("decrease time window and increase again", func(t *testing.T) { + var allSamples []tsdbutil.Sample + db := getDB(60 * time.Minute.Milliseconds()) + + // In-order. 
+ allSamples = addSamples(t, db, 300, 310, true, allSamples) + + // OOO upto 59m old is success. + allSamples = addSamples(t, db, 251, 260, true, allSamples) + + oldWblPtr := fmt.Sprintf("%p", db.head.wbl) + // Decrease time window. + err := db.ApplyConfig(makeConfig(30)) + require.NoError(t, err) + + // OOO of 49m old fails. + s := addSamples(t, db, 261, 270, false, nil) + require.Len(t, s, 0) + + // WBL does not change. + newWblPtr := fmt.Sprintf("%p", db.head.wbl) + require.Equal(t, oldWblPtr, newWblPtr) + + verifySamples(t, db, allSamples) + + // Increase time window again and check + err = db.ApplyConfig(makeConfig(60)) + require.NoError(t, err) + allSamples = addSamples(t, db, 261, 270, true, allSamples) + verifySamples(t, db, allSamples) + + // WBL does not change. + newWblPtr = fmt.Sprintf("%p", db.head.wbl) + require.Equal(t, oldWblPtr, newWblPtr) + + doOOOCompaction(t, db) + verifySamples(t, db, allSamples) + }) + + t.Run("disabled to enabled", func(t *testing.T) { + var allSamples []tsdbutil.Sample + db := getDB(0) + + // In-order. + allSamples = addSamples(t, db, 300, 310, true, allSamples) + + // OOO fails. + s := addSamples(t, db, 251, 260, false, nil) + require.Len(t, s, 0) + verifySamples(t, db, allSamples) + + require.Nil(t, db.head.wbl) + + // Increase time window and try adding again. + err := db.ApplyConfig(makeConfig(60)) + require.NoError(t, err) + allSamples = addSamples(t, db, 251, 260, true, allSamples) + + // WBL gets created. + require.NotNil(t, db.head.wbl) + + verifySamples(t, db, allSamples) + + // OOO compaction works now. + doOOOCompaction(t, db) + verifySamples(t, db, allSamples) + }) + + t.Run("enabled to disabled", func(t *testing.T) { + var allSamples []tsdbutil.Sample + db := getDB(60 * time.Minute.Milliseconds()) + + // In-order. + allSamples = addSamples(t, db, 300, 310, true, allSamples) + + // OOO upto 59m old is success. 
+ allSamples = addSamples(t, db, 251, 260, true, allSamples) + + oldWblPtr := fmt.Sprintf("%p", db.head.wbl) + // Time Window to 0, hence disabled. + err := db.ApplyConfig(makeConfig(0)) + require.NoError(t, err) + + // OOO within old time window fails. + s := addSamples(t, db, 290, 309, false, nil) + require.Len(t, s, 0) + + // WBL does not change and is not removed. + newWblPtr := fmt.Sprintf("%p", db.head.wbl) + require.Equal(t, oldWblPtr, newWblPtr) + + verifySamples(t, db, allSamples) + + // Compaction still works after disabling with WBL cleanup. + doOOOCompaction(t, db) + verifySamples(t, db, allSamples) + }) + + t.Run("disabled to disabled", func(t *testing.T) { + var allSamples []tsdbutil.Sample + db := getDB(0) + + // In-order. + allSamples = addSamples(t, db, 300, 310, true, allSamples) + + // OOO fails. + s := addSamples(t, db, 290, 309, false, nil) + require.Len(t, s, 0) + verifySamples(t, db, allSamples) + require.Nil(t, db.head.wbl) + + // Time window to 0. + err := db.ApplyConfig(makeConfig(0)) + require.NoError(t, err) + + // OOO still fails. 
+ s = addSamples(t, db, 290, 309, false, nil) + require.Len(t, s, 0) + verifySamples(t, db, allSamples) + require.Nil(t, db.head.wbl) + }) +} + +func TestNoGapAfterRestartWithOOO(t *testing.T) { + series1 := labels.FromStrings("foo", "bar1") + addSamples := func(t *testing.T, db *DB, fromMins, toMins int64, success bool) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + if success { + require.NoError(t, err) + } else { + require.Error(t, err) + } + } + require.NoError(t, app.Commit()) + } + + verifySamples := func(t *testing.T, db *DB, fromMins, toMins int64) { + var expSamples []tsdbutil.Sample + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + expSamples = append(expSamples, sample{t: ts, v: float64(ts)}) + } + + expRes := map[string][]tsdbutil.Sample{ + series1.String(): expSamples, + } + + q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + cases := []struct { + inOrderMint, inOrderMaxt int64 + oooMint, oooMaxt int64 + // After compaction. + blockRanges [][2]int64 + headMint, headMaxt int64 + }{ + { + 300, 490, + 489, 489, + [][2]int64{{300, 360}, {480, 600}}, + 360, 490, + }, + { + 300, 490, + 479, 479, + [][2]int64{{300, 360}, {360, 480}}, + 360, 490, + }, + } + + for i, c := range cases { + t.Run(fmt.Sprintf("case=%d", i), func(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 30 * time.Minute.Milliseconds() + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + // 3h10m=190m worth in-order data. 
+ addSamples(t, db, c.inOrderMint, c.inOrderMaxt, true) + verifySamples(t, db, c.inOrderMint, c.inOrderMaxt) + + // One ooo samples. + addSamples(t, db, c.oooMint, c.oooMaxt, true) + verifySamples(t, db, c.inOrderMint, c.inOrderMaxt) + + // We get 2 blocks. 1 from OOO, 1 from in-order. + require.NoError(t, db.Compact()) + verifyBlockRanges := func() { + blocks := db.Blocks() + require.Equal(t, len(c.blockRanges), len(blocks)) + for j, br := range c.blockRanges { + require.Equal(t, br[0]*time.Minute.Milliseconds(), blocks[j].MinTime()) + require.Equal(t, br[1]*time.Minute.Milliseconds(), blocks[j].MaxTime()) + } + } + verifyBlockRanges() + require.Equal(t, c.headMint*time.Minute.Milliseconds(), db.head.MinTime()) + require.Equal(t, c.headMaxt*time.Minute.Milliseconds(), db.head.MaxTime()) + + // Restart and expect all samples to be present. + require.NoError(t, db.Close()) + + db, err = Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + + verifyBlockRanges() + require.Equal(t, c.headMint*time.Minute.Milliseconds(), db.head.MinTime()) + require.Equal(t, c.headMaxt*time.Minute.Milliseconds(), db.head.MaxTime()) + verifySamples(t, db, c.inOrderMint, c.inOrderMaxt) + }) + } +} + +func TestWblReplayAfterOOODisableAndRestart(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + opts.AllowOverlappingCompaction = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + var allSamples []tsdbutil.Sample + addSamples := func(fromMins, toMins int64) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + allSamples = 
append(allSamples, sample{t: ts, v: float64(ts)}) + } + require.NoError(t, app.Commit()) + } + + // In-order samples. + addSamples(290, 300) + // OOO samples. + addSamples(250, 260) + + verifySamples := func(expSamples []tsdbutil.Sample) { + sort.Slice(expSamples, func(i, j int) bool { + return expSamples[i].T() < expSamples[j].T() + }) + + expRes := map[string][]tsdbutil.Sample{ + series1.String(): expSamples, + } + + q, err := db.Querier(context.Background(), math.MinInt64, math.MaxInt64) + require.NoError(t, err) + + actRes := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expRes, actRes) + } + + verifySamples(allSamples) + + // Restart DB with OOO disabled. + require.NoError(t, db.Close()) + opts.OutOfOrderTimeWindow = 0 + db, err = Open(db.dir, nil, nil, opts, nil) + require.NoError(t, err) + + // We can still query OOO samples when OOO is disabled. + verifySamples(allSamples) +} + +func TestPanicOnApplyConfig(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + var allSamples []tsdbutil.Sample + addSamples := func(fromMins, toMins int64) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + allSamples = append(allSamples, sample{t: ts, v: float64(ts)}) + } + require.NoError(t, app.Commit()) + } + + // In-order samples. + addSamples(290, 300) + // OOO samples. + addSamples(250, 260) + + // Restart DB with OOO disabled. 
+ require.NoError(t, db.Close()) + opts.OutOfOrderTimeWindow = 0 + db, err = Open(db.dir, nil, prometheus.NewRegistry(), opts, nil) + require.NoError(t, err) + + // ApplyConfig with OOO enabled and expect no panic. + err = db.ApplyConfig(&config.Config{ + StorageConfig: config.StorageConfig{ + TSDBConfig: &config.TSDBConfig{ + OutOfOrderTimeWindow: 60 * time.Minute.Milliseconds(), + }, + }, + }) + require.NoError(t, err) +} + +func TestDiskFillingUpAfterDisablingOOO(t *testing.T) { + dir := t.TempDir() + + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 60 * time.Minute.Milliseconds() + opts.AllowOverlappingQueries = true + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + db.DisableCompactions() + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + series1 := labels.FromStrings("foo", "bar1") + var allSamples []tsdbutil.Sample + addSamples := func(fromMins, toMins int64) { + app := db.Appender(context.Background()) + for min := fromMins; min <= toMins; min++ { + ts := min * time.Minute.Milliseconds() + _, err := app.Append(0, series1, ts, float64(ts)) + require.NoError(t, err) + allSamples = append(allSamples, sample{t: ts, v: float64(ts)}) + } + require.NoError(t, app.Commit()) + } + + // In-order samples. + addSamples(290, 300) + // OOO samples. + addSamples(250, 299) + + // Restart DB with OOO disabled. 
+ require.NoError(t, db.Close()) + opts.OutOfOrderTimeWindow = 0 + db, err = Open(db.dir, nil, prometheus.NewRegistry(), opts, nil) + require.NoError(t, err) + db.DisableCompactions() + + ms := db.head.series.getByHash(series1.Hash(), series1) + require.Greater(t, len(ms.oooMmappedChunks), 0, "OOO mmap chunk was not replayed") + + checkMmapFileContents := func(contains, notContains []string) { + mmapDir := mmappedChunksDir(db.head.opts.ChunkDirRoot) + files, err := os.ReadDir(mmapDir) + require.NoError(t, err) + + fnames := make([]string, 0, len(files)) + for _, f := range files { + fnames = append(fnames, f.Name()) + } + + for _, f := range contains { + require.Contains(t, fnames, f) + } + for _, f := range notContains { + require.NotContains(t, fnames, f) + } + } + + // Add in-order samples until ready for compaction.. + addSamples(301, 500) + + // Check that m-map files gets deleted properly after compactions. + + checkMmapFileContents([]string{"000001", "000002"}, nil) + require.NoError(t, db.Compact()) + checkMmapFileContents([]string{"000002"}, []string{"000001"}) + require.Equal(t, 0, len(ms.oooMmappedChunks), "OOO mmap chunk was not compacted") + + addSamples(501, 650) + checkMmapFileContents([]string{"000002", "000003"}, []string{"000001"}) + require.NoError(t, db.Compact()) + checkMmapFileContents(nil, []string{"000001", "000002", "000003"}) + + // Verify that WBL is empty. + files, err := os.ReadDir(db.head.wbl.Dir()) + require.NoError(t, err) + require.Len(t, files, 1) // Last empty file after compaction. 
+ finfo, err := files[0].Info() + require.NoError(t, err) + require.Equal(t, int64(0), finfo.Size()) +} diff --git a/tsdb/head.go b/tsdb/head.go index 32e85c5993..8aa5aa2c8b 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -25,9 +25,10 @@ import ( "github.com/go-kit/log/level" "github.com/oklog/ulid" "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" "go.uber.org/atomic" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" @@ -62,15 +63,19 @@ var ( type Head struct { chunkRange atomic.Int64 numSeries atomic.Uint64 - minTime, maxTime atomic.Int64 // Current min and max of the samples included in the head. + minOOOTime, maxOOOTime atomic.Int64 // TODO(jesusvazquez) These should be updated after garbage collection. + minTime, maxTime atomic.Int64 // Current min and max of the samples included in the head. TODO(jesusvazquez) Ensure these are properly tracked. minValidTime atomic.Int64 // Mint allowed to be added to the head. It shouldn't be lower than the maxt of the last persisted block. lastWALTruncationTime atomic.Int64 lastMemoryTruncationTime atomic.Int64 lastSeriesID atomic.Uint64 + // All the ooo m-map chunks should be after this. This is used to truncate old ooo m-map chunks. + // This should be typecasted to chunks.ChunkDiskMapperRef after loading. + minOOOMmapRef atomic.Uint64 metrics *headMetrics opts *HeadOptions - wal *wal.WAL + wal, wbl *wal.WAL exemplarMetrics *ExemplarMetrics exemplars ExemplarStorage logger log.Logger @@ -87,6 +92,7 @@ type Head struct { deletedMtx sync.Mutex deleted map[chunks.HeadSeriesRef]int // Deleted series, and what WAL segment they must be kept until. + // TODO(codesome): Extend MemPostings to return only OOOPostings, Set OOOStatus, ... Like an additional map of ooo postings. postings *index.MemPostings // Postings lists for terms. 
tombstones *tombstones.MemTombstones @@ -130,6 +136,8 @@ type HeadOptions struct { ChunkPool chunkenc.Pool ChunkWriteBufferSize int ChunkWriteQueueSize int + OutOfOrderTimeWindow atomic.Int64 + OutOfOrderCapMax atomic.Int64 // StripeSize sets the number of entries in the hash map, it must be a power of 2. // A larger StripeSize will allocate more memory up-front, but will increase performance when handling a large number of series. @@ -142,8 +150,13 @@ type HeadOptions struct { IsolationDisabled bool } +const ( + // DefaultOutOfOrderCapMax is the default maximum size of an in-memory out-of-order chunk. + DefaultOutOfOrderCapMax int64 = 32 +) + func DefaultHeadOptions() *HeadOptions { - return &HeadOptions{ + ho := &HeadOptions{ ChunkRange: DefaultBlockDuration, ChunkDirRoot: "", ChunkPool: chunkenc.NewPool(), @@ -153,6 +166,8 @@ func DefaultHeadOptions() *HeadOptions { SeriesCallback: &noopSeriesLifecycleCallback{}, IsolationDisabled: defaultIsolationDisabled, } + ho.OutOfOrderCapMax.Store(DefaultOutOfOrderCapMax) + return ho } // SeriesLifecycleCallback specifies a list of callbacks that will be called during a lifecycle of a series. @@ -171,11 +186,23 @@ type SeriesLifecycleCallback interface { } // NewHead opens the head block in dir. -func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, opts *HeadOptions, stats *HeadStats) (*Head, error) { +func NewHead(r prometheus.Registerer, l log.Logger, wal, wbl *wal.WAL, opts *HeadOptions, stats *HeadStats) (*Head, error) { var err error if l == nil { l = log.NewNopLogger() } + + if opts.OutOfOrderTimeWindow.Load() < 0 { + opts.OutOfOrderTimeWindow.Store(0) + } + + // Time window can be set on runtime. So the capMin and capMax should be valid + // even if ooo is not enabled yet. + capMax := opts.OutOfOrderCapMax.Load() + if capMax <= 0 || capMax > 255 { + return nil, errors.Errorf("OOOCapMax of %d is invalid. 
must be > 0 and <= 255", capMax) + } + if opts.ChunkRange < 1 { return nil, errors.Errorf("invalid chunk range %d", opts.ChunkRange) } @@ -193,6 +220,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, opts *HeadOpti h := &Head{ wal: wal, + wbl: wbl, logger: l, opts: opts, memChunkPool: sync.Pool{ @@ -254,35 +282,40 @@ func (h *Head) resetInMemoryState() error { h.chunkRange.Store(h.opts.ChunkRange) h.minTime.Store(math.MaxInt64) h.maxTime.Store(math.MinInt64) + h.minOOOTime.Store(math.MaxInt64) + h.maxOOOTime.Store(math.MinInt64) h.lastWALTruncationTime.Store(math.MinInt64) h.lastMemoryTruncationTime.Store(math.MinInt64) return nil } type headMetrics struct { - activeAppenders prometheus.Gauge - series prometheus.GaugeFunc - seriesCreated prometheus.Counter - seriesRemoved prometheus.Counter - seriesNotFound prometheus.Counter - chunks prometheus.Gauge - chunksCreated prometheus.Counter - chunksRemoved prometheus.Counter - gcDuration prometheus.Summary - samplesAppended prometheus.Counter - outOfBoundSamples prometheus.Counter - outOfOrderSamples prometheus.Counter - walTruncateDuration prometheus.Summary - walCorruptionsTotal prometheus.Counter - walTotalReplayDuration prometheus.Gauge - headTruncateFail prometheus.Counter - headTruncateTotal prometheus.Counter - checkpointDeleteFail prometheus.Counter - checkpointDeleteTotal prometheus.Counter - checkpointCreationFail prometheus.Counter - checkpointCreationTotal prometheus.Counter - mmapChunkCorruptionTotal prometheus.Counter - snapshotReplayErrorTotal prometheus.Counter // Will be either 0 or 1. 
+ activeAppenders prometheus.Gauge + series prometheus.GaugeFunc + seriesCreated prometheus.Counter + seriesRemoved prometheus.Counter + seriesNotFound prometheus.Counter + chunks prometheus.Gauge + chunksCreated prometheus.Counter + chunksRemoved prometheus.Counter + gcDuration prometheus.Summary + samplesAppended prometheus.Counter + outOfOrderSamplesAppended prometheus.Counter + outOfBoundSamples prometheus.Counter + outOfOrderSamples prometheus.Counter + tooOldSamples prometheus.Counter + walTruncateDuration prometheus.Summary + walCorruptionsTotal prometheus.Counter + dataTotalReplayDuration prometheus.Gauge + headTruncateFail prometheus.Counter + headTruncateTotal prometheus.Counter + checkpointDeleteFail prometheus.Counter + checkpointDeleteTotal prometheus.Counter + checkpointCreationFail prometheus.Counter + checkpointCreationTotal prometheus.Counter + mmapChunkCorruptionTotal prometheus.Counter + snapshotReplayErrorTotal prometheus.Counter // Will be either 0 or 1. + oooHistogram prometheus.Histogram } func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { @@ -333,7 +366,7 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "prometheus_tsdb_wal_corruptions_total", Help: "Total number of WAL corruptions.", }), - walTotalReplayDuration: prometheus.NewGauge(prometheus.GaugeOpts{ + dataTotalReplayDuration: prometheus.NewGauge(prometheus.GaugeOpts{ Name: "prometheus_tsdb_data_replay_duration_seconds", Help: "Time taken to replay the data on disk.", }), @@ -341,13 +374,21 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "prometheus_tsdb_head_samples_appended_total", Help: "Total number of appended samples.", }), + outOfOrderSamplesAppended: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "prometheus_tsdb_head_out_of_order_samples_appended_total", + Help: "Total number of appended out of order samples.", + }), outOfBoundSamples: prometheus.NewCounter(prometheus.CounterOpts{ Name: 
"prometheus_tsdb_out_of_bound_samples_total", - Help: "Total number of out of bound samples ingestion failed attempts.", + Help: "Total number of out of bound samples ingestion failed attempts with out of order support disabled.", }), outOfOrderSamples: prometheus.NewCounter(prometheus.CounterOpts{ Name: "prometheus_tsdb_out_of_order_samples_total", - Help: "Total number of out of order samples ingestion failed attempts.", + Help: "Total number of out of order samples ingestion failed attempts due to out of order being disabled.", + }), + tooOldSamples: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "prometheus_tsdb_too_old_samples_total", + Help: "Total number of out of order samples ingestion failed attempts with out of order support enabled, but sample outside of time window.", }), headTruncateFail: prometheus.NewCounter(prometheus.CounterOpts{ Name: "prometheus_tsdb_head_truncations_failed_total", @@ -381,6 +422,19 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { Name: "prometheus_tsdb_snapshot_replay_error_total", Help: "Total number snapshot replays that failed.", }), + oooHistogram: prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "prometheus_tsdb_sample_ooo_delta", + Help: "Delta in seconds by which a sample is considered out of order (reported regardless of OOO time window and whether sample is accepted or not).", + Buckets: []float64{ + 60 * 10, // 10 min + 60 * 30, // 30 min + 60 * 60, // 60 min + 60 * 60 * 2, // 2h + 60 * 60 * 3, // 3h + 60 * 60 * 6, // 6h + 60 * 60 * 12, // 12h + }, + }), } if r != nil { @@ -396,10 +450,12 @@ func newHeadMetrics(h *Head, r prometheus.Registerer) *headMetrics { m.gcDuration, m.walTruncateDuration, m.walCorruptionsTotal, - m.walTotalReplayDuration, + m.dataTotalReplayDuration, m.samplesAppended, + m.outOfOrderSamplesAppended, m.outOfBoundSamples, m.outOfOrderSamples, + m.tooOldSamples, m.headTruncateFail, m.headTruncateTotal, m.checkpointDeleteFail, @@ -517,8 +573,9 @@ func (h *Head) 
Init(minValidTime int64) error { } mmapChunkReplayStart := time.Now() - mmappedChunks, err := h.loadMmappedChunks(refSeries) + mmappedChunks, oooMmappedChunks, lastMmapRef, err := h.loadMmappedChunks(refSeries) if err != nil { + // TODO(codesome): clear out all m-map chunks here for refSeries. level.Error(h.logger).Log("msg", "Loading on-disk chunks failed", "err", err) if _, ok := errors.Cause(err).(*chunks.CorruptionErr); ok { h.metrics.mmapChunkCorruptionTotal.Inc() @@ -529,7 +586,7 @@ func (h *Head) Init(minValidTime int64) error { // If this fails, data will be recovered from WAL. // Hence we wont lose any data (given WAL is not corrupt). - mmappedChunks, err = h.removeCorruptedMmappedChunks(err) + mmappedChunks, oooMmappedChunks, lastMmapRef, err = h.removeCorruptedMmappedChunks(err) if err != nil { return err } @@ -572,7 +629,7 @@ func (h *Head) Init(minValidTime int64) error { // A corrupted checkpoint is a hard error for now and requires user // intervention. There's likely little data that can be recovered anyway. 
- if err := h.loadWAL(wal.NewReader(sr), multiRef, mmappedChunks); err != nil { + if err := h.loadWAL(wal.NewReader(sr), multiRef, mmappedChunks, oooMmappedChunks); err != nil { return errors.Wrap(err, "backfill checkpoint") } h.updateWALReplayStatusRead(startFrom) @@ -605,7 +662,7 @@ func (h *Head) Init(minValidTime int64) error { if err != nil { return errors.Wrapf(err, "segment reader (offset=%d)", offset) } - err = h.loadWAL(wal.NewReader(sr), multiRef, mmappedChunks) + err = h.loadWAL(wal.NewReader(sr), multiRef, mmappedChunks, oooMmappedChunks) if err := sr.Close(); err != nil { level.Warn(h.logger).Log("msg", "Error while closing the wal segments reader", "err", err) } @@ -615,26 +672,94 @@ func (h *Head) Init(minValidTime int64) error { level.Info(h.logger).Log("msg", "WAL segment loaded", "segment", i, "maxSegment", endAt) h.updateWALReplayStatusRead(i) } + walReplayDuration := time.Since(walReplayStart) - walReplayDuration := time.Since(start) - h.metrics.walTotalReplayDuration.Set(walReplayDuration.Seconds()) + wblReplayStart := time.Now() + if h.wbl != nil { + // Replay OOO WAL. 
+ startFrom, endAt, e = wal.Segments(h.wbl.Dir()) + if e != nil { + return errors.Wrap(e, "finding OOO WAL segments") + } + h.startWALReplayStatus(startFrom, endAt) + + for i := startFrom; i <= endAt; i++ { + s, err := wal.OpenReadSegment(wal.SegmentName(h.wbl.Dir(), i)) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("open WBL segment: %d", i)) + } + + sr := wal.NewSegmentBufReader(s) + err = h.loadWBL(wal.NewReader(sr), multiRef, lastMmapRef) + if err := sr.Close(); err != nil { + level.Warn(h.logger).Log("msg", "Error while closing the wbl segments reader", "err", err) + } + if err != nil { + return err + } + level.Info(h.logger).Log("msg", "WBL segment loaded", "segment", i, "maxSegment", endAt) + h.updateWALReplayStatusRead(i) + } + } + + wblReplayDuration := time.Since(wblReplayStart) + + totalReplayDuration := time.Since(start) + h.metrics.dataTotalReplayDuration.Set(totalReplayDuration.Seconds()) level.Info(h.logger).Log( "msg", "WAL replay completed", "checkpoint_replay_duration", checkpointReplayDuration.String(), - "wal_replay_duration", time.Since(walReplayStart).String(), - "total_replay_duration", walReplayDuration.String(), + "wal_replay_duration", walReplayDuration.String(), + "wbl_replay_duration", wblReplayDuration.String(), + "total_replay_duration", totalReplayDuration.String(), ) return nil } -func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) (map[chunks.HeadSeriesRef][]*mmappedChunk, error) { +func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) (map[chunks.HeadSeriesRef][]*mmappedChunk, map[chunks.HeadSeriesRef][]*mmappedChunk, chunks.ChunkDiskMapperRef, error) { mmappedChunks := map[chunks.HeadSeriesRef][]*mmappedChunk{} - if err := h.chunkDiskMapper.IterateAllChunks(func(seriesRef chunks.HeadSeriesRef, chunkRef chunks.ChunkDiskMapperRef, mint, maxt int64, numSamples uint16) error { - if maxt < h.minValidTime.Load() { + oooMmappedChunks := 
map[chunks.HeadSeriesRef][]*mmappedChunk{} + var lastRef, secondLastRef chunks.ChunkDiskMapperRef + if err := h.chunkDiskMapper.IterateAllChunks(func(seriesRef chunks.HeadSeriesRef, chunkRef chunks.ChunkDiskMapperRef, mint, maxt int64, numSamples uint16, encoding chunkenc.Encoding) error { + secondLastRef = lastRef + lastRef = chunkRef + isOOO := chunkenc.IsOutOfOrderChunk(encoding) + if !isOOO && maxt < h.minValidTime.Load() { return nil } + + // We ignore any chunk that doesn't have a valid encoding + if !chunkenc.IsValidEncoding(encoding) { + return nil + } + ms, ok := refSeries[seriesRef] + + if isOOO { + if !ok { + oooMmappedChunks[seriesRef] = append(oooMmappedChunks[seriesRef], &mmappedChunk{ + ref: chunkRef, + minTime: mint, + maxTime: maxt, + numSamples: numSamples, + }) + return nil + } + + h.metrics.chunks.Inc() + h.metrics.chunksCreated.Inc() + + ms.oooMmappedChunks = append(ms.oooMmappedChunks, &mmappedChunk{ + ref: chunkRef, + minTime: mint, + maxTime: maxt, + numSamples: numSamples, + }) + + return nil + } + if !ok { slice := mmappedChunks[seriesRef] if len(slice) > 0 && slice[len(slice)-1].maxTime >= mint { @@ -677,45 +802,57 @@ func (h *Head) loadMmappedChunks(refSeries map[chunks.HeadSeriesRef]*memSeries) } return nil }); err != nil { - return nil, errors.Wrap(err, "iterate on on-disk chunks") + // secondLastRef because the lastRef caused an error. + return nil, nil, secondLastRef, errors.Wrap(err, "iterate on on-disk chunks") } - return mmappedChunks, nil + return mmappedChunks, oooMmappedChunks, lastRef, nil } // removeCorruptedMmappedChunks attempts to delete the corrupted mmapped chunks and if it fails, it clears all the previously // loaded mmapped chunks. 
-func (h *Head) removeCorruptedMmappedChunks(err error) (map[chunks.HeadSeriesRef][]*mmappedChunk, error) { +func (h *Head) removeCorruptedMmappedChunks(err error) (map[chunks.HeadSeriesRef][]*mmappedChunk, map[chunks.HeadSeriesRef][]*mmappedChunk, chunks.ChunkDiskMapperRef, error) { + level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") // We never want to preserve the in-memory series from snapshots if we are repairing m-map chunks. if err := h.resetInMemoryState(); err != nil { - return nil, err + return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, err } level.Info(h.logger).Log("msg", "Deleting mmapped chunk files") if err := h.chunkDiskMapper.DeleteCorrupted(err); err != nil { level.Info(h.logger).Log("msg", "Deletion of corrupted mmap chunk files failed, discarding chunk files completely", "err", err) - if err := h.chunkDiskMapper.Truncate(math.MaxInt64); err != nil { + if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed", "err", err) } - return map[chunks.HeadSeriesRef][]*mmappedChunk{}, nil + return map[chunks.HeadSeriesRef][]*mmappedChunk{}, map[chunks.HeadSeriesRef][]*mmappedChunk{}, 0, nil } level.Info(h.logger).Log("msg", "Deletion of mmap chunk files successful, reattempting m-mapping the on-disk chunks") - mmappedChunks, err := h.loadMmappedChunks(make(map[chunks.HeadSeriesRef]*memSeries)) + mmappedChunks, oooMmappedChunks, lastRef, err := h.loadMmappedChunks(make(map[chunks.HeadSeriesRef]*memSeries)) if err != nil { level.Error(h.logger).Log("msg", "Loading on-disk chunks failed, discarding chunk files completely", "err", err) - if err := h.chunkDiskMapper.Truncate(math.MaxInt64); err != nil { + if err := h.chunkDiskMapper.Truncate(math.MaxUint32); err != nil { level.Error(h.logger).Log("msg", "Deletion of all mmap chunk files failed after failed loading", "err", err) } mmappedChunks = 
map[chunks.HeadSeriesRef][]*mmappedChunk{} } - return mmappedChunks, nil + return mmappedChunks, oooMmappedChunks, lastRef, nil } -func (h *Head) ApplyConfig(cfg *config.Config) error { +func (h *Head) ApplyConfig(cfg *config.Config, wbl *wal.WAL) { + oooTimeWindow := int64(0) + if cfg.StorageConfig.TSDBConfig != nil { + oooTimeWindow = cfg.StorageConfig.TSDBConfig.OutOfOrderTimeWindow + } + if oooTimeWindow < 0 { + oooTimeWindow = 0 + } + + h.SetOutOfOrderTimeWindow(oooTimeWindow, wbl) + if !h.opts.EnableExemplarStorage { - return nil + return } // Head uses opts.MaxExemplars in combination with opts.EnableExemplarStorage @@ -726,12 +863,21 @@ func (h *Head) ApplyConfig(cfg *config.Config) error { newSize := h.opts.MaxExemplars.Load() if prevSize == newSize { - return nil + return } migrated := h.exemplars.(*CircularExemplarStorage).Resize(newSize) level.Info(h.logger).Log("msg", "Exemplar storage resized", "from", prevSize, "to", newSize, "migrated", migrated) - return nil +} + +// SetOutOfOrderTimeWindow updates the out of order related parameters. +// If the Head already has a WBL set, then the wbl will be ignored. +func (h *Head) SetOutOfOrderTimeWindow(oooTimeWindow int64, wbl *wal.WAL) { + if oooTimeWindow > 0 && h.wbl == nil { + h.wbl = wbl + } + + h.opts.OutOfOrderTimeWindow.Store(oooTimeWindow) } // PostingsCardinalityStats returns top 10 highest cardinality stats By label and value names. @@ -773,6 +919,27 @@ func (h *Head) updateMinMaxTime(mint, maxt int64) { } } +func (h *Head) updateMinOOOMaxOOOTime(mint, maxt int64) { + for { + lt := h.MinOOOTime() + if mint >= lt { + break + } + if h.minOOOTime.CompareAndSwap(lt, mint) { + break + } + } + for { + ht := h.MaxOOOTime() + if maxt <= ht { + break + } + if h.maxOOOTime.CompareAndSwap(ht, maxt) { + break + } + } +} + // SetMinValidTime sets the minimum timestamp the head can ingest. 
func (h *Head) SetMinValidTime(minValidTime int64) { h.minValidTime.Store(minValidTime) @@ -838,30 +1005,7 @@ func (h *Head) truncateMemory(mint int64) (err error) { } h.metrics.headTruncateTotal.Inc() - start := time.Now() - - actualMint := h.gc() - level.Info(h.logger).Log("msg", "Head GC completed", "duration", time.Since(start)) - h.metrics.gcDuration.Observe(time.Since(start).Seconds()) - if actualMint > h.minTime.Load() { - // The actual mint of the Head is higher than the one asked to truncate. - appendableMinValidTime := h.appendableMinValidTime() - if actualMint < appendableMinValidTime { - h.minTime.Store(actualMint) - h.minValidTime.Store(actualMint) - } else { - // The actual min time is in the appendable window. - // So we set the mint to the appendableMinValidTime. - h.minTime.Store(appendableMinValidTime) - h.minValidTime.Store(appendableMinValidTime) - } - } - - // Truncate the chunk m-mapper. - if err := h.chunkDiskMapper.Truncate(mint); err != nil { - return errors.Wrap(err, "truncate chunks.HeadReadWriter") - } - return nil + return h.truncateSeriesAndChunkDiskMapper("truncateMemory") } // WaitForPendingReadersInTimeRange waits for queries overlapping with given range to finish querying. @@ -950,7 +1094,7 @@ func (h *Head) truncateWAL(mint int64) error { } // Start a new segment, so low ingestion volume TSDB don't have more WAL than // needed. - if err := h.wal.NextSegment(); err != nil { + if _, err := h.wal.NextSegment(); err != nil { return errors.Wrap(err, "next segment") } last-- // Never consider last segment for checkpoint. @@ -1016,6 +1160,59 @@ func (h *Head) truncateWAL(mint int64) error { return nil } +// truncateOOO +// - truncates the OOO WBL files whose index is strictly less than lastWBLFile. +// - garbage collects all the m-map chunks from the memory that are less than or equal to minOOOMmapRef +// and then deletes the series that do not have any data anymore. 
+func (h *Head) truncateOOO(lastWBLFile int, minOOOMmapRef chunks.ChunkDiskMapperRef) error { + curMinOOOMmapRef := chunks.ChunkDiskMapperRef(h.minOOOMmapRef.Load()) + if minOOOMmapRef.GreaterThan(curMinOOOMmapRef) { + h.minOOOMmapRef.Store(uint64(minOOOMmapRef)) + if err := h.truncateSeriesAndChunkDiskMapper("truncateOOO"); err != nil { + return err + } + } + + return h.wbl.Truncate(lastWBLFile) +} + +// truncateSeriesAndChunkDiskMapper is a helper function for truncateMemory and truncateOOO. +// It runs GC on the Head and truncates the ChunkDiskMapper accordingly. +func (h *Head) truncateSeriesAndChunkDiskMapper(caller string) error { + start := time.Now() + headMaxt := h.MaxTime() + actualMint, minOOOTime, minMmapFile := h.gc() + level.Info(h.logger).Log("msg", "Head GC completed", "caller", caller, "duration", time.Since(start)) + h.metrics.gcDuration.Observe(time.Since(start).Seconds()) + + if actualMint > h.minTime.Load() { + // The actual mint of the head is higher than the one asked to truncate. + appendableMinValidTime := h.appendableMinValidTime() + if actualMint < appendableMinValidTime { + h.minTime.Store(actualMint) + h.minValidTime.Store(actualMint) + } else { + // The actual min time is in the appendable window. + // So we set the mint to the appendableMinValidTime. + h.minTime.Store(appendableMinValidTime) + h.minValidTime.Store(appendableMinValidTime) + } + } + if headMaxt-h.opts.OutOfOrderTimeWindow.Load() < minOOOTime { + // The allowed OOO window is lower than the min OOO time seen during GC. + // So it is possible that some OOO sample was inserted that was less that minOOOTime. + // So we play safe and set it to the min that was possible. + minOOOTime = headMaxt - h.opts.OutOfOrderTimeWindow.Load() + } + h.minOOOTime.Store(minOOOTime) + + // Truncate the chunk m-mapper. 
+ if err := h.chunkDiskMapper.Truncate(uint32(minMmapFile)); err != nil { + return errors.Wrap(err, "truncate chunks.HeadReadWriter by file number") + } + return nil +} + type Stats struct { NumSeries uint64 MinTime, MaxTime int64 @@ -1149,14 +1346,20 @@ func (h *Head) Delete(mint, maxt int64, ms ...*labels.Matcher) error { } // gc removes data before the minimum timestamp from the head. -// It returns the actual min times of the chunks present in the Head. -func (h *Head) gc() int64 { +// It returns +// * The actual min times of the chunks present in the Head. +// * The min OOO time seen during the GC. +// * Min mmap file number seen in the series (in-order and out-of-order) after gc'ing the series. +func (h *Head) gc() (actualInOrderMint, minOOOTime int64, minMmapFile int) { // Only data strictly lower than this timestamp must be deleted. mint := h.MinTime() + // Only ooo m-map chunks strictly lower than or equal to this ref + // must be deleted. + minOOOMmapRef := chunks.ChunkDiskMapperRef(h.minOOOMmapRef.Load()) // Drop old chunks and remember series IDs and hashes if they can be // deleted entirely. - deleted, chunksRemoved, actualMint := h.series.gc(mint) + deleted, chunksRemoved, actualInOrderMint, minOOOTime, minMmapFile := h.series.gc(mint, minOOOMmapRef) seriesRemoved := len(deleted) h.metrics.seriesRemoved.Add(float64(seriesRemoved)) @@ -1186,7 +1389,7 @@ func (h *Head) gc() int64 { h.deletedMtx.Unlock() } - return actualMint + return actualInOrderMint, minOOOTime, minMmapFile } // Tombstones returns a new reader over the head's tombstones @@ -1224,6 +1427,18 @@ func (h *Head) MaxTime() int64 { return h.maxTime.Load() } +// MinOOOTime returns the lowest time bound on visible data in the out of order +// head. +func (h *Head) MinOOOTime() int64 { + return h.minOOOTime.Load() +} + +// MaxOOOTime returns the highest timestamp on visible data in the out of order +// head. 
+func (h *Head) MaxOOOTime() int64 { + return h.maxOOOTime.Load() +} + // compactable returns whether the head has a compactable range. // The head has a compactable range when the head time range is 1.5 times the chunk range. // The 0.5 acts as a buffer of the appendable window. @@ -1241,6 +1456,9 @@ func (h *Head) Close() error { if h.wal != nil { errs.Add(h.wal.Close()) } + if h.wbl != nil { + errs.Add(h.wbl.Close()) + } if errs.Err() == nil && h.opts.EnableMemorySnapshotOnShutdown { errs.Add(h.performChunkSnapshot()) } @@ -1271,7 +1489,7 @@ func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool, e func (h *Head) getOrCreateWithID(id chunks.HeadSeriesRef, hash uint64, lset labels.Labels) (*memSeries, bool, error) { s, created, err := h.series.getOrSet(hash, lset, func() *memSeries { - return newMemSeries(lset, id, h.chunkRange.Load(), h.opts.IsolationDisabled) + return newMemSeries(lset, id, h.chunkRange.Load(), h.opts.OutOfOrderCapMax.Load(), h.opts.IsolationDisabled) }) if err != nil { return nil, false, err @@ -1333,7 +1551,7 @@ const ( ) // stripeSeries holds series by HeadSeriesRef ("ID") and also by hash of their labels. -// ID-based lookups via (getByID()) are preferred over getByHash() for performance reasons. +// ID-based lookups via getByID() are preferred over getByHash() for performance reasons. // It locks modulo ranges of IDs and hashes to reduce lock contention. // The locks are padded to not be on the same cache line. Filling the padded space // with the maps was profiled to be slower – likely due to the additional pointer @@ -1375,13 +1593,16 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st // note: returning map[chunks.HeadSeriesRef]struct{} would be more accurate, // but the returned map goes into postings.Delete() which expects a map[storage.SeriesRef]struct // and there's no easy way to cast maps. 
-func (s *stripeSeries) gc(mint int64) (map[storage.SeriesRef]struct{}, int, int64) { +// minMmapFile is the min mmap file number seen in the series (in-order and out-of-order) after gc'ing the series. +func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (_ map[storage.SeriesRef]struct{}, _ int, _, _ int64, minMmapFile int) { var ( deleted = map[storage.SeriesRef]struct{}{} deletedForCallback = []labels.Labels{} rmChunks = 0 actualMint int64 = math.MaxInt64 + minOOOTime int64 = math.MaxInt64 ) + minMmapFile = math.MaxInt32 // Run through all series and truncate old chunks. Mark those with no // chunks left as deleted and store their ID. for i := 0; i < s.size; i++ { @@ -1390,9 +1611,32 @@ func (s *stripeSeries) gc(mint int64) (map[storage.SeriesRef]struct{}, int, int6 for hash, all := range s.hashes[i] { for _, series := range all { series.Lock() - rmChunks += series.truncateChunksBefore(mint) + rmChunks += series.truncateChunksBefore(mint, minOOOMmapRef) - if len(series.mmappedChunks) > 0 || series.headChunk != nil || series.pendingCommit { + if len(series.mmappedChunks) > 0 { + seq, _ := series.mmappedChunks[0].ref.Unpack() + if seq < minMmapFile { + minMmapFile = seq + } + } + if len(series.oooMmappedChunks) > 0 { + seq, _ := series.oooMmappedChunks[0].ref.Unpack() + if seq < minMmapFile { + minMmapFile = seq + } + for _, ch := range series.oooMmappedChunks { + if ch.minTime < minOOOTime { + minOOOTime = ch.minTime + } + } + } + if series.oooHeadChunk != nil { + if series.oooHeadChunk.minTime < minOOOTime { + minOOOTime = series.oooHeadChunk.minTime + } + } + if len(series.mmappedChunks) > 0 || len(series.oooMmappedChunks) > 0 || + series.headChunk != nil || series.oooHeadChunk != nil || series.pendingCommit { seriesMint := series.minTime() if seriesMint < actualMint { actualMint = seriesMint @@ -1435,7 +1679,7 @@ func (s *stripeSeries) gc(mint int64) (map[storage.SeriesRef]struct{}, int, int6 actualMint = mint } - return deleted, 
rmChunks, actualMint + return deleted, rmChunks, actualMint, minOOOTime, minMmapFile } func (s *stripeSeries) getByID(id chunks.HeadSeriesRef) *memSeries { @@ -1528,11 +1772,16 @@ type memSeries struct { // // pN is the pointer to the mmappedChunk referered to by HeadChunkID=N mmappedChunks []*mmappedChunk + headChunk *memChunk // Most recent chunk in memory that's still being built. + firstChunkID chunks.HeadChunkID // HeadChunkID for mmappedChunks[0] - mmMaxTime int64 // Max time of any mmapped chunk, only used during WAL replay. - headChunk *memChunk // Most recent chunk in memory that's still being built. - chunkRange int64 - firstChunkID chunks.HeadChunkID // HeadChunkID for mmappedChunks[0] + oooMmappedChunks []*mmappedChunk // Immutable chunks on disk containing OOO samples. + oooHeadChunk *oooHeadChunk // Most recent chunk for ooo samples in memory that's still being built. + firstOOOChunkID chunks.HeadChunkID // HeadOOOChunkID for oooMmappedChunks[0] + + mmMaxTime int64 // Max time of any mmapped chunk, only used during WAL replay. + chunkRange int64 + oooCapMax uint8 nextAt int64 // Timestamp at which to cut the next chunk. @@ -1551,12 +1800,13 @@ type memSeries struct { pendingCommit bool // Whether there are samples waiting to be committed to this series. } -func newMemSeries(lset labels.Labels, id chunks.HeadSeriesRef, chunkRange int64, isolationDisabled bool) *memSeries { +func newMemSeries(lset labels.Labels, id chunks.HeadSeriesRef, chunkRange, oooCapMax int64, isolationDisabled bool) *memSeries { s := &memSeries{ lset: lset, ref: id, chunkRange: chunkRange, nextAt: math.MinInt64, + oooCapMax: uint8(oooCapMax), } if !isolationDisabled { s.txs = newTxRing(4) @@ -1575,6 +1825,7 @@ func (s *memSeries) minTime() int64 { } func (s *memSeries) maxTime() int64 { + // The highest timestamps will always be in the regular (non-OOO) chunks, even if OOO is enabled. 
c := s.head() if c != nil { return c.maxTime @@ -1588,26 +1839,39 @@ func (s *memSeries) maxTime() int64 { // truncateChunksBefore removes all chunks from the series that // have no timestamp at or after mint. // Chunk IDs remain unchanged. -func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { +func (s *memSeries) truncateChunksBefore(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) int { + var removedInOrder int if s.headChunk != nil && s.headChunk.maxTime < mint { // If head chunk is truncated, we can truncate all mmapped chunks. - removed = 1 + len(s.mmappedChunks) - s.firstChunkID += chunks.HeadChunkID(removed) + removedInOrder = 1 + len(s.mmappedChunks) + s.firstChunkID += chunks.HeadChunkID(removedInOrder) s.headChunk = nil s.mmappedChunks = nil - return removed } if len(s.mmappedChunks) > 0 { for i, c := range s.mmappedChunks { if c.maxTime >= mint { break } - removed = i + 1 + removedInOrder = i + 1 } - s.mmappedChunks = append(s.mmappedChunks[:0], s.mmappedChunks[removed:]...) - s.firstChunkID += chunks.HeadChunkID(removed) + s.mmappedChunks = append(s.mmappedChunks[:0], s.mmappedChunks[removedInOrder:]...) + s.firstChunkID += chunks.HeadChunkID(removedInOrder) } - return removed + + var removedOOO int + if len(s.oooMmappedChunks) > 0 { + for i, c := range s.oooMmappedChunks { + if c.ref.GreaterThan(minOOOMmapRef) { + break + } + removedOOO = i + 1 + } + s.oooMmappedChunks = append(s.oooMmappedChunks[:0], s.oooMmappedChunks[removedOOO:]...) + s.firstOOOChunkID += chunks.HeadChunkID(removedOOO) + } + + return removedInOrder + removedOOO } // cleanupAppendIDsBelow cleans up older appendIDs. Has to be called after @@ -1627,6 +1891,16 @@ type memChunk struct { minTime, maxTime int64 } +type oooHeadChunk struct { + chunk *OOOChunk + minTime, maxTime int64 // can probably be removed and pulled out of the chunk instead +} + +// OverlapsClosedInterval returns true if the chunk overlaps [mint, maxt]. 
+func (mc *oooHeadChunk) OverlapsClosedInterval(mint, maxt int64) bool { + return overlapsClosedInterval(mc.minTime, mc.maxTime, mint, maxt) +} + // OverlapsClosedInterval returns true if the chunk overlaps [mint, maxt]. func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool { return overlapsClosedInterval(mc.minTime, mc.maxTime, mint, maxt) @@ -1655,12 +1929,15 @@ func (noopSeriesLifecycleCallback) PostCreation(labels.Labels) {} func (noopSeriesLifecycleCallback) PostDeletion(...labels.Labels) {} func (h *Head) Size() int64 { - var walSize int64 + var walSize, wblSize int64 if h.wal != nil { walSize, _ = h.wal.Size() } + if h.wbl != nil { + wblSize, _ = h.wbl.Size() + } cdmSize, _ := h.chunkDiskMapper.Size() - return walSize + cdmSize + return walSize + wblSize + cdmSize } func (h *RangeHead) Size() int64 { diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 1331fbe252..cbd6ad8e2b 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -137,6 +137,8 @@ func (h *Head) appender() *headAppender { minValidTime: h.appendableMinValidTime(), mint: math.MaxInt64, maxt: math.MinInt64, + headMaxt: h.MaxTime(), + oooTimeWindow: h.opts.OutOfOrderTimeWindow.Load(), samples: h.getAppendBuffer(), sampleSeries: h.getSeriesBuffer(), exemplars: exemplarsBuf, @@ -252,9 +254,11 @@ type exemplarWithSeriesRef struct { } type headAppender struct { - head *Head - minValidTime int64 // No samples below this timestamp are allowed. - mint, maxt int64 + head *Head + minValidTime int64 // No samples below this timestamp are allowed. + mint, maxt int64 + headMaxt int64 // We track it here to not take the lock for every sample appended. + oooTimeWindow int64 // Use the same for the entire append, and don't load the atomic for each sample. series []record.RefSeries // New series held by this appender. metadata []record.RefMetadata // New metadata held by this appender. 
@@ -268,7 +272,9 @@ type headAppender struct {
 }
 
 func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64, v float64) (storage.SeriesRef, error) {
-	if t < a.minValidTime {
+	// For OOO inserts, this restriction is irrelevant and will be checked later once we confirm the sample is an in-order append.
+	// If OOO inserts are disabled, we may as well check this as early as we can and avoid more work.
+	if a.oooTimeWindow == 0 && t < a.minValidTime {
 		a.head.metrics.outOfBoundSamples.Inc()
 		return 0, storage.ErrOutOfBounds
 	}
@@ -300,15 +306,25 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64
 	}
 
 	s.Lock()
-	if err := s.appendable(t, v); err != nil {
-		s.Unlock()
-		if err == storage.ErrOutOfOrderSample {
+	// TODO(codesome): If we definitely know at this point that the sample is ooo, then optimise
+	// to skip that sample from the WAL and write only in the WBL.
+	_, delta, err := s.appendable(t, v, a.headMaxt, a.minValidTime, a.oooTimeWindow)
+	if err == nil {
+		s.pendingCommit = true
+	}
+	s.Unlock()
+	if delta > 0 {
+		a.head.metrics.oooHistogram.Observe(float64(delta))
+	}
+	if err != nil {
+		switch err {
+		case storage.ErrOutOfOrderSample:
 			a.head.metrics.outOfOrderSamples.Inc()
+		case storage.ErrTooOldSample:
+			a.head.metrics.tooOldSamples.Inc()
 		}
 		return 0, err
 	}
-	s.pendingCommit = true
-	s.Unlock()
 
 	if t < a.mint {
 		a.mint = t
@@ -326,25 +342,46 @@ func (a *headAppender) Append(ref storage.SeriesRef, lset labels.Labels, t int64
 	return storage.SeriesRef(s.ref), nil
 }
 
-// appendable checks whether the given sample is valid for appending to the series.
-func (s *memSeries) appendable(t int64, v float64) error {
-	c := s.head()
-	if c == nil {
-		return nil
+// appendable checks whether the given sample is valid for appending to the series (if we return false and no error).
+// The sample belongs to the out of order chunk if we return true and no error.
+// An error signifies the sample cannot be handled.
+func (s *memSeries) appendable(t int64, v float64, headMaxt, minValidTime, oooTimeWindow int64) (isOOO bool, oooDelta int64, err error) { + // Check if we can append in the in-order chunk. + if t >= minValidTime { + if s.head() == nil { + // The series has no sample and was freshly created. + return false, 0, nil + } + msMaxt := s.maxTime() + if t > msMaxt { + return false, 0, nil + } + if t == msMaxt { + // We are allowing exact duplicates as we can encounter them in valid cases + // like federation and erroring out at that time would be extremely noisy. + // This only checks against the latest in-order sample. + // The OOO headchunk has its own method to detect these duplicates. + if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { + return false, 0, storage.ErrDuplicateSampleForTimestamp + } + // Sample is identical (ts + value) with most current (highest ts) sample in sampleBuf. + return false, 0, nil + } } - if t > c.maxTime { - return nil + // The sample cannot go in the in-order chunk. Check if it can go in the out-of-order chunk. + if oooTimeWindow > 0 && t >= headMaxt-oooTimeWindow { + return true, headMaxt - t, nil } - if t < c.maxTime { - return storage.ErrOutOfOrderSample + + // The sample cannot go in both in-order and out-of-order chunk. + if oooTimeWindow > 0 { + return true, headMaxt - t, storage.ErrTooOldSample } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. 
- if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { - return storage.ErrDuplicateSampleForTimestamp + if t < minValidTime { + return false, headMaxt - t, storage.ErrOutOfBounds } - return nil + return false, headMaxt - t, storage.ErrOutOfOrderSample } // AppendExemplar for headAppender assumes the series ref already exists, and so it doesn't @@ -487,6 +524,7 @@ func exemplarsForEncoding(es []exemplarWithSeriesRef) []record.RefExemplar { } // Commit writes to the WAL and adds the data to the Head. +// TODO(codesome): Refactor this method to reduce indentation and make it more readable. func (a *headAppender) Commit() (err error) { if a.closed { return ErrAppenderClosed @@ -517,24 +555,143 @@ func (a *headAppender) Commit() (err error) { defer a.head.putMetadataBuffer(a.metadata) defer a.head.iso.closeAppend(a.appendID) - total := len(a.samples) - var series *memSeries + var ( + samplesAppended = len(a.samples) + oooAccepted int // number of samples out of order but accepted: with ooo enabled and within time window + oooRejected int // number of samples rejected due to: out of order but OOO support disabled. + tooOldRejected int // number of samples rejected due to: that are out of order but too old (OOO support enabled, but outside time window) + oobRejected int // number of samples rejected due to: out of bounds: with t < minValidTime (OOO support disabled) + inOrderMint int64 = math.MaxInt64 + inOrderMaxt int64 = math.MinInt64 + ooomint int64 = math.MaxInt64 + ooomaxt int64 = math.MinInt64 + wblSamples []record.RefSample + oooMmapMarkers map[chunks.HeadSeriesRef]chunks.ChunkDiskMapperRef + oooRecords [][]byte + series *memSeries + enc record.Encoder + ) + defer func() { + for i := range oooRecords { + a.head.putBytesBuffer(oooRecords[i][:0]) + } + }() + collectOOORecords := func() { + if a.head.wbl == nil { + // WBL is not enabled. So no need to collect. 
+ wblSamples = nil + oooMmapMarkers = nil + return + } + // The m-map happens before adding a new sample. So we collect + // the m-map markers first, and then samples. + // WBL Graphically: + // WBL Before this Commit(): [old samples before this commit for chunk 1] + // WBL After this Commit(): [old samples before this commit for chunk 1][new samples in this commit for chunk 1]mmapmarker1[samples for chunk 2]mmapmarker2[samples for chunk 3] + if oooMmapMarkers != nil { + markers := make([]record.RefMmapMarker, 0, len(oooMmapMarkers)) + for ref, mmapRef := range oooMmapMarkers { + markers = append(markers, record.RefMmapMarker{ + Ref: ref, + MmapRef: mmapRef, + }) + } + r := enc.MmapMarkers(markers, a.head.getBytesBuffer()) + oooRecords = append(oooRecords, r) + } + + if len(wblSamples) > 0 { + r := enc.Samples(wblSamples, a.head.getBytesBuffer()) + oooRecords = append(oooRecords, r) + } + + wblSamples = nil + oooMmapMarkers = nil + } for i, s := range a.samples { series = a.sampleSeries[i] series.Lock() - ok, chunkCreated := series.append(s.T, s.V, a.appendID, a.head.chunkDiskMapper) - series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) - series.pendingCommit = false - series.Unlock() - if !ok { - total-- - a.head.metrics.outOfOrderSamples.Inc() + oooSample, _, err := series.appendable(s.T, s.V, a.headMaxt, a.minValidTime, a.oooTimeWindow) + switch err { + case storage.ErrOutOfOrderSample: + samplesAppended-- + oooRejected++ + case storage.ErrOutOfBounds: + samplesAppended-- + oobRejected++ + case storage.ErrTooOldSample: + samplesAppended-- + tooOldRejected++ + case nil: + // Do nothing. + default: + samplesAppended-- } + + var ok, chunkCreated bool + + if err == nil && oooSample { + // Sample is OOO and OOO handling is enabled + // and the delta is within the OOO tolerance. 
+ var mmapRef chunks.ChunkDiskMapperRef + ok, chunkCreated, mmapRef = series.insert(s.T, s.V, a.head.chunkDiskMapper) + if chunkCreated { + r, ok := oooMmapMarkers[series.ref] + if !ok || r != 0 { + // !ok means there are no markers collected for these samples yet. So we first flush the samples + // before setting this m-map marker. + + // r != 0 means we have already m-mapped a chunk for this series in the same Commit(). + // Hence, before we m-map again, we should add the samples and m-map markers + // seen till now to the WBL records. + collectOOORecords() + } + + if oooMmapMarkers == nil { + oooMmapMarkers = make(map[chunks.HeadSeriesRef]chunks.ChunkDiskMapperRef) + } + oooMmapMarkers[series.ref] = mmapRef + } + if ok { + wblSamples = append(wblSamples, s) + if s.T < ooomint { + ooomint = s.T + } + if s.T > ooomaxt { + ooomaxt = s.T + } + oooAccepted++ + } else { + // Sample is an exact duplicate of the last sample. + // NOTE: We can only detect updates if they clash with a sample in the OOOHeadChunk, + // not with samples in already flushed OOO chunks. + // TODO(codesome): Add error reporting? It depends on addressing https://github.com/prometheus/prometheus/discussions/10305. + samplesAppended-- + } + } else if err == nil { + ok, chunkCreated = series.append(s.T, s.V, a.appendID, a.head.chunkDiskMapper) + if ok { + if s.T < inOrderMint { + inOrderMint = s.T + } + if s.T > inOrderMaxt { + inOrderMaxt = s.T + } + } else { + // The sample is an exact duplicate, and should be silently dropped. 
+ samplesAppended-- + } + } + if chunkCreated { a.head.metrics.chunks.Inc() a.head.metrics.chunksCreated.Inc() } + + series.cleanupAppendIDsBelow(a.cleanupAppendIDsBelow) + series.pendingCommit = false + series.Unlock() } for i, m := range a.metadata { @@ -544,12 +701,48 @@ func (a *headAppender) Commit() (err error) { series.Unlock() } - a.head.metrics.samplesAppended.Add(float64(total)) - a.head.updateMinMaxTime(a.mint, a.maxt) + a.head.metrics.outOfOrderSamples.Add(float64(oooRejected)) + a.head.metrics.outOfBoundSamples.Add(float64(oobRejected)) + a.head.metrics.tooOldSamples.Add(float64(tooOldRejected)) + a.head.metrics.samplesAppended.Add(float64(samplesAppended)) + a.head.metrics.outOfOrderSamplesAppended.Add(float64(oooAccepted)) + a.head.updateMinMaxTime(inOrderMint, inOrderMaxt) + a.head.updateMinOOOMaxOOOTime(ooomint, ooomaxt) + collectOOORecords() + if a.head.wbl != nil { + if err := a.head.wbl.Log(oooRecords...); err != nil { + // TODO(codesome): Currently WBL logging of ooo samples is best effort here since we cannot try logging + // until we have found what samples become OOO. We can try having a metric for this failure. + // Returning the error here is not correct because we have already put the samples into the memory, + // hence the append/insert was a success. + level.Error(a.head.logger).Log("msg", "Failed to log out of order samples into the WAL", "err", err) + } + } return nil } +// insert is like append, except it inserts. Used for OOO samples. +func (s *memSeries) insert(t int64, v float64, chunkDiskMapper *chunks.ChunkDiskMapper) (inserted, chunkCreated bool, mmapRef chunks.ChunkDiskMapperRef) { + c := s.oooHeadChunk + if c == nil || c.chunk.NumSamples() == int(s.oooCapMax) { + // Note: If no new samples come in then we rely on compaction to clean up stale in-memory OOO chunks. 
+ c, mmapRef = s.cutNewOOOHeadChunk(t, chunkDiskMapper) + chunkCreated = true + } + + ok := c.chunk.Insert(t, v) + if ok { + if chunkCreated || t < c.minTime { + c.minTime = t + } + if chunkCreated || t > c.maxTime { + c.maxTime = t + } + } + return ok, chunkCreated, mmapRef +} + // append adds the sample (t, v) to the series. The caller also has to provide // the appendID for isolation. (The appendID can be zero, which results in no // isolation for this append.) @@ -567,7 +760,7 @@ func (s *memSeries) append(t int64, v float64, appendID uint64, chunkDiskMapper // Out of order sample. Sample timestamp is already in the mmapped chunks, so ignore it. return false, false } - // There is no chunk in this series yet, create the first chunk for the sample. + // There is no head chunk in this series yet, create the first chunk for the sample. c = s.cutNewHeadChunk(t, chunkDiskMapper) chunkCreated = true } @@ -651,6 +844,36 @@ func (s *memSeries) cutNewHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDis return s.headChunk } +func (s *memSeries) cutNewOOOHeadChunk(mint int64, chunkDiskMapper *chunks.ChunkDiskMapper) (*oooHeadChunk, chunks.ChunkDiskMapperRef) { + ref := s.mmapCurrentOOOHeadChunk(chunkDiskMapper) + + s.oooHeadChunk = &oooHeadChunk{ + chunk: NewOOOChunk(), + minTime: mint, + maxTime: math.MinInt64, + } + + return s.oooHeadChunk, ref +} + +func (s *memSeries) mmapCurrentOOOHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper) chunks.ChunkDiskMapperRef { + if s.oooHeadChunk == nil { + // There is no head chunk, so nothing to m-map here. + return 0 + } + xor, _ := s.oooHeadChunk.chunk.ToXOR() // Encode to XorChunk which is more compact and implements all of the needed functionality. 
+ oooXor := &chunkenc.OOOXORChunk{XORChunk: xor} + chunkRef := chunkDiskMapper.WriteChunk(s.ref, s.oooHeadChunk.minTime, s.oooHeadChunk.maxTime, oooXor, handleChunkWriteError) + s.oooMmappedChunks = append(s.oooMmappedChunks, &mmappedChunk{ + ref: chunkRef, + numSamples: uint16(xor.NumSamples()), + minTime: s.oooHeadChunk.minTime, + maxTime: s.oooHeadChunk.maxTime, + }) + s.oooHeadChunk = nil + return chunkRef +} + func (s *memSeries) mmapCurrentHeadChunk(chunkDiskMapper *chunks.ChunkDiskMapper) { if s.headChunk == nil { // There is no head chunk, so nothing to m-map here. diff --git a/tsdb/head_bench_test.go b/tsdb/head_bench_test.go index c0f07a00f2..2f8e0ba374 100644 --- a/tsdb/head_bench_test.go +++ b/tsdb/head_bench_test.go @@ -30,7 +30,7 @@ func BenchmarkHeadStripeSeriesCreate(b *testing.B) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = chunkDir - h, err := NewHead(nil, nil, nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(b, err) defer h.Close() @@ -45,7 +45,7 @@ func BenchmarkHeadStripeSeriesCreateParallel(b *testing.B) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = chunkDir - h, err := NewHead(nil, nil, nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(b, err) defer h.Close() @@ -69,7 +69,7 @@ func BenchmarkHeadStripeSeriesCreate_PreCreationFailure(b *testing.B) { // Mock the PreCreation() callback to fail on each series. 
opts.SeriesCallback = failingSeriesLifecycleCallback{} - h, err := NewHead(nil, nil, nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(b, err) defer h.Close() diff --git a/tsdb/head_read.go b/tsdb/head_read.go index ca34b9bbdd..5b2a70c03d 100644 --- a/tsdb/head_read.go +++ b/tsdb/head_read.go @@ -183,11 +183,20 @@ func (h *headIndexReader) Series(ref storage.SeriesRef, lbls *labels.Labels, chk return nil } -// headChunkID returns the HeadChunkID corresponding to .mmappedChunks[pos] +// headChunkID returns the HeadChunkID referred to by the given position. +// * 0 <= pos < len(s.mmappedChunks) refer to s.mmappedChunks[pos] +// * pos == len(s.mmappedChunks) refers to s.headChunk func (s *memSeries) headChunkID(pos int) chunks.HeadChunkID { return chunks.HeadChunkID(pos) + s.firstChunkID } +// oooHeadChunkID returns the HeadChunkID referred to by the given position. +// * 0 <= pos < len(s.oooMmappedChunks) refer to s.oooMmappedChunks[pos] +// * pos == len(s.oooMmappedChunks) refers to s.oooHeadChunk +func (s *memSeries) oooHeadChunkID(pos int) chunks.HeadChunkID { + return chunks.HeadChunkID(pos) + s.firstOOOChunkID +} + // LabelValueFor returns label value for the given label name in the series referred to by ID. func (h *headIndexReader) LabelValueFor(id storage.SeriesRef, label string) (string, error) { memSeries := h.head.series.getByID(chunks.HeadSeriesRef(id)) @@ -258,8 +267,8 @@ func (h *headChunkReader) Close() error { } // Chunk returns the chunk for the reference number. -func (h *headChunkReader) Chunk(ref chunks.ChunkRef) (chunkenc.Chunk, error) { - sid, cid := chunks.HeadChunkRef(ref).Unpack() +func (h *headChunkReader) Chunk(meta chunks.Meta) (chunkenc.Chunk, error) { + sid, cid := chunks.HeadChunkRef(meta.Ref).Unpack() s := h.head.series.getByID(sid) // This means that the series has been garbage collected. 
@@ -330,6 +339,260 @@ func (s *memSeries) chunk(id chunks.HeadChunkID, chunkDiskMapper *chunks.ChunkDi return mc, true, nil } +// oooMergedChunk returns the requested chunk based on the given chunks.Meta +// reference from memory or by m-mapping it from the disk. The returned chunk +// might be a merge of all the overlapping chunks, if any, amongst all the +// chunks in the OOOHead. +// This function is not thread safe unless the caller holds a lock. +func (s *memSeries) oooMergedChunk(meta chunks.Meta, cdm *chunks.ChunkDiskMapper, mint, maxt int64) (chunk *mergedOOOChunks, err error) { + _, cid := chunks.HeadChunkRef(meta.Ref).Unpack() + + // ix represents the index of chunk in the s.mmappedChunks slice. The chunk meta's are + // incremented by 1 when new chunk is created, hence (meta - firstChunkID) gives the slice index. + // The max index for the s.mmappedChunks slice can be len(s.mmappedChunks)-1, hence if the ix + // is len(s.mmappedChunks), it represents the next chunk, which is the head chunk. + ix := int(cid) - int(s.firstOOOChunkID) + if ix < 0 || ix > len(s.oooMmappedChunks) { + return nil, storage.ErrNotFound + } + + if ix == len(s.oooMmappedChunks) { + if s.oooHeadChunk == nil { + return nil, errors.New("invalid ooo head chunk") + } + } + + // We create a temporary slice of chunk metas to hold the information of all + // possible chunks that may overlap with the requested chunk. + tmpChks := make([]chunkMetaAndChunkDiskMapperRef, 0, len(s.oooMmappedChunks)) + + oooHeadRef := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(len(s.oooMmappedChunks)))) + if s.oooHeadChunk != nil && s.oooHeadChunk.OverlapsClosedInterval(mint, maxt) { + // We only want to append the head chunk if this chunk existed when + // Series() was called. This brings consistency in case new data + // is added in between Series() and Chunk() calls. 
+ if oooHeadRef == meta.OOOLastRef { + tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{ + meta: chunks.Meta{ + // Ignoring samples added before and after the last known min and max time for this chunk. + MinTime: meta.OOOLastMinTime, + MaxTime: meta.OOOLastMaxTime, + Ref: oooHeadRef, + }, + }) + } + } + + for i, c := range s.oooMmappedChunks { + chunkRef := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(i))) + // We can skip chunks that came in later than the last known OOOLastRef. + if chunkRef > meta.OOOLastRef { + break + } + + if chunkRef == meta.OOOLastRef { + tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{ + meta: chunks.Meta{ + MinTime: meta.OOOLastMinTime, + MaxTime: meta.OOOLastMaxTime, + Ref: chunkRef, + }, + ref: c.ref, + origMinT: c.minTime, + origMaxT: c.maxTime, + }) + } else if c.OverlapsClosedInterval(mint, maxt) { + tmpChks = append(tmpChks, chunkMetaAndChunkDiskMapperRef{ + meta: chunks.Meta{ + MinTime: c.minTime, + MaxTime: c.maxTime, + Ref: chunkRef, + }, + ref: c.ref, + }) + } + } + + // Next we want to sort all the collected chunks by min time so we can find + // those that overlap and stop when we know the rest don't. + sort.Sort(byMinTimeAndMinRef(tmpChks)) + + mc := &mergedOOOChunks{} + absoluteMax := int64(math.MinInt64) + for _, c := range tmpChks { + if c.meta.Ref != meta.Ref && (len(mc.chunks) == 0 || c.meta.MinTime > absoluteMax) { + continue + } + if c.meta.Ref == oooHeadRef { + var xor *chunkenc.XORChunk + // If head chunk min and max time match the meta OOO markers + // that means that the chunk has not expanded so we can append + // it as it is. + if s.oooHeadChunk.minTime == meta.OOOLastMinTime && s.oooHeadChunk.maxTime == meta.OOOLastMaxTime { + xor, err = s.oooHeadChunk.chunk.ToXOR() // TODO(jesus.vazquez) (This is an optimization idea that has no priority and might not be that useful) See if we could use a copy of the underlying slice. 
That would leave the more expensive ToXOR() function only for the use case where Bytes() is called.
+			} else {
+				// We need to remove samples that are outside of the markers
+				xor, err = s.oooHeadChunk.chunk.ToXORBetweenTimestamps(meta.OOOLastMinTime, meta.OOOLastMaxTime)
+			}
+			if err != nil {
+				return nil, errors.Wrap(err, "failed to convert ooo head chunk to xor chunk")
+			}
+			c.meta.Chunk = xor
+		} else {
+			chk, err := cdm.Chunk(c.ref)
+			if err != nil {
+				if _, ok := err.(*chunks.CorruptionErr); ok {
+					return nil, errors.Wrap(err, "invalid ooo mmapped chunk")
+				}
+				return nil, err
+			}
+			if c.meta.Ref == meta.OOOLastRef &&
+				(c.origMinT != meta.OOOLastMinTime || c.origMaxT != meta.OOOLastMaxTime) {
+				// The head expanded and was memory mapped so now we need to
+				// wrap the chunk within a chunk that doesn't allow us to iterate
+				// through samples out of the OOOLastMinT and OOOLastMaxT
+				// markers.
+				c.meta.Chunk = boundedChunk{chk, meta.OOOLastMinTime, meta.OOOLastMaxTime}
+			} else {
+				c.meta.Chunk = chk
+			}
+		}
+		mc.chunks = append(mc.chunks, c.meta)
+		if c.meta.MaxTime > absoluteMax {
+			absoluteMax = c.meta.MaxTime
+		}
+	}
+
+	return mc, nil
+}
+
+var _ chunkenc.Chunk = &mergedOOOChunks{}
+
+// mergedOOOChunks holds the list of overlapping chunks. This struct satisfies
+// chunkenc.Chunk.
+type mergedOOOChunks struct {
+	chunks []chunks.Meta
+}
+
+// Bytes is a very expensive method because it's calling the iterator of all the
+// chunks in the mergedOOOChunk and building a new chunk with the samples.
+func (o mergedOOOChunks) Bytes() []byte { + xc := chunkenc.NewXORChunk() + app, err := xc.Appender() + if err != nil { + panic(err) + } + it := o.Iterator(nil) + for it.Next() { + t, v := it.At() + app.Append(t, v) + } + + return xc.Bytes() +} + +func (o mergedOOOChunks) Encoding() chunkenc.Encoding { + return chunkenc.EncXOR +} + +func (o mergedOOOChunks) Appender() (chunkenc.Appender, error) { + return nil, errors.New("can't append to mergedOOOChunks") +} + +func (o mergedOOOChunks) Iterator(iterator chunkenc.Iterator) chunkenc.Iterator { + iterators := make([]chunkenc.Iterator, 0, len(o.chunks)) + for _, c := range o.chunks { + iterators = append(iterators, c.Chunk.Iterator(nil)) + } + return storage.NewChainSampleIterator(iterators) +} + +func (o mergedOOOChunks) NumSamples() int { + samples := 0 + for _, c := range o.chunks { + samples += c.Chunk.NumSamples() + } + return samples +} + +func (o mergedOOOChunks) Compact() {} + +var _ chunkenc.Chunk = &boundedChunk{} + +// boundedChunk is an implementation of chunkenc.Chunk that uses a +// boundedIterator that only iterates through samples which timestamps are +// >= minT and <= maxT +type boundedChunk struct { + chunkenc.Chunk + minT int64 + maxT int64 +} + +func (b boundedChunk) Bytes() []byte { + xor := chunkenc.NewXORChunk() + a, _ := xor.Appender() + it := b.Iterator(nil) + for it.Next() { + t, v := it.At() + a.Append(t, v) + } + return xor.Bytes() +} + +func (b boundedChunk) Iterator(iterator chunkenc.Iterator) chunkenc.Iterator { + it := b.Chunk.Iterator(iterator) + if it == nil { + panic("iterator shouldn't be nil") + } + return boundedIterator{it, b.minT, b.maxT} +} + +var _ chunkenc.Iterator = &boundedIterator{} + +// boundedIterator is an implementation of Iterator that only iterates through +// samples which timestamps are >= minT and <= maxT +type boundedIterator struct { + chunkenc.Iterator + minT int64 + maxT int64 +} + +// Next the first time its called it will advance as many positions as 
necessary +// until its able to find a sample within the bounds minT and maxT. +// If there are samples within bounds it will advance one by one amongst them. +// If there are no samples within bounds it will return false. +func (b boundedIterator) Next() bool { + for b.Iterator.Next() { + t, _ := b.Iterator.At() + if t < b.minT { + continue + } else if t > b.maxT { + return false + } + return true + } + return false +} + +func (b boundedIterator) Seek(t int64) bool { + if t < b.minT { + // We must seek at least up to b.minT if it is asked for something before that. + ok := b.Iterator.Seek(b.minT) + if !ok { + return false + } + t, _ := b.Iterator.At() + return t <= b.maxT + } + if t > b.maxT { + // We seek anyway so that the subsequent Next() calls will also return false. + b.Iterator.Seek(t) + return false + } + return b.Iterator.Seek(t) +} + +// safeChunk makes sure that the chunk can be accessed without a race condition type safeChunk struct { chunkenc.Chunk s *memSeries diff --git a/tsdb/head_read_test.go b/tsdb/head_read_test.go new file mode 100644 index 0000000000..4c3ba885bb --- /dev/null +++ b/tsdb/head_read_test.go @@ -0,0 +1,178 @@ +// Copyright 2021 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tsdb + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/tsdb/chunkenc" +) + +func TestBoundedChunk(t *testing.T) { + tests := []struct { + name string + inputChunk chunkenc.Chunk + inputMinT int64 + inputMaxT int64 + initialSeek int64 + seekIsASuccess bool + expSamples []sample + }{ + { + name: "if there are no samples it returns nothing", + inputChunk: newTestChunk(0), + expSamples: nil, + }, + { + name: "bounds represent a single sample", + inputChunk: newTestChunk(10), + expSamples: []sample{ + {0, 0}, + }, + }, + { + name: "if there are bounds set only samples within them are returned", + inputChunk: newTestChunk(10), + inputMinT: 1, + inputMaxT: 8, + expSamples: []sample{ + {1, 1}, + {2, 2}, + {3, 3}, + {4, 4}, + {5, 5}, + {6, 6}, + {7, 7}, + {8, 8}, + }, + }, + { + name: "if bounds set and only maxt is less than actual maxt", + inputChunk: newTestChunk(10), + inputMinT: 0, + inputMaxT: 5, + expSamples: []sample{ + {0, 0}, + {1, 1}, + {2, 2}, + {3, 3}, + {4, 4}, + {5, 5}, + }, + }, + { + name: "if bounds set and only mint is more than actual mint", + inputChunk: newTestChunk(10), + inputMinT: 5, + inputMaxT: 9, + expSamples: []sample{ + {5, 5}, + {6, 6}, + {7, 7}, + {8, 8}, + {9, 9}, + }, + }, + { + name: "if there are bounds set with seek before mint", + inputChunk: newTestChunk(10), + inputMinT: 3, + inputMaxT: 7, + initialSeek: 1, + seekIsASuccess: true, + expSamples: []sample{ + {3, 3}, + {4, 4}, + {5, 5}, + {6, 6}, + {7, 7}, + }, + }, + { + name: "if there are bounds set with seek between mint and maxt", + inputChunk: newTestChunk(10), + inputMinT: 3, + inputMaxT: 7, + initialSeek: 5, + seekIsASuccess: true, + expSamples: []sample{ + {5, 5}, + {6, 6}, + {7, 7}, + }, + }, + { + name: "if there are bounds set with seek after maxt", + inputChunk: newTestChunk(10), + inputMinT: 3, + inputMaxT: 7, + initialSeek: 8, + seekIsASuccess: false, + }, + } + for _, tc := range tests { + 
t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + chunk := boundedChunk{tc.inputChunk, tc.inputMinT, tc.inputMaxT} + + // Testing Bytes() + expChunk := chunkenc.NewXORChunk() + if tc.inputChunk.NumSamples() > 0 { + app, err := expChunk.Appender() + require.NoError(t, err) + for ts := tc.inputMinT; ts <= tc.inputMaxT; ts++ { + app.Append(ts, float64(ts)) + } + } + require.Equal(t, expChunk.Bytes(), chunk.Bytes()) + + var samples []sample + it := chunk.Iterator(nil) + + if tc.initialSeek != 0 { + // Testing Seek() + ok := it.Seek(tc.initialSeek) + require.Equal(t, tc.seekIsASuccess, ok) + if ok { + t, v := it.At() + samples = append(samples, sample{t, v}) + } + } + + // Testing Next() + for it.Next() { + t, v := it.At() + samples = append(samples, sample{t, v}) + } + + // it.Next() should keep returning false. + for i := 0; i < 10; i++ { + require.False(t, it.Next()) + } + + require.Equal(t, tc.expSamples, samples) + }) + } +} + +func newTestChunk(numSamples int) chunkenc.Chunk { + xor := chunkenc.NewXORChunk() + a, _ := xor.Appender() + for i := 0; i < numSamples; i++ { + a.Append(int64(i), float64(i)) + } + return xor +} diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 7c580406ce..489dad65c9 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -49,7 +49,7 @@ import ( "github.com/prometheus/prometheus/tsdb/wal" ) -func newTestHead(t testing.TB, chunkRange int64, compressWAL bool) (*Head, *wal.WAL) { +func newTestHead(t testing.TB, chunkRange int64, compressWAL, oooEnabled bool) (*Head, *wal.WAL) { dir := t.TempDir() wlog, err := wal.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, compressWAL) require.NoError(t, err) @@ -59,18 +59,23 @@ func newTestHead(t testing.TB, chunkRange int64, compressWAL bool) (*Head, *wal. 
opts.ChunkDirRoot = dir opts.EnableExemplarStorage = true opts.MaxExemplars.Store(config.DefaultExemplarsConfig.MaxExemplars) + if oooEnabled { + opts.OutOfOrderTimeWindow.Store(10 * time.Minute.Milliseconds()) + } - h, err := NewHead(nil, nil, wlog, opts, nil) + h, err := NewHead(nil, nil, wlog, nil, opts, nil) require.NoError(t, err) - require.NoError(t, h.chunkDiskMapper.IterateAllChunks(func(_ chunks.HeadSeriesRef, _ chunks.ChunkDiskMapperRef, _, _ int64, _ uint16) error { return nil })) + require.NoError(t, h.chunkDiskMapper.IterateAllChunks(func(_ chunks.HeadSeriesRef, _ chunks.ChunkDiskMapperRef, _, _ int64, _ uint16, _ chunkenc.Encoding) error { + return nil + })) return h, wlog } func BenchmarkCreateSeries(b *testing.B) { series := genSeries(b.N, 10, 0, 0) - h, _ := newTestHead(b, 10000, false) + h, _ := newTestHead(b, 10000, false, false) defer func() { require.NoError(b, h.Close()) }() @@ -224,7 +229,7 @@ func BenchmarkLoadWAL(b *testing.B) { require.NoError(b, err) for k := 0; k < c.batches*c.seriesPerBatch; k++ { // Create one mmapped chunk per series, with one sample at the given time. - s := newMemSeries(labels.Labels{}, chunks.HeadSeriesRef(k)*101, c.mmappedChunkT, defaultIsolationDisabled) + s := newMemSeries(labels.Labels{}, chunks.HeadSeriesRef(k)*101, c.mmappedChunkT, 1, defaultIsolationDisabled) s.append(c.mmappedChunkT, 42, 0, chunkDiskMapper) s.mmapCurrentHeadChunk(chunkDiskMapper) } @@ -255,7 +260,7 @@ func BenchmarkLoadWAL(b *testing.B) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = w.Dir() - h, err := NewHead(nil, nil, w, opts, nil) + h, err := NewHead(nil, nil, w, nil, opts, nil) require.NoError(b, err) h.Init(0) } @@ -271,7 +276,7 @@ func BenchmarkLoadWAL(b *testing.B) { // While appending the samples to the head it concurrently queries them from multiple go routines and verifies that the // returned results are correct. 
func TestHead_HighConcurrencyReadAndWrite(t *testing.T) { - head, _ := newTestHead(t, DefaultBlockDuration, false) + head, _ := newTestHead(t, DefaultBlockDuration, false, false) defer func() { require.NoError(t, head.Close()) }() @@ -487,7 +492,7 @@ func TestHead_ReadWAL(t *testing.T) { }, } - head, w := newTestHead(t, 1000, compress) + head, w := newTestHead(t, 1000, compress, false) defer func() { require.NoError(t, head.Close()) }() @@ -531,7 +536,7 @@ func TestHead_ReadWAL(t *testing.T) { } func TestHead_WALMultiRef(t *testing.T) { - head, w := newTestHead(t, 1000, false) + head, w := newTestHead(t, 1000, false, false) require.NoError(t, head.Init(0)) @@ -572,7 +577,7 @@ func TestHead_WALMultiRef(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = w.Dir() - head, err = NewHead(nil, nil, w, opts, nil) + head, err = NewHead(nil, nil, w, nil, opts, nil) require.NoError(t, err) require.NoError(t, head.Init(0)) defer func() { @@ -591,7 +596,7 @@ func TestHead_WALMultiRef(t *testing.T) { } func TestHead_ActiveAppenders(t *testing.T) { - head, _ := newTestHead(t, 1000, false) + head, _ := newTestHead(t, 1000, false, false) defer head.Close() require.NoError(t, head.Init(0)) @@ -624,14 +629,14 @@ func TestHead_ActiveAppenders(t *testing.T) { } func TestHead_UnknownWALRecord(t *testing.T) { - head, w := newTestHead(t, 1000, false) + head, w := newTestHead(t, 1000, false, false) w.Log([]byte{255, 42}) require.NoError(t, head.Init(0)) require.NoError(t, head.Close()) } func TestHead_Truncate(t *testing.T) { - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -733,7 +738,7 @@ func TestMemSeries_truncateChunks(t *testing.T) { }, } - s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000, defaultIsolationDisabled) + s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000, 1, defaultIsolationDisabled) for i := 0; i < 4000; i += 5 { ok, _ := 
s.append(int64(i), float64(i), 0, chunkDiskMapper) @@ -752,7 +757,7 @@ func TestMemSeries_truncateChunks(t *testing.T) { require.NotNil(t, chk) require.NoError(t, err) - s.truncateChunksBefore(2000) + s.truncateChunksBefore(2000, 0) require.Equal(t, int64(2000), s.mmappedChunks[0].minTime) _, _, err = s.chunk(0, chunkDiskMapper, &memChunkPool) @@ -789,7 +794,7 @@ func TestHeadDeleteSeriesWithoutSamples(t *testing.T) { {Ref: 50, T: 90, V: 1}, }, } - head, w := newTestHead(t, 1000, compress) + head, w := newTestHead(t, 1000, compress, false) defer func() { require.NoError(t, head.Close()) }() @@ -857,7 +862,8 @@ func TestHeadDeleteSimple(t *testing.T) { for _, compress := range []bool{false, true} { t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { for _, c := range cases { - head, w := newTestHead(t, 1000, compress) + head, w := newTestHead(t, 1000, compress, false) + require.NoError(t, head.Init(0)) app := head.Appender(context.Background()) for _, smpl := range smplsAll { @@ -887,7 +893,7 @@ func TestHeadDeleteSimple(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = reloadedW.Dir() - reloadedHead, err := NewHead(nil, nil, reloadedW, opts, nil) + reloadedHead, err := NewHead(nil, nil, reloadedW, nil, opts, nil) require.NoError(t, err) require.NoError(t, reloadedHead.Init(0)) @@ -937,7 +943,7 @@ func TestHeadDeleteSimple(t *testing.T) { } func TestDeleteUntilCurMax(t *testing.T) { - hb, _ := newTestHead(t, 1000000, false) + hb, _ := newTestHead(t, 1000000, false, false) defer func() { require.NoError(t, hb.Close()) }() @@ -990,7 +996,7 @@ func TestDeletedSamplesAndSeriesStillInWALAfterCheckpoint(t *testing.T) { numSamples := 10000 // Enough samples to cause a checkpoint. 
- hb, w := newTestHead(t, int64(numSamples)*10, false) + hb, w := newTestHead(t, int64(numSamples)*10, false, false) for i := 0; i < numSamples; i++ { app := hb.Appender(context.Background()) @@ -1082,7 +1088,7 @@ func TestDelete_e2e(t *testing.T) { seriesMap[labels.New(l...).String()] = []tsdbutil.Sample{} } - hb, _ := newTestHead(t, 100000, false) + hb, _ := newTestHead(t, 100000, false, false) defer func() { require.NoError(t, hb.Close()) }() @@ -1271,7 +1277,7 @@ func TestMemSeries_append(t *testing.T) { require.NoError(t, chunkDiskMapper.Close()) }() - s := newMemSeries(labels.Labels{}, 1, 500, defaultIsolationDisabled) + s := newMemSeries(labels.Labels{}, 1, 500, 1, defaultIsolationDisabled) // Add first two samples at the very end of a chunk range and the next two // on and after it. @@ -1325,7 +1331,7 @@ func TestMemSeries_append_atVariableRate(t *testing.T) { require.NoError(t, chunkDiskMapper.Close()) }) - s := newMemSeries(labels.Labels{}, 1, DefaultBlockDuration, defaultIsolationDisabled) + s := newMemSeries(labels.Labels{}, 1, DefaultBlockDuration, 0, defaultIsolationDisabled) // At this slow rate, we will fill the chunk in two block durations. slowRate := (DefaultBlockDuration * 2) / samplesPerChunk @@ -1361,7 +1367,7 @@ func TestMemSeries_append_atVariableRate(t *testing.T) { func TestGCChunkAccess(t *testing.T) { // Put a chunk, select it. GC it and then access it. - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -1398,22 +1404,22 @@ func TestGCChunkAccess(t *testing.T) { cr, err := h.chunksRange(0, 1500, nil) require.NoError(t, err) - _, err = cr.Chunk(chunks[0].Ref) + _, err = cr.Chunk(chunks[0]) require.NoError(t, err) - _, err = cr.Chunk(chunks[1].Ref) + _, err = cr.Chunk(chunks[1]) require.NoError(t, err) require.NoError(t, h.Truncate(1500)) // Remove a chunk. 
- _, err = cr.Chunk(chunks[0].Ref) + _, err = cr.Chunk(chunks[0]) require.Equal(t, storage.ErrNotFound, err) - _, err = cr.Chunk(chunks[1].Ref) + _, err = cr.Chunk(chunks[1]) require.NoError(t, err) } func TestGCSeriesAccess(t *testing.T) { // Put a series, select it. GC it and then access it. - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -1450,23 +1456,23 @@ func TestGCSeriesAccess(t *testing.T) { cr, err := h.chunksRange(0, 2000, nil) require.NoError(t, err) - _, err = cr.Chunk(chunks[0].Ref) + _, err = cr.Chunk(chunks[0]) require.NoError(t, err) - _, err = cr.Chunk(chunks[1].Ref) + _, err = cr.Chunk(chunks[1]) require.NoError(t, err) require.NoError(t, h.Truncate(2000)) // Remove the series. require.Equal(t, (*memSeries)(nil), h.series.getByID(1)) - _, err = cr.Chunk(chunks[0].Ref) + _, err = cr.Chunk(chunks[0]) require.Equal(t, storage.ErrNotFound, err) - _, err = cr.Chunk(chunks[1].Ref) + _, err = cr.Chunk(chunks[1]) require.Equal(t, storage.ErrNotFound, err) } func TestUncommittedSamplesNotLostOnTruncate(t *testing.T) { - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -1496,7 +1502,7 @@ func TestUncommittedSamplesNotLostOnTruncate(t *testing.T) { } func TestRemoveSeriesAfterRollbackAndTruncate(t *testing.T) { - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -1529,7 +1535,7 @@ func TestRemoveSeriesAfterRollbackAndTruncate(t *testing.T) { func TestHead_LogRollback(t *testing.T) { for _, compress := range []bool{false, true} { t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { - h, w := newTestHead(t, 1000, compress) + h, w := newTestHead(t, 1000, compress, false) defer func() { require.NoError(t, h.Close()) }() @@ -1606,7 +1612,7 @@ func TestWalRepair_DecodingError(t *testing.T) { opts := 
DefaultHeadOptions() opts.ChunkRange = 1 opts.ChunkDirRoot = w.Dir() - h, err := NewHead(nil, nil, w, opts, nil) + h, err := NewHead(nil, nil, w, nil, opts, nil) require.NoError(t, err) require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) initErr := h.Init(math.MinInt64) @@ -1660,7 +1666,8 @@ func TestHeadReadWriterRepair(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkRange = chunkRange opts.ChunkDirRoot = dir - h, err := NewHead(nil, nil, w, opts, nil) + opts.ChunkWriteQueueSize = 1 // We need to set this option so that we use the async queue. Upstream prometheus uses the queue directly. + h, err := NewHead(nil, nil, w, nil, opts, nil) require.NoError(t, err) require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.mmapChunkCorruptionTotal)) require.NoError(t, h.Init(math.MinInt64)) @@ -1715,7 +1722,7 @@ func TestHeadReadWriterRepair(t *testing.T) { } func TestNewWalSegmentOnTruncate(t *testing.T) { - h, wlog := newTestHead(t, 1000, false) + h, wlog := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -1745,7 +1752,7 @@ func TestNewWalSegmentOnTruncate(t *testing.T) { } func TestAddDuplicateLabelName(t *testing.T) { - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -1828,7 +1835,7 @@ func TestMemSeriesIsolation(t *testing.T) { } // Test isolation without restart of Head. - hb, _ := newTestHead(t, 1000, false) + hb, _ := newTestHead(t, 1000, false, false) i := addSamples(hb) testIsolation(hb, i) @@ -1890,7 +1897,7 @@ func TestMemSeriesIsolation(t *testing.T) { require.NoError(t, hb.Close()) // Test isolation with restart of Head. This is to verify the num samples of chunks after m-map chunk replay. 
- hb, w := newTestHead(t, 1000, false) + hb, w := newTestHead(t, 1000, false, false) i = addSamples(hb) require.NoError(t, hb.Close()) @@ -1899,7 +1906,7 @@ func TestMemSeriesIsolation(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = wlog.Dir() - hb, err = NewHead(nil, nil, wlog, opts, nil) + hb, err = NewHead(nil, nil, wlog, nil, opts, nil) defer func() { require.NoError(t, hb.Close()) }() require.NoError(t, err) require.NoError(t, hb.Init(0)) @@ -1943,7 +1950,7 @@ func TestIsolationRollback(t *testing.T) { } // Rollback after a failed append and test if the low watermark has progressed anyway. - hb, _ := newTestHead(t, 1000, false) + hb, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, hb.Close()) }() @@ -1974,7 +1981,7 @@ func TestIsolationLowWatermarkMonotonous(t *testing.T) { t.Skip("skipping test since tsdb isolation is disabled") } - hb, _ := newTestHead(t, 1000, false) + hb, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, hb.Close()) }() @@ -2011,7 +2018,7 @@ func TestIsolationAppendIDZeroIsNoop(t *testing.T) { t.Skip("skipping test since tsdb isolation is disabled") } - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -2036,7 +2043,7 @@ func TestIsolationWithoutAdd(t *testing.T) { t.Skip("skipping test since tsdb isolation is disabled") } - hb, _ := newTestHead(t, 1000, false) + hb, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, hb.Close()) }() @@ -2131,7 +2138,7 @@ func TestOutOfOrderSamplesMetric(t *testing.T) { } func testHeadSeriesChunkRace(t *testing.T) { - h, _ := newTestHead(t, 1000, false) + h, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, h.Close()) }() @@ -2166,7 +2173,7 @@ func testHeadSeriesChunkRace(t *testing.T) { } func TestHeadLabelNamesValuesWithMinMaxRange(t *testing.T) { - head, _ := newTestHead(t, 1000, false) + 
head, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, head.Close()) }() @@ -2226,7 +2233,7 @@ func TestHeadLabelNamesValuesWithMinMaxRange(t *testing.T) { } func TestHeadLabelValuesWithMatchers(t *testing.T) { - head, _ := newTestHead(t, 1000, false) + head, _ := newTestHead(t, 1000, false, false) t.Cleanup(func() { require.NoError(t, head.Close()) }) app := head.Appender(context.Background()) @@ -2285,7 +2292,7 @@ func TestHeadLabelValuesWithMatchers(t *testing.T) { } func TestHeadLabelNamesWithMatchers(t *testing.T) { - head, _ := newTestHead(t, 1000, false) + head, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, head.Close()) }() @@ -2353,7 +2360,7 @@ func TestHeadLabelNamesWithMatchers(t *testing.T) { } func TestErrReuseAppender(t *testing.T) { - head, _ := newTestHead(t, 1000, false) + head, _ := newTestHead(t, 1000, false, false) defer func() { require.NoError(t, head.Close()) }() @@ -2389,7 +2396,7 @@ func TestErrReuseAppender(t *testing.T) { func TestHeadMintAfterTruncation(t *testing.T) { chunkRange := int64(2000) - head, _ := newTestHead(t, chunkRange, false) + head, _ := newTestHead(t, chunkRange, false, false) app := head.Appender(context.Background()) _, err := app.Append(0, labels.FromStrings("a", "b"), 100, 100) @@ -2423,7 +2430,7 @@ func TestHeadMintAfterTruncation(t *testing.T) { func TestHeadExemplars(t *testing.T) { chunkRange := int64(2000) - head, _ := newTestHead(t, chunkRange, false) + head, _ := newTestHead(t, chunkRange, false, false) app := head.Appender(context.Background()) l := labels.FromStrings("traceId", "123") @@ -2445,7 +2452,7 @@ func TestHeadExemplars(t *testing.T) { func BenchmarkHeadLabelValuesWithMatchers(b *testing.B) { chunkRange := int64(2000) - head, _ := newTestHead(b, chunkRange, false) + head, _ := newTestHead(b, chunkRange, false, false) b.Cleanup(func() { require.NoError(b, head.Close()) }) app := head.Appender(context.Background()) @@ -2483,7 +2490,7 @@ func 
TestMemSafeIteratorSeekIntoBuffer(t *testing.T) { require.NoError(t, chunkDiskMapper.Close()) }() - s := newMemSeries(labels.Labels{}, 1, 500, defaultIsolationDisabled) + s := newMemSeries(labels.Labels{}, 1, 500, 1, defaultIsolationDisabled) for i := 0; i < 7; i++ { ok, _ := s.append(int64(i), float64(i), 0, chunkDiskMapper) @@ -2754,7 +2761,7 @@ func TestWaitForPendingReadersInTimeRange(t *testing.T) { } func TestChunkSnapshot(t *testing.T) { - head, _ := newTestHead(t, 120*4, false) + head, _ := newTestHead(t, 120*4, false, false) defer func() { head.opts.EnableMemorySnapshotOnShutdown = false require.NoError(t, head.Close()) @@ -2833,7 +2840,7 @@ func TestChunkSnapshot(t *testing.T) { openHeadAndCheckReplay := func() { w, err := wal.NewSize(nil, nil, head.wal.Dir(), 32768, false) require.NoError(t, err) - head, err = NewHead(nil, nil, w, head.opts, nil) + head, err = NewHead(nil, nil, w, nil, head.opts, nil) require.NoError(t, err) require.NoError(t, head.Init(math.MinInt64)) @@ -2996,7 +3003,7 @@ func TestChunkSnapshot(t *testing.T) { } func TestSnapshotError(t *testing.T) { - head, _ := newTestHead(t, 120*4, false) + head, _ := newTestHead(t, 120*4, false, false) defer func() { head.opts.EnableMemorySnapshotOnShutdown = false require.NoError(t, head.Close()) @@ -3043,7 +3050,7 @@ func TestSnapshotError(t *testing.T) { w, err := wal.NewSize(nil, nil, head.wal.Dir(), 32768, false) require.NoError(t, err) // Testing https://github.com/prometheus/prometheus/issues/9437 with the registry. 
- head, err = NewHead(prometheus.NewRegistry(), nil, w, head.opts, nil) + head, err = NewHead(prometheus.NewRegistry(), nil, w, nil, head.opts, nil) require.NoError(t, err) require.NoError(t, head.Init(math.MinInt64)) @@ -3102,7 +3109,7 @@ func TestChunkSnapshotReplayBug(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkDirRoot = dir opts.EnableMemorySnapshotOnShutdown = true - head, err := NewHead(nil, nil, wlog, opts, nil) + head, err := NewHead(nil, nil, wlog, nil, opts, nil) require.NoError(t, err) require.NoError(t, head.Init(math.MinInt64)) defer func() { @@ -3136,7 +3143,7 @@ func TestChunkSnapshotTakenAfterIncompleteSnapshot(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkDirRoot = dir opts.EnableMemorySnapshotOnShutdown = true - head, err := NewHead(nil, nil, wlog, opts, nil) + head, err := NewHead(nil, nil, wlog, nil, opts, nil) require.NoError(t, err) require.NoError(t, head.Init(math.MinInt64)) @@ -3159,6 +3166,251 @@ func TestChunkSnapshotTakenAfterIncompleteSnapshot(t *testing.T) { require.Greater(t, offset, 0) } +// TestOOOWalReplay checks the replay at a low level. +// TODO(codesome): Needs test for ooo WAL repair. 
+func TestOOOWalReplay(t *testing.T) { + dir := t.TempDir() + wlog, err := wal.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, true) + require.NoError(t, err) + oooWlog, err := wal.NewSize(nil, nil, filepath.Join(dir, wal.WblDirName), 32768, true) + require.NoError(t, err) + + opts := DefaultHeadOptions() + opts.ChunkRange = 1000 + opts.ChunkDirRoot = dir + opts.OutOfOrderTimeWindow.Store(30 * time.Minute.Milliseconds()) + + h, err := NewHead(nil, nil, wlog, oooWlog, opts, nil) + require.NoError(t, err) + require.NoError(t, h.Init(0)) + + var expOOOSamples []sample + l := labels.FromStrings("foo", "bar") + appendSample := func(mins int64, isOOO bool) { + app := h.Appender(context.Background()) + ts, v := mins*time.Minute.Milliseconds(), float64(mins) + _, err := app.Append(0, l, ts, v) + require.NoError(t, err) + require.NoError(t, app.Commit()) + + if isOOO { + expOOOSamples = append(expOOOSamples, sample{t: ts, v: v}) + } + } + + // In-order sample. + appendSample(60, false) + + // Out of order samples. + appendSample(40, true) + appendSample(35, true) + appendSample(50, true) + appendSample(55, true) + appendSample(59, true) + appendSample(31, true) + + // Check that Head's time ranges are set properly. + require.Equal(t, 60*time.Minute.Milliseconds(), h.MinTime()) + require.Equal(t, 60*time.Minute.Milliseconds(), h.MaxTime()) + require.Equal(t, 31*time.Minute.Milliseconds(), h.MinOOOTime()) + require.Equal(t, 59*time.Minute.Milliseconds(), h.MaxOOOTime()) + + // Restart head. + require.NoError(t, h.Close()) + wlog, err = wal.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, true) + require.NoError(t, err) + oooWlog, err = wal.NewSize(nil, nil, filepath.Join(dir, wal.WblDirName), 32768, true) + require.NoError(t, err) + h, err = NewHead(nil, nil, wlog, oooWlog, opts, nil) + require.NoError(t, err) + require.NoError(t, h.Init(0)) // Replay happens here. + + // Get the ooo samples from the Head. 
+ ms, ok, err := h.getOrCreate(l.Hash(), l) + require.NoError(t, err) + require.False(t, ok) + require.NotNil(t, ms) + + xor, err := ms.oooHeadChunk.chunk.ToXOR() + require.NoError(t, err) + + it := xor.Iterator(nil) + actOOOSamples := make([]sample, 0, len(expOOOSamples)) + for it.Next() { + ts, v := it.At() + actOOOSamples = append(actOOOSamples, sample{t: ts, v: v}) + } + + // OOO chunk will be sorted. Hence sort the expected samples. + sort.Slice(expOOOSamples, func(i, j int) bool { + return expOOOSamples[i].t < expOOOSamples[j].t + }) + + require.Equal(t, expOOOSamples, actOOOSamples) + + require.NoError(t, h.Close()) +} + +// TestOOOMmapReplay checks the replay at a low level. +func TestOOOMmapReplay(t *testing.T) { + dir := t.TempDir() + wlog, err := wal.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, true) + require.NoError(t, err) + oooWlog, err := wal.NewSize(nil, nil, filepath.Join(dir, wal.WblDirName), 32768, true) + require.NoError(t, err) + + opts := DefaultHeadOptions() + opts.ChunkRange = 1000 + opts.ChunkDirRoot = dir + opts.OutOfOrderCapMax.Store(30) + opts.OutOfOrderTimeWindow.Store(1000 * time.Minute.Milliseconds()) + + h, err := NewHead(nil, nil, wlog, oooWlog, opts, nil) + require.NoError(t, err) + require.NoError(t, h.Init(0)) + + l := labels.FromStrings("foo", "bar") + appendSample := func(mins int64) { + app := h.Appender(context.Background()) + ts, v := mins*time.Minute.Milliseconds(), float64(mins) + _, err := app.Append(0, l, ts, v) + require.NoError(t, err) + require.NoError(t, app.Commit()) + } + + // In-order sample. + appendSample(200) + + // Out of order samples. 92 samples to create 3 m-map chunks. + for mins := int64(100); mins <= 191; mins++ { + appendSample(mins) + } + + ms, ok, err := h.getOrCreate(l.Hash(), l) + require.NoError(t, err) + require.False(t, ok) + require.NotNil(t, ms) + + require.Len(t, ms.oooMmappedChunks, 3) + // Verify that we can access the chunks without error. 
+ for _, m := range ms.oooMmappedChunks { + chk, err := h.chunkDiskMapper.Chunk(m.ref) + require.NoError(t, err) + require.Equal(t, int(m.numSamples), chk.NumSamples()) + } + + expMmapChunks := make([]*mmappedChunk, 3) + copy(expMmapChunks, ms.oooMmappedChunks) + + // Restart head. + require.NoError(t, h.Close()) + + wlog, err = wal.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, true) + require.NoError(t, err) + oooWlog, err = wal.NewSize(nil, nil, filepath.Join(dir, wal.WblDirName), 32768, true) + require.NoError(t, err) + h, err = NewHead(nil, nil, wlog, oooWlog, opts, nil) + require.NoError(t, err) + require.NoError(t, h.Init(0)) // Replay happens here. + + // Get the mmap chunks from the Head. + ms, ok, err = h.getOrCreate(l.Hash(), l) + require.NoError(t, err) + require.False(t, ok) + require.NotNil(t, ms) + + require.Len(t, ms.oooMmappedChunks, len(expMmapChunks)) + // Verify that we can access the chunks without error. + for _, m := range ms.oooMmappedChunks { + chk, err := h.chunkDiskMapper.Chunk(m.ref) + require.NoError(t, err) + require.Equal(t, int(m.numSamples), chk.NumSamples()) + } + + actMmapChunks := make([]*mmappedChunk, len(expMmapChunks)) + copy(actMmapChunks, ms.oooMmappedChunks) + + require.Equal(t, expMmapChunks, actMmapChunks) + + require.NoError(t, h.Close()) +} + +func TestHeadInit_DiscardChunksWithUnsupportedEncoding(t *testing.T) { + h, _ := newTestHead(t, 1000, false, false) + defer func() { + require.NoError(t, h.Close()) + }() + + require.NoError(t, h.Init(0)) + + ctx := context.Background() + app := h.Appender(ctx) + seriesLabels := labels.FromStrings("a", "1") + var seriesRef storage.SeriesRef + var err error + for i := 0; i < 400; i++ { + seriesRef, err = app.Append(0, seriesLabels, int64(i), float64(i)) + require.NoError(t, err) + } + + require.NoError(t, app.Commit()) + require.Greater(t, prom_testutil.ToFloat64(h.metrics.chunksCreated), 1.0) + + uc := newUnsupportedChunk() + // Make this chunk not overlap with the previous 
and the next + h.chunkDiskMapper.WriteChunk(chunks.HeadSeriesRef(seriesRef), 500, 600, uc, func(err error) { require.NoError(t, err) }) + + app = h.Appender(ctx) + for i := 700; i < 1200; i++ { + _, err := app.Append(0, seriesLabels, int64(i), float64(i)) + require.NoError(t, err) + } + + require.NoError(t, app.Commit()) + require.Greater(t, prom_testutil.ToFloat64(h.metrics.chunksCreated), 4.0) + + series, created, err := h.getOrCreate(seriesLabels.Hash(), seriesLabels) + require.NoError(t, err) + require.False(t, created, "should already exist") + require.NotNil(t, series, "should return the series we created above") + + expChunks := make([]*mmappedChunk, len(series.mmappedChunks)) + copy(expChunks, series.mmappedChunks) + + require.NoError(t, h.Close()) + + wlog, err := wal.NewSize(nil, nil, filepath.Join(h.opts.ChunkDirRoot, "wal"), 32768, false) + require.NoError(t, err) + h, err = NewHead(nil, nil, wlog, nil, h.opts, nil) + require.NoError(t, err) + require.NoError(t, h.Init(0)) + + series, created, err = h.getOrCreate(seriesLabels.Hash(), seriesLabels) + require.NoError(t, err) + require.False(t, created, "should already exist") + require.NotNil(t, series, "should return the series we created above") + + require.Equal(t, expChunks, series.mmappedChunks) +} + +const ( + UnsupportedMask = 0b10000000 + EncUnsupportedXOR = chunkenc.EncXOR | UnsupportedMask +) + +// unsupportedChunk holds a XORChunk and overrides the Encoding() method. +type unsupportedChunk struct { + *chunkenc.XORChunk +} + +func newUnsupportedChunk() *unsupportedChunk { + return &unsupportedChunk{chunkenc.NewXORChunk()} +} + +func (c *unsupportedChunk) Encoding() chunkenc.Encoding { + return EncUnsupportedXOR +} + // Tests https://github.com/prometheus/prometheus/issues/10277. 
func TestMmapPanicAfterMmapReplayCorruption(t *testing.T) { dir := t.TempDir() @@ -3171,7 +3423,7 @@ func TestMmapPanicAfterMmapReplayCorruption(t *testing.T) { opts.EnableExemplarStorage = true opts.MaxExemplars.Store(config.DefaultExemplarsConfig.MaxExemplars) - h, err := NewHead(nil, nil, wlog, opts, nil) + h, err := NewHead(nil, nil, wlog, nil, opts, nil) require.NoError(t, err) require.NoError(t, h.Init(0)) @@ -3205,7 +3457,7 @@ func TestMmapPanicAfterMmapReplayCorruption(t *testing.T) { require.NoError(t, err) require.NoError(t, f.Close()) - h, err = NewHead(nil, nil, wlog, opts, nil) + h, err = NewHead(nil, nil, wlog, nil, opts, nil) require.NoError(t, err) require.NoError(t, h.Init(0)) @@ -3230,7 +3482,7 @@ func TestReplayAfterMmapReplayError(t *testing.T) { opts.EnableMemorySnapshotOnShutdown = true opts.MaxExemplars.Store(config.DefaultExemplarsConfig.MaxExemplars) - h, err = NewHead(nil, nil, wlog, opts, nil) + h, err = NewHead(nil, nil, wlog, nil, opts, nil) require.NoError(t, err) require.NoError(t, h.Init(0)) } @@ -3292,3 +3544,131 @@ func TestReplayAfterMmapReplayError(t *testing.T) { require.NoError(t, h.Close()) } + +func TestOOOAppendWithNoSeries(t *testing.T) { + dir := t.TempDir() + wlog, err := wal.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, true) + require.NoError(t, err) + oooWlog, err := wal.NewSize(nil, nil, filepath.Join(dir, wal.WblDirName), 32768, true) + require.NoError(t, err) + + opts := DefaultHeadOptions() + opts.ChunkDirRoot = dir + opts.OutOfOrderCapMax.Store(30) + opts.OutOfOrderTimeWindow.Store(120 * time.Minute.Milliseconds()) + + h, err := NewHead(nil, nil, wlog, oooWlog, opts, nil) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, h.Close()) + }) + require.NoError(t, h.Init(0)) + + appendSample := func(lbls labels.Labels, ts int64) { + app := h.Appender(context.Background()) + _, err := app.Append(0, lbls, ts*time.Minute.Milliseconds(), float64(ts)) + require.NoError(t, err) + require.NoError(t, 
app.Commit()) + } + + verifyOOOSamples := func(lbls labels.Labels, expSamples int) { + ms, created, err := h.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.NotNil(t, ms) + + require.Nil(t, ms.headChunk) + require.NotNil(t, ms.oooHeadChunk) + require.Equal(t, expSamples, ms.oooHeadChunk.chunk.NumSamples()) + } + + verifyInOrderSamples := func(lbls labels.Labels, expSamples int) { + ms, created, err := h.getOrCreate(lbls.Hash(), lbls) + require.NoError(t, err) + require.False(t, created) + require.NotNil(t, ms) + + require.Nil(t, ms.oooHeadChunk) + require.NotNil(t, ms.headChunk) + require.Equal(t, expSamples, ms.headChunk.chunk.NumSamples()) + } + + newLabels := func(idx int) labels.Labels { return labels.FromStrings("foo", fmt.Sprintf("%d", idx)) } + + s1 := newLabels(1) + appendSample(s1, 300) // At 300m. + verifyInOrderSamples(s1, 1) + + // At 239m, the sample cannot be appended to in-order chunk since it is + // beyond the minValidTime. So it should go in OOO chunk. + // Series does not exist for s2 yet. + s2 := newLabels(2) + appendSample(s2, 239) // OOO sample. + verifyOOOSamples(s2, 1) + + // Similar for 180m. + s3 := newLabels(3) + appendSample(s3, 180) // OOO sample. + verifyOOOSamples(s3, 1) + + // Now 179m is too old. + s4 := newLabels(4) + app := h.Appender(context.Background()) + _, err = app.Append(0, s4, 179*time.Minute.Milliseconds(), float64(179)) + require.Equal(t, storage.ErrTooOldSample, err) + require.NoError(t, app.Rollback()) + verifyOOOSamples(s3, 1) + + // Samples still go into in-order chunk for samples within + // appendable minValidTime. 
+ s5 := newLabels(5) + appendSample(s5, 240) + verifyInOrderSamples(s5, 1) +} + +func TestHeadMinOOOTimeUpdate(t *testing.T) { + dir := t.TempDir() + wlog, err := wal.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, true) + require.NoError(t, err) + oooWlog, err := wal.NewSize(nil, nil, filepath.Join(dir, wal.WblDirName), 32768, true) + require.NoError(t, err) + + opts := DefaultHeadOptions() + opts.ChunkDirRoot = dir + opts.OutOfOrderTimeWindow.Store(10 * time.Minute.Milliseconds()) + + h, err := NewHead(nil, nil, wlog, oooWlog, opts, nil) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, h.Close()) + }) + require.NoError(t, h.Init(0)) + + appendSample := func(ts int64) { + lbls := labels.FromStrings("foo", "bar") + app := h.Appender(context.Background()) + _, err := app.Append(0, lbls, ts*time.Minute.Milliseconds(), float64(ts)) + require.NoError(t, err) + require.NoError(t, app.Commit()) + } + + appendSample(300) // In-order sample. + + require.Equal(t, int64(math.MaxInt64), h.MinOOOTime()) + + appendSample(295) // OOO sample. + require.Equal(t, 295*time.Minute.Milliseconds(), h.MinOOOTime()) + + // Allowed window for OOO is >=290, which is before the earliest ooo sample 295, so it gets set to the lower value. + require.NoError(t, h.truncateOOO(0, 1)) + require.Equal(t, 290*time.Minute.Milliseconds(), h.MinOOOTime()) + + appendSample(310) // In-order sample. + appendSample(305) // OOO sample. + require.Equal(t, 290*time.Minute.Milliseconds(), h.MinOOOTime()) + + // Now the OOO sample 295 was not gc'ed yet. And allowed window for OOO is now >=300. + // So the lowest among them, 295, is set as minOOOTime. 
+ require.NoError(t, h.truncateOOO(0, 2)) + require.Equal(t, 295*time.Minute.Milliseconds(), h.MinOOOTime()) +} diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index 6ea58bcd4e..8bbe33cc48 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -42,7 +42,7 @@ import ( "github.com/prometheus/prometheus/tsdb/wal" ) -func (h *Head) loadWAL(r *wal.Reader, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, mmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (err error) { +func (h *Head) loadWAL(r *wal.Reader, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, mmappedChunks, oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (err error) { // Track number of samples that referenced a series we don't know about // for error reporting. var unknownRefs atomic.Uint64 @@ -107,7 +107,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H processors[i].setup() go func(wp *walSubsetProcessor) { - unknown, overlapping := wp.processWALSamples(h, mmappedChunks) + unknown, overlapping := wp.processWALSamples(h, mmappedChunks, oooMmappedChunks) unknownRefs.Add(unknown) mmapOverlappingChunks.Add(overlapping) wg.Done() @@ -343,7 +343,7 @@ Outer: } // resetSeriesWithMMappedChunks is only used during the WAL replay. -func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc []*mmappedChunk, walSeriesRef chunks.HeadSeriesRef) (overlapped bool) { +func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc, oooMmc []*mmappedChunk, walSeriesRef chunks.HeadSeriesRef) (overlapped bool) { if mSeries.ref != walSeriesRef { // Checking if the new m-mapped chunks overlap with the already existing ones. 
if len(mSeries.mmappedChunks) > 0 && len(mmc) > 0 { @@ -368,10 +368,11 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc []*mmappedCh } } - h.metrics.chunksCreated.Add(float64(len(mmc))) + h.metrics.chunksCreated.Add(float64(len(mmc) + len(oooMmc))) h.metrics.chunksRemoved.Add(float64(len(mSeries.mmappedChunks))) - h.metrics.chunks.Add(float64(len(mmc) - len(mSeries.mmappedChunks))) + h.metrics.chunks.Add(float64(len(mmc) + len(oooMmc) - len(mSeries.mmappedChunks))) mSeries.mmappedChunks = mmc + mSeries.oooMmappedChunks = oooMmc // Cache the last mmapped chunk time, so we can skip calling append() for samples it will reject. if len(mmc) == 0 { mSeries.mmMaxTime = math.MinInt64 @@ -379,6 +380,19 @@ func (h *Head) resetSeriesWithMMappedChunks(mSeries *memSeries, mmc []*mmappedCh mSeries.mmMaxTime = mmc[len(mmc)-1].maxTime h.updateMinMaxTime(mmc[0].minTime, mSeries.mmMaxTime) } + if len(oooMmc) != 0 { + // Mint and maxt can be in any chunk, they are not sorted. + mint, maxt := int64(math.MaxInt64), int64(math.MinInt64) + for _, ch := range oooMmc { + if ch.minTime < mint { + mint = ch.minTime + } + if ch.maxTime > maxt { + maxt = ch.maxTime + } + } + h.updateMinOOOMaxOOOTime(mint, maxt) + } // Any samples replayed till now would already be compacted. Resetting the head chunk. mSeries.nextAt = 0 @@ -421,7 +435,7 @@ func (wp *walSubsetProcessor) reuseBuf() []record.RefSample { // processWALSamples adds the samples it receives to the head and passes // the buffer received to an output channel for reuse. 
-func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (unknownRefs, mmapOverlappingChunks uint64) { +func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks, oooMmappedChunks map[chunks.HeadSeriesRef][]*mmappedChunk) (unknownRefs, mmapOverlappingChunks uint64) { defer close(wp.output) mint, maxt := int64(math.MaxInt64), int64(math.MinInt64) @@ -429,7 +443,8 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks map[chunk for in := range wp.input { if in.existingSeries != nil { mmc := mmappedChunks[in.walSeriesRef] - if h.resetSeriesWithMMappedChunks(in.existingSeries, mmc, in.walSeriesRef) { + oooMmc := oooMmappedChunks[in.walSeriesRef] + if h.resetSeriesWithMMappedChunks(in.existingSeries, mmc, oooMmc, in.walSeriesRef) { mmapOverlappingChunks++ } continue @@ -465,6 +480,292 @@ func (wp *walSubsetProcessor) processWALSamples(h *Head, mmappedChunks map[chunk return unknownRefs, mmapOverlappingChunks } +func (h *Head) loadWBL(r *wal.Reader, multiRef map[chunks.HeadSeriesRef]chunks.HeadSeriesRef, lastMmapRef chunks.ChunkDiskMapperRef) (err error) { + // Track number of samples, m-map markers, that referenced a series we don't know about + // for error reporting. + var unknownRefs, mmapMarkerUnknownRefs atomic.Uint64 + + lastSeq, lastOff := lastMmapRef.Unpack() + // Start workers that each process samples for a partition of the series ID space. + var ( + wg sync.WaitGroup + n = runtime.GOMAXPROCS(0) + processors = make([]wblSubsetProcessor, n) + + dec record.Decoder + shards = make([][]record.RefSample, n) + + decodedCh = make(chan interface{}, 10) + decodeErr error + samplesPool = sync.Pool{ + New: func() interface{} { + return []record.RefSample{} + }, + } + markersPool = sync.Pool{ + New: func() interface{} { + return []record.RefMmapMarker{} + }, + } + ) + + defer func() { + // For CorruptionErr ensure to terminate all workers before exiting. 
+ // We also wrap it to identify OOO WBL corruption. + _, ok := err.(*wal.CorruptionErr) + if ok { + err = &errLoadWbl{err: err} + for i := 0; i < n; i++ { + processors[i].closeAndDrain() + } + wg.Wait() + } + }() + + wg.Add(n) + for i := 0; i < n; i++ { + processors[i].setup() + + go func(wp *wblSubsetProcessor) { + unknown := wp.processWBLSamples(h) + unknownRefs.Add(unknown) + wg.Done() + }(&processors[i]) + } + + go func() { + defer close(decodedCh) + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Samples: + samples := samplesPool.Get().([]record.RefSample)[:0] + samples, err = dec.Samples(rec, samples) + if err != nil { + decodeErr = &wal.CorruptionErr{ + Err: errors.Wrap(err, "decode samples"), + Segment: r.Segment(), + Offset: r.Offset(), + } + return + } + decodedCh <- samples + case record.MmapMarkers: + markers := markersPool.Get().([]record.RefMmapMarker)[:0] + markers, err = dec.MmapMarkers(rec, markers) + if err != nil { + decodeErr = &wal.CorruptionErr{ + Err: errors.Wrap(err, "decode mmap markers"), + Segment: r.Segment(), + Offset: r.Offset(), + } + return + } + decodedCh <- markers + default: + // Noop. + } + } + }() + + // The records are always replayed from the oldest to the newest. + for d := range decodedCh { + switch v := d.(type) { + case []record.RefSample: + samples := v + // We split up the samples into parts of 5000 samples or less. + // With O(300 * #cores) in-flight sample batches, large scrapes could otherwise + // cause thousands of very large in flight buffers occupying large amounts + // of unused memory. 
+			for len(samples) > 0 {
+				m := 5000
+				if len(samples) < m {
+					m = len(samples)
+				}
+				for i := 0; i < n; i++ {
+					shards[i] = processors[i].reuseBuf()
+				}
+				for _, sam := range samples[:m] {
+					if r, ok := multiRef[sam.Ref]; ok {
+						sam.Ref = r
+					}
+					mod := uint64(sam.Ref) % uint64(n)
+					shards[mod] = append(shards[mod], sam)
+				}
+				for i := 0; i < n; i++ {
+					processors[i].input <- shards[i]
+				}
+				samples = samples[m:]
+			}
+			//nolint:staticcheck // Ignore SA6002 relax staticcheck verification.
+			samplesPool.Put(d)
+		case []record.RefMmapMarker:
+			markers := v
+			for _, rm := range markers {
+				seq, off := rm.MmapRef.Unpack()
+				if seq > lastSeq || (seq == lastSeq && off > lastOff) {
+					// This m-map chunk from markers was not present during
+					// the load of mmapped chunks that happened in the head
+					// initialization.
+					continue
+				}
+
+				if r, ok := multiRef[rm.Ref]; ok {
+					rm.Ref = r
+				}
+
+				ms := h.series.getByID(rm.Ref)
+				if ms == nil {
+					mmapMarkerUnknownRefs.Inc()
+					continue
+				}
+				idx := uint64(ms.ref) % uint64(n)
+				// It is possible that some old sample is being processed in processWALSamples that
+				// could cause a race below. So we wait for the goroutine to empty the input buffer and finish
+				// processing all old samples after emptying the buffer.
+				processors[idx].waitUntilIdle()
+				// Lock the subset so we can modify the series object
+				processors[idx].mx.Lock()
+
+				// All samples till now have been m-mapped. Hence clear out the headChunk.
+				// In case some samples slipped through and went into m-map chunks because of changed
+				// chunk size parameters, we are not taking care of that here.
+				// TODO(codesome): see if there is a way to avoid duplicate m-map chunks if
+				// the size of ooo chunk was reduced between restarts.
+				ms.oooHeadChunk = nil
+
+				processors[idx].mx.Unlock()
+			}
+		default:
+			panic(fmt.Errorf("unexpected decodedCh type: %T", d))
+		}
+	}
+
+	if decodeErr != nil {
+		return decodeErr
+	}
+
+	// Signal termination to each worker and wait for it to close its output channel.
+	for i := 0; i < n; i++ {
+		processors[i].closeAndDrain()
+	}
+	wg.Wait()
+
+	if r.Err() != nil {
+		return errors.Wrap(r.Err(), "read records")
+	}
+
+	if unknownRefs.Load() > 0 || mmapMarkerUnknownRefs.Load() > 0 {
+		level.Warn(h.logger).Log("msg", "Unknown series references for ooo WAL replay", "samples", unknownRefs.Load(), "mmap_markers", mmapMarkerUnknownRefs.Load())
+	}
+	return nil
+}
+
+type errLoadWbl struct {
+	err error
+}
+
+func (e errLoadWbl) Error() string {
+	return e.err.Error()
+}
+
+// To support errors.Cause().
+func (e errLoadWbl) Cause() error {
+	return e.err
+}
+
+// To support errors.Unwrap().
+func (e errLoadWbl) Unwrap() error {
+	return e.err
+}
+
+// isErrLoadOOOWal reports whether the error is an errLoadWbl.
+func isErrLoadOOOWal(err error) bool {
+	_, ok := err.(*errLoadWbl)
+	return ok
+}
+
+type wblSubsetProcessor struct {
+	mx     sync.Mutex // Take this lock while modifying series in the subset.
+	input  chan []record.RefSample
+	output chan []record.RefSample
+}
+
+func (wp *wblSubsetProcessor) setup() {
+	wp.output = make(chan []record.RefSample, 300)
+	wp.input = make(chan []record.RefSample, 300)
+}
+
+func (wp *wblSubsetProcessor) closeAndDrain() {
+	close(wp.input)
+	for range wp.output {
+	}
+}
+
+// If there is a buffer in the output chan, return it for reuse, otherwise return nil.
+func (wp *wblSubsetProcessor) reuseBuf() []record.RefSample {
+	select {
+	case buf := <-wp.output:
+		return buf[:0]
+	default:
+	}
+	return nil
+}
+
+// processWBLSamples adds the samples it receives to the head and passes
+// the buffer received to an output channel for reuse.
+// Unlike processWALSamples, out-of-order samples are not discarded based on minValidTime.
+func (wp *wblSubsetProcessor) processWBLSamples(h *Head) (unknownRefs uint64) { + defer close(wp.output) + + // We don't check for minValidTime for ooo samples. + mint, maxt := int64(math.MaxInt64), int64(math.MinInt64) + for samples := range wp.input { + wp.mx.Lock() + for _, s := range samples { + ms := h.series.getByID(s.Ref) + if ms == nil { + unknownRefs++ + continue + } + ok, chunkCreated, _ := ms.insert(s.T, s.V, h.chunkDiskMapper) + if chunkCreated { + h.metrics.chunksCreated.Inc() + h.metrics.chunks.Inc() + } + if ok { + if s.T < mint { + mint = s.T + } + if s.T > maxt { + maxt = s.T + } + } + } + wp.mx.Unlock() + wp.output <- samples + } + + h.updateMinOOOMaxOOOTime(mint, maxt) + + return unknownRefs +} + +func (wp *wblSubsetProcessor) waitUntilIdle() { + select { + case <-wp.output: // Allow output side to drain to avoid deadlock. + default: + } + wp.input <- []record.RefSample{} + for len(wp.input) != 0 { + time.Sleep(10 * time.Microsecond) + select { + case <-wp.output: // Allow output side to drain to avoid deadlock. + default: + } + } +} + const ( chunkSnapshotRecordTypeSeries uint8 = 1 chunkSnapshotRecordTypeTombstones uint8 = 2 diff --git a/tsdb/ooo_head.go b/tsdb/ooo_head.go new file mode 100644 index 0000000000..3af6039912 --- /dev/null +++ b/tsdb/ooo_head.go @@ -0,0 +1,159 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tsdb + +import ( + "fmt" + "sort" + + "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/tsdb/tombstones" +) + +// OOOChunk maintains samples in time-ascending order. +// Inserts for timestamps already seen, are dropped. +// Samples are stored uncompressed to allow easy sorting. +// Perhaps we can be more efficient later. +type OOOChunk struct { + samples []sample +} + +func NewOOOChunk() *OOOChunk { + return &OOOChunk{samples: make([]sample, 0, 4)} +} + +// Insert inserts the sample such that order is maintained. +// Returns false if insert was not possible due to the same timestamp already existing. +func (o *OOOChunk) Insert(t int64, v float64) bool { + // Find index of sample we should replace. + i := sort.Search(len(o.samples), func(i int) bool { return o.samples[i].t >= t }) + + if i >= len(o.samples) { + // none found. append it at the end + o.samples = append(o.samples, sample{t, v}) + return true + } + + if o.samples[i].t == t { + return false + } + + // Expand length by 1 to make room. use a zero sample, we will overwrite it anyway. 
+ o.samples = append(o.samples, sample{}) + copy(o.samples[i+1:], o.samples[i:]) + o.samples[i] = sample{t, v} + + return true +} + +func (o *OOOChunk) NumSamples() int { + return len(o.samples) +} + +func (o *OOOChunk) ToXOR() (*chunkenc.XORChunk, error) { + x := chunkenc.NewXORChunk() + app, err := x.Appender() + if err != nil { + return nil, err + } + for _, s := range o.samples { + app.Append(s.t, s.v) + } + return x, nil +} + +func (o *OOOChunk) ToXORBetweenTimestamps(mint, maxt int64) (*chunkenc.XORChunk, error) { + x := chunkenc.NewXORChunk() + app, err := x.Appender() + if err != nil { + return nil, err + } + for _, s := range o.samples { + if s.t < mint { + continue + } + if s.t > maxt { + break + } + app.Append(s.t, s.v) + } + return x, nil +} + +var _ BlockReader = &OOORangeHead{} + +// OOORangeHead allows querying Head out of order samples via BlockReader +// interface implementation. +type OOORangeHead struct { + head *Head + // mint and maxt are tracked because when a query is handled we only want + // the timerange of the query and having preexisting pointers to the first + // and last timestamp help with that. + mint, maxt int64 +} + +func NewOOORangeHead(head *Head, mint, maxt int64) *OOORangeHead { + return &OOORangeHead{ + head: head, + mint: mint, + maxt: maxt, + } +} + +func (oh *OOORangeHead) Index() (IndexReader, error) { + return NewOOOHeadIndexReader(oh.head, oh.mint, oh.maxt), nil +} + +func (oh *OOORangeHead) Chunks() (ChunkReader, error) { + return NewOOOHeadChunkReader(oh.head, oh.mint, oh.maxt), nil +} + +func (oh *OOORangeHead) Tombstones() (tombstones.Reader, error) { + // As stated in the design doc https://docs.google.com/document/d/1Kppm7qL9C-BJB1j6yb6-9ObG3AbdZnFUBYPNNWwDBYM/edit?usp=sharing + // Tombstones are not supported for out of order metrics. 
+ return tombstones.NewMemTombstones(), nil +} + +func (oh *OOORangeHead) Meta() BlockMeta { + var id [16]byte + copy(id[:], "____ooo_head____") + return BlockMeta{ + MinTime: oh.mint, + MaxTime: oh.maxt, + ULID: id, + Stats: BlockStats{ + NumSeries: oh.head.NumSeries(), + }, + } +} + +// Size returns the size taken by the Head block. +func (oh *OOORangeHead) Size() int64 { + return oh.head.Size() +} + +// String returns an human readable representation of the out of order range +// head. It's important to keep this function in order to avoid the struct dump +// when the head is stringified in errors or logs. +func (oh *OOORangeHead) String() string { + return fmt.Sprintf("ooo range head (mint: %d, maxt: %d)", oh.MinTime(), oh.MaxTime()) +} + +func (oh *OOORangeHead) MinTime() int64 { + return oh.mint +} + +func (oh *OOORangeHead) MaxTime() int64 { + return oh.maxt +} diff --git a/tsdb/ooo_head_read.go b/tsdb/ooo_head_read.go new file mode 100644 index 0000000000..f63607dc9c --- /dev/null +++ b/tsdb/ooo_head_read.go @@ -0,0 +1,433 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tsdb + +import ( + "errors" + "math" + "sort" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/index" + "github.com/prometheus/prometheus/tsdb/tombstones" +) + +var _ IndexReader = &OOOHeadIndexReader{} + +// OOOHeadIndexReader implements IndexReader so ooo samples in the head can be +// accessed. +// It also has a reference to headIndexReader so we can leverage on its +// IndexReader implementation for all the methods that remain the same. We +// decided to do this to avoid code duplication. +// The only methods that change are the ones about getting Series and Postings. +type OOOHeadIndexReader struct { + *headIndexReader // A reference to the headIndexReader so we can reuse as many interface implementation as possible. +} + +func NewOOOHeadIndexReader(head *Head, mint, maxt int64) *OOOHeadIndexReader { + hr := &headIndexReader{ + head: head, + mint: mint, + maxt: maxt, + } + return &OOOHeadIndexReader{hr} +} + +func (oh *OOOHeadIndexReader) Series(ref storage.SeriesRef, lbls *labels.Labels, chks *[]chunks.Meta) error { + return oh.series(ref, lbls, chks, 0) +} + +// The passed lastMmapRef tells upto what max m-map chunk that we can consider. +// If it is 0, it means all chunks need to be considered. +// If it is non-0, then the oooHeadChunk must not be considered. +func (oh *OOOHeadIndexReader) series(ref storage.SeriesRef, lbls *labels.Labels, chks *[]chunks.Meta, lastMmapRef chunks.ChunkDiskMapperRef) error { + s := oh.head.series.getByID(chunks.HeadSeriesRef(ref)) + + if s == nil { + oh.head.metrics.seriesNotFound.Inc() + return storage.ErrNotFound + } + *lbls = append((*lbls)[:0], s.lset...) 
+ + if chks == nil { + return nil + } + + s.Lock() + defer s.Unlock() + *chks = (*chks)[:0] + + tmpChks := make([]chunks.Meta, 0, len(s.oooMmappedChunks)) + + // We define these markers to track the last chunk reference while we + // fill the chunk meta. + // These markers are useful to give consistent responses to repeated queries + // even if new chunks that might be overlapping or not are added afterwards. + // Also, lastMinT and lastMaxT are initialized to the max int as a sentinel + // value to know they are unset. + var lastChunkRef chunks.ChunkRef + lastMinT, lastMaxT := int64(math.MaxInt64), int64(math.MaxInt64) + + addChunk := func(minT, maxT int64, ref chunks.ChunkRef) { + // the first time we get called is for the last included chunk. + // set the markers accordingly + if lastMinT == int64(math.MaxInt64) { + lastChunkRef = ref + lastMinT = minT + lastMaxT = maxT + } + + tmpChks = append(tmpChks, chunks.Meta{ + MinTime: minT, + MaxTime: maxT, + Ref: ref, + OOOLastRef: lastChunkRef, + OOOLastMinTime: lastMinT, + OOOLastMaxTime: lastMaxT, + }) + } + + // Collect all chunks that overlap the query range, in order from most recent to most old, + // so we can set the correct markers. 
+ if s.oooHeadChunk != nil { + c := s.oooHeadChunk + if c.OverlapsClosedInterval(oh.mint, oh.maxt) && lastMmapRef == 0 { + ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(len(s.oooMmappedChunks)))) + addChunk(c.minTime, c.maxTime, ref) + } + } + for i := len(s.oooMmappedChunks) - 1; i >= 0; i-- { + c := s.oooMmappedChunks[i] + if c.OverlapsClosedInterval(oh.mint, oh.maxt) && (lastMmapRef == 0 || lastMmapRef.GreaterThanOrEqualTo(c.ref)) { + ref := chunks.ChunkRef(chunks.NewHeadChunkRef(s.ref, s.oooHeadChunkID(i))) + addChunk(c.minTime, c.maxTime, ref) + } + } + + // There is nothing to do if we did not collect any chunk + if len(tmpChks) == 0 { + return nil + } + + // Next we want to sort all the collected chunks by min time so we can find + // those that overlap. + sort.Sort(metaByMinTimeAndMinRef(tmpChks)) + + // Next we want to iterate the sorted collected chunks and only return the + // chunks Meta the first chunk that overlaps with others. + // Example chunks of a series: 5:(100, 200) 6:(500, 600) 7:(150, 250) 8:(550, 650) + // In the example 5 overlaps with 7 and 6 overlaps with 8 so we only want to + // to return chunk Metas for chunk 5 and chunk 6 + *chks = append(*chks, tmpChks[0]) + maxTime := tmpChks[0].MaxTime // tracks the maxTime of the previous "to be merged chunk" + for _, c := range tmpChks[1:] { + if c.MinTime > maxTime { + *chks = append(*chks, c) + maxTime = c.MaxTime + } else if c.MaxTime > maxTime { + maxTime = c.MaxTime + (*chks)[len(*chks)-1].MaxTime = c.MaxTime + } + } + + return nil +} + +// LabelValues needs to be overridden from the headIndexReader implementation due +// to the check that happens at the beginning where we make sure that the query +// interval overlaps with the head minooot and maxooot. 
+func (oh *OOOHeadIndexReader) LabelValues(name string, matchers ...*labels.Matcher) ([]string, error) { + if oh.maxt < oh.head.MinOOOTime() || oh.mint > oh.head.MaxOOOTime() { + return []string{}, nil + } + + if len(matchers) == 0 { + return oh.head.postings.LabelValues(name), nil + } + + return labelValuesWithMatchers(oh, name, matchers...) +} + +type chunkMetaAndChunkDiskMapperRef struct { + meta chunks.Meta + ref chunks.ChunkDiskMapperRef + origMinT int64 + origMaxT int64 +} + +type byMinTimeAndMinRef []chunkMetaAndChunkDiskMapperRef + +func (b byMinTimeAndMinRef) Len() int { return len(b) } +func (b byMinTimeAndMinRef) Less(i, j int) bool { + if b[i].meta.MinTime == b[j].meta.MinTime { + return b[i].meta.Ref < b[j].meta.Ref + } + return b[i].meta.MinTime < b[j].meta.MinTime +} + +func (b byMinTimeAndMinRef) Swap(i, j int) { b[i], b[j] = b[j], b[i] } + +type metaByMinTimeAndMinRef []chunks.Meta + +func (b metaByMinTimeAndMinRef) Len() int { return len(b) } +func (b metaByMinTimeAndMinRef) Less(i, j int) bool { + if b[i].MinTime == b[j].MinTime { + return b[i].Ref < b[j].Ref + } + return b[i].MinTime < b[j].MinTime +} + +func (b metaByMinTimeAndMinRef) Swap(i, j int) { b[i], b[j] = b[j], b[i] } + +func (oh *OOOHeadIndexReader) Postings(name string, values ...string) (index.Postings, error) { + switch len(values) { + case 0: + return index.EmptyPostings(), nil + case 1: + return oh.head.postings.Get(name, values[0]), nil // TODO(ganesh) Also call GetOOOPostings + default: + // TODO(ganesh) We want to only return postings for out of order series. 
+ res := make([]index.Postings, 0, len(values)) + for _, value := range values { + res = append(res, oh.head.postings.Get(name, value)) // TODO(ganesh) Also call GetOOOPostings + } + return index.Merge(res...), nil + } +} + +type OOOHeadChunkReader struct { + head *Head + mint, maxt int64 +} + +func NewOOOHeadChunkReader(head *Head, mint, maxt int64) *OOOHeadChunkReader { + return &OOOHeadChunkReader{ + head: head, + mint: mint, + maxt: maxt, + } +} + +func (cr OOOHeadChunkReader) Chunk(meta chunks.Meta) (chunkenc.Chunk, error) { + sid, _ := chunks.HeadChunkRef(meta.Ref).Unpack() + + s := cr.head.series.getByID(sid) + // This means that the series has been garbage collected. + if s == nil { + return nil, storage.ErrNotFound + } + + s.Lock() + c, err := s.oooMergedChunk(meta, cr.head.chunkDiskMapper, cr.mint, cr.maxt) + s.Unlock() + if err != nil { + return nil, err + } + + // This means that the query range did not overlap with the requested chunk. + if len(c.chunks) == 0 { + return nil, storage.ErrNotFound + } + + return c, nil +} + +func (cr OOOHeadChunkReader) Close() error { + return nil +} + +type OOOCompactionHead struct { + oooIR *OOOHeadIndexReader + lastMmapRef chunks.ChunkDiskMapperRef + lastWBLFile int + postings []storage.SeriesRef + chunkRange int64 + mint, maxt int64 // Among all the compactable chunks. +} + +// NewOOOCompactionHead does the following: +// 1. M-maps all the in-memory ooo chunks. +// 2. Compute the expected block ranges while iterating through all ooo series and store it. +// 3. Store the list of postings having ooo series. +// 4. Cuts a new WBL file for the OOO WBL. +// All the above together have a bit of CPU and memory overhead, and can have a bit of impact +// on the sample append latency. So call NewOOOCompactionHead only right before compaction. 
+func NewOOOCompactionHead(head *Head) (*OOOCompactionHead, error) { + newWBLFile, err := head.wbl.NextSegmentSync() + if err != nil { + return nil, err + } + + ch := &OOOCompactionHead{ + chunkRange: head.chunkRange.Load(), + mint: math.MaxInt64, + maxt: math.MinInt64, + lastWBLFile: newWBLFile, + } + + ch.oooIR = NewOOOHeadIndexReader(head, math.MinInt64, math.MaxInt64) + n, v := index.AllPostingsKey() + + // TODO: verify this gets only ooo samples. + p, err := ch.oooIR.Postings(n, v) + if err != nil { + return nil, err + } + p = ch.oooIR.SortedPostings(p) + + var lastSeq, lastOff int + for p.Next() { + seriesRef := p.At() + ms := head.series.getByID(chunks.HeadSeriesRef(seriesRef)) + if ms == nil { + continue + } + + // M-map the in-memory chunk and keep track of the last one. + // Also build the block ranges -> series map. + // TODO: consider having a lock specifically for ooo data. + ms.Lock() + + mmapRef := ms.mmapCurrentOOOHeadChunk(head.chunkDiskMapper) + if mmapRef == 0 && len(ms.oooMmappedChunks) > 0 { + // Nothing was m-mapped. So take the mmapRef from the existing slice if it exists. 
+			mmapRef = ms.oooMmappedChunks[len(ms.oooMmappedChunks)-1].ref
+		}
+		seq, off := mmapRef.Unpack()
+		if seq > lastSeq || (seq == lastSeq && off > lastOff) {
+			ch.lastMmapRef, lastSeq, lastOff = mmapRef, seq, off
+		}
+		if len(ms.oooMmappedChunks) > 0 {
+			ch.postings = append(ch.postings, seriesRef)
+			for _, c := range ms.oooMmappedChunks {
+				if c.minTime < ch.mint {
+					ch.mint = c.minTime
+				}
+				if c.maxTime > ch.maxt {
+					ch.maxt = c.maxTime
+				}
+			}
+		}
+		ms.Unlock()
+	}
+
+	return ch, nil
+}
+
+func (ch *OOOCompactionHead) Index() (IndexReader, error) {
+	return NewOOOCompactionHeadIndexReader(ch), nil
+}
+
+func (ch *OOOCompactionHead) Chunks() (ChunkReader, error) {
+	return NewOOOHeadChunkReader(ch.oooIR.head, ch.oooIR.mint, ch.oooIR.maxt), nil
+}
+
+func (ch *OOOCompactionHead) Tombstones() (tombstones.Reader, error) {
+	return tombstones.NewMemTombstones(), nil
+}
+
+func (ch *OOOCompactionHead) Meta() BlockMeta {
+	var id [16]byte
+	copy(id[:], "ooo_compact_head")
+	return BlockMeta{
+		MinTime: ch.mint,
+		MaxTime: ch.maxt,
+		ULID:    id,
+		Stats: BlockStats{
+			NumSeries: uint64(len(ch.postings)),
+		},
+	}
+}
+
+// CloneForTimeRange clones the OOOCompactionHead such that the IndexReader and ChunkReader
+// obtained from this only looks at the m-map chunks within the given time ranges while not looking
+// beyond the ch.lastMmapRef.
+// Only the methods of the BlockReader interface are valid for the cloned OOOCompactionHead.
+func (ch *OOOCompactionHead) CloneForTimeRange(mint, maxt int64) *OOOCompactionHead { + return &OOOCompactionHead{ + oooIR: NewOOOHeadIndexReader(ch.oooIR.head, mint, maxt), + lastMmapRef: ch.lastMmapRef, + postings: ch.postings, + chunkRange: ch.chunkRange, + mint: ch.mint, + maxt: ch.maxt, + } +} + +func (ch *OOOCompactionHead) Size() int64 { return 0 } +func (ch *OOOCompactionHead) MinTime() int64 { return ch.mint } +func (ch *OOOCompactionHead) MaxTime() int64 { return ch.maxt } +func (ch *OOOCompactionHead) ChunkRange() int64 { return ch.chunkRange } +func (ch *OOOCompactionHead) LastMmapRef() chunks.ChunkDiskMapperRef { return ch.lastMmapRef } +func (ch *OOOCompactionHead) LastWBLFile() int { return ch.lastWBLFile } + +type OOOCompactionHeadIndexReader struct { + ch *OOOCompactionHead +} + +func NewOOOCompactionHeadIndexReader(ch *OOOCompactionHead) IndexReader { + return &OOOCompactionHeadIndexReader{ch: ch} +} + +func (ir *OOOCompactionHeadIndexReader) Symbols() index.StringIter { + return ir.ch.oooIR.Symbols() +} + +func (ir *OOOCompactionHeadIndexReader) Postings(name string, values ...string) (index.Postings, error) { + n, v := index.AllPostingsKey() + if name != n || len(values) != 1 || values[0] != v { + return nil, errors.New("only AllPostingsKey is supported") + } + return index.NewListPostings(ir.ch.postings), nil +} + +func (ir *OOOCompactionHeadIndexReader) SortedPostings(p index.Postings) index.Postings { + // This will already be sorted from the Postings() call above. 
+ return p +} + +func (ir *OOOCompactionHeadIndexReader) Series(ref storage.SeriesRef, lset *labels.Labels, chks *[]chunks.Meta) error { + return ir.ch.oooIR.series(ref, lset, chks, ir.ch.lastMmapRef) +} + +func (ir *OOOCompactionHeadIndexReader) SortedLabelValues(name string, matchers ...*labels.Matcher) ([]string, error) { + return nil, errors.New("not implemented") +} + +func (ir *OOOCompactionHeadIndexReader) LabelValues(name string, matchers ...*labels.Matcher) ([]string, error) { + return nil, errors.New("not implemented") +} + +func (ir *OOOCompactionHeadIndexReader) PostingsForMatchers(concurrent bool, ms ...*labels.Matcher) (index.Postings, error) { + return nil, errors.New("not implemented") +} + +func (ir *OOOCompactionHeadIndexReader) LabelNames(matchers ...*labels.Matcher) ([]string, error) { + return nil, errors.New("not implemented") +} + +func (ir *OOOCompactionHeadIndexReader) LabelValueFor(id storage.SeriesRef, label string) (string, error) { + return "", errors.New("not implemented") +} + +func (ir *OOOCompactionHeadIndexReader) LabelNamesFor(ids ...storage.SeriesRef) ([]string, error) { + return nil, errors.New("not implemented") +} + +func (ir *OOOCompactionHeadIndexReader) Close() error { + return ir.ch.oooIR.Close() +} diff --git a/tsdb/ooo_head_read_test.go b/tsdb/ooo_head_read_test.go new file mode 100644 index 0000000000..486ca31f3f --- /dev/null +++ b/tsdb/ooo_head_read_test.go @@ -0,0 +1,1207 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package tsdb + +import ( + "context" + "fmt" + "math" + "sort" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/tsdbutil" +) + +type chunkInterval struct { + // because we permutate the order of chunks, we cannot determine at test declaration time which chunkRefs we expect in the Output. + // This ID matches expected output chunks against test input chunks, the test runner will assert the chunkRef for the matching chunk + ID int + mint int64 + maxt int64 +} + +// permutateChunkIntervals returns all possible orders of the given chunkIntervals +func permutateChunkIntervals(in []chunkInterval, out [][]chunkInterval, left, right int) [][]chunkInterval { + if left == right { + inCopy := make([]chunkInterval, len(in)) + copy(inCopy, in) + return append(out, inCopy) + } + for i := left; i <= right; i++ { + in[left], in[i] = in[i], in[left] + out = permutateChunkIntervals(in, out, left+1, right) + in[left], in[i] = in[i], in[left] + } + return out +} + +// TestOOOHeadIndexReader_Series tests that the Series method works as expected. +// However it does so by creating chunks and memory mapping them unlike other +// tests of the head where samples are appended and we let the head memory map. +// We do this because the ingestion path and the appender for out of order +// samples are not ready yet. 
// TestOOOHeadIndexReader_Series exercises OOOHeadIndexReader.Series: given a
// set of out-of-order chunks (mmapped chunks plus, optionally, the open OOO
// head chunk) it must return the chunk metas whose time ranges overlap the
// query interval, with transitively overlapping chunks merged into a single
// meta, and with the OOOLast* markers on every returned meta pointing at the
// last input chunk that overlaps the query range.
// Each case is run for every permutation of the input chunk order, and both
// with and without the last interval being served from the OOO head chunk.
func TestOOOHeadIndexReader_Series(t *testing.T) {
	tests := []struct {
		name                string
		queryMinT           int64
		queryMaxT           int64
		inputChunkIntervals []chunkInterval // chunk ID plus [mint, maxt] for each stored OOO chunk
		expChunks           []chunkInterval // expected (possibly merged) output intervals, ID = chunk whose ref is reported
	}{
		{
			name:      "Empty result and no error when head is empty",
			queryMinT: 0,
			queryMaxT: 100,
			expChunks: nil,
		},
		{
			// Chunk 0 [100,400] lies entirely before the query [500,700].
			name:      "If query interval is bigger than the existing chunks nothing is returned",
			queryMinT: 500,
			queryMaxT: 700,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 400},
			},
			expChunks: nil,
		},
		{
			// Chunk 0 [500,700] lies entirely after the query [100,400].
			name:      "If query interval is smaller than the existing chunks nothing is returned",
			queryMinT: 100,
			queryMaxT: 400,
			inputChunkIntervals: []chunkInterval{
				{0, 500, 700},
			},
			expChunks: nil,
		},
		{
			// Query [100,400] fully contains chunk 0 [150,350]; the chunk comes back as-is.
			name:      "If query interval exceeds the existing chunk, it is returned",
			queryMinT: 100,
			queryMaxT: 400,
			inputChunkIntervals: []chunkInterval{
				{0, 150, 350},
			},
			expChunks: []chunkInterval{
				{0, 150, 350},
			},
		},
		{
			// Chunk 0 [100,400] fully contains the query [150,350]; the whole chunk is returned.
			name:      "If chunk exceeds the query interval, it is returned",
			queryMinT: 150,
			queryMaxT: 350,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 400},
			},
			expChunks: []chunkInterval{
				{0, 100, 400},
			},
		},
		{
			// Chunks 0+2 overlap ([100,200]+[150,250]) and chunks 1+3 overlap
			// ([500,600]+[550,650]); each pair merges into one meta carrying the
			// ref of the earlier chunk of the pair.
			name:      "Pairwise overlaps should return the references of the first of each pair",
			queryMinT: 0,
			queryMaxT: 700,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 200},
				{1, 500, 600},
				{2, 150, 250},
				{3, 550, 650},
			},
			expChunks: []chunkInterval{
				{0, 100, 250},
				{1, 500, 650},
			},
		},
		{
			// Chunks 0..3 chain end-to-start ([100,200][200,300][300,400][400,500]);
			// the shared boundary timestamps make them all overlap into one meta.
			name:      "If all chunks overlap, single big chunk is returned",
			queryMinT: 0,
			queryMaxT: 700,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 200},
				{1, 200, 300},
				{2, 300, 400},
				{3, 400, 500},
			},
			expChunks: []chunkInterval{
				{0, 100, 500},
			},
		},
		{
			// Same chain but with a 1ms gap between consecutive chunks, so nothing merges.
			name:      "If no chunks overlap, all chunks are returned",
			queryMinT: 0,
			queryMaxT: 700,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 199},
				{1, 200, 299},
				{2, 300, 399},
				{3, 400, 499},
			},
			expChunks: []chunkInterval{
				{0, 100, 199},
				{1, 200, 299},
				{2, 300, 399},
				{3, 400, 499},
			},
		},
		{
			// Chunks 0,1,2 chain through pairwise overlaps into [100,350]; chunk 3
			// [450,550] is outside the query [0,400] and is dropped entirely.
			name:      "Triplet with pairwise overlaps, query range covers all, and distractor extra chunk",
			queryMinT: 0,
			queryMaxT: 400,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 200},
				{1, 150, 300},
				{2, 250, 350},
				{3, 450, 550},
			},
			expChunks: []chunkInterval{
				{0, 100, 350},
			},
		},
		{
			// All three chunks overlap the query [100,400] at least partially and
			// chain together; the merged meta carries the ref of chunk 1, whose
			// mint (0) is the smallest.
			name:      "Query interval partially overlaps some chunks",
			queryMinT: 100,
			queryMaxT: 400,
			inputChunkIntervals: []chunkInterval{
				{0, 250, 500},
				{1, 0, 200},
				{2, 150, 300},
			},
			expChunks: []chunkInterval{
				{1, 0, 500},
			},
		},
		{
			// Chunks 0+2 fully overlap into [100,300]; chunks 4,3,1 chain into
			// [600,850] keyed by chunk 4 (smallest mint of that group).
			name:      "A full overlap pair and disjointed triplet",
			queryMinT: 0,
			queryMaxT: 900,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 300},
				{1, 770, 850},
				{2, 150, 250},
				{3, 650, 750},
				{4, 600, 800},
			},
			expChunks: []chunkInterval{
				{0, 100, 300},
				{4, 600, 850},
			},
		},
		{
			// Three disjoint chunks inside the query range come back unmerged.
			name:      "Query range covers 3 disjoint chunks",
			queryMinT: 0,
			queryMaxT: 650,
			inputChunkIntervals: []chunkInterval{
				{0, 100, 150},
				{1, 300, 350},
				{2, 200, 250},
			},
			expChunks: []chunkInterval{
				{0, 100, 150},
				{1, 300, 350},
				{2, 200, 250},
			},
		},
	}

	s1Lset := labels.FromStrings("foo", "bar")
	s1ID := uint64(1)

	for _, tc := range tests {
		var permutations [][]chunkInterval
		if len(tc.inputChunkIntervals) == 0 {
			// Handle the special case of no input chunks: a single "permutation"
			// of nil so the body below still runs once.
			permutations = [][]chunkInterval{
				nil,
			}
		} else {
			permutations = permutateChunkIntervals(tc.inputChunkIntervals, nil, 0, len(tc.inputChunkIntervals)-1)
		}
		for perm, intervals := range permutations {
			for _, headChunk := range []bool{false, true} {
				t.Run(fmt.Sprintf("name=%s, permutation=%d, headChunk=%t", tc.name, perm, headChunk), func(t *testing.T) {
					h, _ := newTestHead(t, 1000, false, true)
					defer func() {
						require.NoError(t, h.Close())
					}()
					require.NoError(t, h.Init(0))

					s1, _, _ := h.getOrCreate(s1ID, s1Lset)

					var lastChunk chunkInterval
					var lastChunkPos int

					// The OOOLast* markers should be set based on whichever is the
					// last chunk/interval that overlaps with the query range.
					for i, interv := range intervals {
						if overlapsClosedInterval(interv.mint, interv.maxt, tc.queryMinT, tc.queryMaxT) {
							lastChunk = interv
							lastChunkPos = i
						}
					}
					// NOTE(review): the series ref is hard-coded to 1 here, which only
					// works because s1ID happens to be 1 — confirm if s1ID ever changes.
					lastChunkRef := chunks.ChunkRef(chunks.NewHeadChunkRef(1, chunks.HeadChunkID(uint64(lastChunkPos))))

					// Define our expected chunks, by looking at the expected
					// ChunkIntervals and setting the markers accordingly.
					var expChunks []chunks.Meta
					for _, e := range tc.expChunks {
						meta := chunks.Meta{
							Chunk:   chunkenc.Chunk(nil),
							MinTime: e.mint,
							MaxTime: e.maxt,
							// Markers based on the last overlapping chunk found above.
							OOOLastMinTime: lastChunk.mint,
							OOOLastMaxTime: lastChunk.maxt,
							OOOLastRef:     lastChunkRef,
						}

						// Ref to whatever Ref the chunk has, that we refer to by ID.
						for ref, c := range intervals {
							if c.ID == e.ID {
								meta.Ref = chunks.ChunkRef(chunks.NewHeadChunkRef(chunks.HeadSeriesRef(s1ID), chunks.HeadChunkID(ref)))
								break
							}
						}
						expChunks = append(expChunks, meta)
					}
					sort.Sort(metaByMinTimeAndMinRef(expChunks)) // We always want the chunks to come back sorted by minTime asc.

					if headChunk && len(intervals) > 0 {
						// Put the last interval in the head chunk instead of an mmapped chunk.
						s1.oooHeadChunk = &oooHeadChunk{
							minTime: intervals[len(intervals)-1].mint,
							maxTime: intervals[len(intervals)-1].maxt,
						}
						intervals = intervals[:len(intervals)-1]
					}

					for _, ic := range intervals {
						s1.oooMmappedChunks = append(s1.oooMmappedChunks, &mmappedChunk{
							minTime: ic.mint,
							maxTime: ic.maxt,
						})
					}

					ir := NewOOOHeadIndexReader(h, tc.queryMinT, tc.queryMaxT)

					var chks []chunks.Meta
					var respLset labels.Labels
					err := ir.Series(storage.SeriesRef(s1ID), &respLset, &chks)
					require.NoError(t, err)
					require.Equal(t, s1Lset, respLset)
					require.Equal(t, expChunks, chks)

					// A ref for a series that does not exist must yield ErrNotFound.
					err = ir.Series(storage.SeriesRef(s1ID+1), &respLset, &chks)
					require.Equal(t, storage.ErrNotFound, err)
				})
			}
		}
	}
}
// TestOOOHeadChunkReader_LabelValues checks that LabelValues on the OOO head
// index reader only reports label values for series that have out-of-order
// samples inside the reader's time range, with and without matchers.
// NOTE(review): despite the name, this exercises the index reader
// (NewOOOHeadIndexReader), not the chunk reader — consider renaming.
func TestOOOHeadChunkReader_LabelValues(t *testing.T) {
	chunkRange := int64(2000)
	head, _ := newTestHead(t, chunkRange, false, true)
	t.Cleanup(func() { require.NoError(t, head.Close()) })

	app := head.Appender(context.Background())

	// Add in-order samples at t=100 for both series.
	_, err := app.Append(0, labels.Labels{
		{Name: "foo", Value: "bar1"},
	}, 100, 1)
	require.NoError(t, err)
	_, err = app.Append(0, labels.Labels{
		{Name: "foo", Value: "bar2"},
	}, 100, 2)
	require.NoError(t, err)

	// Add ooo samples at t=90 for those series.
	_, err = app.Append(0, labels.Labels{
		{Name: "foo", Value: "bar1"},
	}, 90, 1)
	require.NoError(t, err)
	_, err = app.Append(0, labels.Labels{
		{Name: "foo", Value: "bar2"},
	}, 90, 2)
	require.NoError(t, err)

	require.NoError(t, app.Commit())

	cases := []struct {
		name       string
		queryMinT  int64
		queryMaxT  int64
		expValues1 []string // equality matcher foo="bar1"
		expValues2 []string // negated regexp matcher foo!~"^bar."
		expValues3 []string // regexp matcher foo=~"bar."
		expValues4 []string // no matchers
	}{
		{
			name:       "LabelValues calls when ooo head has max query range",
			queryMinT:  math.MinInt64,
			queryMaxT:  math.MaxInt64,
			expValues1: []string{"bar1"},
			expValues2: []string{},
			expValues3: []string{"bar1", "bar2"},
			expValues4: []string{"bar1", "bar2"},
		},
		{
			name:       "LabelValues calls with ooo head query range not overlapping in-order data",
			queryMinT:  90,
			queryMaxT:  90,
			expValues1: []string{"bar1"},
			expValues2: []string{},
			expValues3: []string{"bar1", "bar2"},
			expValues4: []string{"bar1", "bar2"},
		},
		{
			// t=100 only has in-order samples, so the OOO reader sees nothing.
			name:       "LabelValues calls with ooo head query range not overlapping out-of-order data",
			queryMinT:  100,
			queryMaxT:  100,
			expValues1: []string{},
			expValues2: []string{},
			expValues3: []string{},
			expValues4: []string{},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// We first want to test using a head index reader that covers the biggest query interval.
			oh := NewOOOHeadIndexReader(head, tc.queryMinT, tc.queryMaxT)
			matchers := []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar1")}
			values, err := oh.LabelValues("foo", matchers...)
			sort.Strings(values)
			require.NoError(t, err)
			require.Equal(t, tc.expValues1, values)

			matchers = []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "foo", "^bar.")}
			values, err = oh.LabelValues("foo", matchers...)
			sort.Strings(values)
			require.NoError(t, err)
			require.Equal(t, tc.expValues2, values)

			matchers = []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.")}
			values, err = oh.LabelValues("foo", matchers...)
			sort.Strings(values)
			require.NoError(t, err)
			require.Equal(t, tc.expValues3, values)

			values, err = oh.LabelValues("foo")
			sort.Strings(values)
			require.NoError(t, err)
			require.Equal(t, tc.expValues4, values)
		})
	}
}

// TestOOOHeadChunkReader_Chunk tests that the Chunk method works as expected.
// It does so by appending out of order samples to the db and then initializing
// an OOOHeadChunkReader to read chunks from it.
+func TestOOOHeadChunkReader_Chunk(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 5 + opts.OutOfOrderTimeWindow = 120 * time.Minute.Milliseconds() + + s1 := labels.FromStrings("l", "v1") + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + + appendSample := func(app storage.Appender, l labels.Labels, timestamp int64, value float64) storage.SeriesRef { + ref, err := app.Append(0, l, timestamp, value) + require.NoError(t, err) + return ref + } + + t.Run("Getting a non existing chunk fails with not found error", func(t *testing.T) { + db := newTestDBWithOpts(t, opts) + + cr := NewOOOHeadChunkReader(db.head, 0, 1000) + c, err := cr.Chunk(chunks.Meta{ + Ref: 0x1000000, Chunk: chunkenc.Chunk(nil), MinTime: 100, MaxTime: 300, + }) + require.Equal(t, err, fmt.Errorf("not found")) + require.Equal(t, c, nil) + }) + + tests := []struct { + name string + queryMinT int64 + queryMaxT int64 + firstInOrderSampleAt int64 + inputSamples tsdbutil.SampleSlice + expChunkError bool + expChunksSamples []tsdbutil.SampleSlice + }{ + { + name: "Getting the head when there are no overlapping chunks returns just the samples in the head", + queryMinT: minutes(0), + queryMaxT: minutes(100), + firstInOrderSampleAt: minutes(120), + inputSamples: tsdbutil.SampleSlice{ + sample{t: minutes(30), v: float64(0)}, + sample{t: minutes(40), v: float64(0)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [------------------------------------------------------------------------------------------] + // Chunk 0: Current Head [--------] (With 2 samples) + // Output Graphically [--------] (With 2 samples) + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(30), v: float64(0)}, + sample{t: minutes(40), v: float64(0)}, + }, + }, + }, + { + name: "Getting the head chunk when there are overlapping chunks returns all combined", + queryMinT: minutes(0), + queryMaxT: minutes(100), + 
firstInOrderSampleAt: minutes(120), + inputSamples: tsdbutil.SampleSlice{ + // opts.OOOCapMax is 5 so these will be mmapped to the first mmapped chunk + sample{t: minutes(41), v: float64(0)}, + sample{t: minutes(42), v: float64(0)}, + sample{t: minutes(43), v: float64(0)}, + sample{t: minutes(44), v: float64(0)}, + sample{t: minutes(45), v: float64(0)}, + // The following samples will go to the head chunk, and we want it + // to overlap with the previous chunk + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(50), v: float64(1)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [------------------------------------------------------------------------------------------] + // Chunk 0 [---] (With 5 samples) + // Chunk 1: Current Head [-----------------] (With 2 samples) + // Output Graphically [-----------------] (With 7 samples) + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(41), v: float64(0)}, + sample{t: minutes(42), v: float64(0)}, + sample{t: minutes(43), v: float64(0)}, + sample{t: minutes(44), v: float64(0)}, + sample{t: minutes(45), v: float64(0)}, + sample{t: minutes(50), v: float64(1)}, + }, + }, + }, + { + name: "Two windows of overlapping chunks get properly converged", + queryMinT: minutes(0), + queryMaxT: minutes(100), + firstInOrderSampleAt: minutes(120), + inputSamples: tsdbutil.SampleSlice{ + // Chunk 0 + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(12), v: float64(0)}, + sample{t: minutes(14), v: float64(0)}, + sample{t: minutes(16), v: float64(0)}, + sample{t: minutes(20), v: float64(0)}, + // Chunk 1 + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(22), v: float64(1)}, + sample{t: minutes(24), v: float64(1)}, + sample{t: minutes(26), v: float64(1)}, + sample{t: minutes(29), v: float64(1)}, + // Chunk 2 + sample{t: minutes(30), v: float64(2)}, + sample{t: minutes(32), v: float64(2)}, + sample{t: 
minutes(34), v: float64(2)}, + sample{t: minutes(36), v: float64(2)}, + sample{t: minutes(40), v: float64(2)}, + // Head + sample{t: minutes(40), v: float64(3)}, + sample{t: minutes(50), v: float64(3)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [------------------------------------------------------------------------------------------] + // Chunk 0 [--------] + // Chunk 1 [-------] + // Chunk 2 [--------] + // Chunk 3: Current Head [--------] + // Output Graphically [----------------][-----------------] + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(12), v: float64(0)}, + sample{t: minutes(14), v: float64(0)}, + sample{t: minutes(16), v: float64(0)}, + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(22), v: float64(1)}, + sample{t: minutes(24), v: float64(1)}, + sample{t: minutes(26), v: float64(1)}, + sample{t: minutes(29), v: float64(1)}, + }, + { + sample{t: minutes(30), v: float64(2)}, + sample{t: minutes(32), v: float64(2)}, + sample{t: minutes(34), v: float64(2)}, + sample{t: minutes(36), v: float64(2)}, + sample{t: minutes(40), v: float64(3)}, + sample{t: minutes(50), v: float64(3)}, + }, + }, + }, + { + name: "Two windows of overlapping chunks in descending order get properly converged", + queryMinT: minutes(0), + queryMaxT: minutes(100), + firstInOrderSampleAt: minutes(120), + inputSamples: tsdbutil.SampleSlice{ + // Chunk 0 + sample{t: minutes(40), v: float64(0)}, + sample{t: minutes(42), v: float64(0)}, + sample{t: minutes(44), v: float64(0)}, + sample{t: minutes(46), v: float64(0)}, + sample{t: minutes(50), v: float64(0)}, + // Chunk 1 + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(32), v: float64(1)}, + sample{t: minutes(34), v: float64(1)}, + sample{t: minutes(36), v: float64(1)}, + sample{t: minutes(40), v: float64(1)}, + // Chunk 2 + sample{t: minutes(20), v: float64(2)}, + sample{t: minutes(22), v: 
float64(2)}, + sample{t: minutes(24), v: float64(2)}, + sample{t: minutes(26), v: float64(2)}, + sample{t: minutes(29), v: float64(2)}, + // Head + sample{t: minutes(10), v: float64(3)}, + sample{t: minutes(20), v: float64(3)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [------------------------------------------------------------------------------------------] + // Chunk 0 [--------] + // Chunk 1 [--------] + // Chunk 2 [-------] + // Chunk 3: Current Head [--------] + // Output Graphically [----------------][-----------------] + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(10), v: float64(3)}, + sample{t: minutes(20), v: float64(2)}, + sample{t: minutes(22), v: float64(2)}, + sample{t: minutes(24), v: float64(2)}, + sample{t: minutes(26), v: float64(2)}, + sample{t: minutes(29), v: float64(2)}, + }, + { + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(32), v: float64(1)}, + sample{t: minutes(34), v: float64(1)}, + sample{t: minutes(36), v: float64(1)}, + sample{t: minutes(40), v: float64(0)}, + sample{t: minutes(42), v: float64(0)}, + sample{t: minutes(44), v: float64(0)}, + sample{t: minutes(46), v: float64(0)}, + sample{t: minutes(50), v: float64(0)}, + }, + }, + }, + { + name: "If chunks are not overlapped they are not converged", + queryMinT: minutes(0), + queryMaxT: minutes(100), + firstInOrderSampleAt: minutes(120), + inputSamples: tsdbutil.SampleSlice{ + // Chunk 0 + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(12), v: float64(0)}, + sample{t: minutes(14), v: float64(0)}, + sample{t: minutes(16), v: float64(0)}, + sample{t: minutes(18), v: float64(0)}, + // Chunk 1 + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(22), v: float64(1)}, + sample{t: minutes(24), v: float64(1)}, + sample{t: minutes(26), v: float64(1)}, + sample{t: minutes(28), v: float64(1)}, + // Chunk 2 + sample{t: minutes(30), v: float64(2)}, + sample{t: minutes(32), v: 
float64(2)}, + sample{t: minutes(34), v: float64(2)}, + sample{t: minutes(36), v: float64(2)}, + sample{t: minutes(38), v: float64(2)}, + // Head + sample{t: minutes(40), v: float64(3)}, + sample{t: minutes(42), v: float64(3)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [------------------------------------------------------------------------------------------] + // Chunk 0 [-------] + // Chunk 1 [-------] + // Chunk 2 [-------] + // Chunk 3: Current Head [-------] + // Output Graphically [-------][-------][-------][--------] + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(12), v: float64(0)}, + sample{t: minutes(14), v: float64(0)}, + sample{t: minutes(16), v: float64(0)}, + sample{t: minutes(18), v: float64(0)}, + }, + { + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(22), v: float64(1)}, + sample{t: minutes(24), v: float64(1)}, + sample{t: minutes(26), v: float64(1)}, + sample{t: minutes(28), v: float64(1)}, + }, + { + sample{t: minutes(30), v: float64(2)}, + sample{t: minutes(32), v: float64(2)}, + sample{t: minutes(34), v: float64(2)}, + sample{t: minutes(36), v: float64(2)}, + sample{t: minutes(38), v: float64(2)}, + }, + { + sample{t: minutes(40), v: float64(3)}, + sample{t: minutes(42), v: float64(3)}, + }, + }, + }, + { + name: "Triplet of chunks overlapping returns a single merged chunk", + queryMinT: minutes(0), + queryMaxT: minutes(100), + firstInOrderSampleAt: minutes(120), + inputSamples: tsdbutil.SampleSlice{ + // Chunk 0 + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(15), v: float64(0)}, + sample{t: minutes(20), v: float64(0)}, + sample{t: minutes(25), v: float64(0)}, + sample{t: minutes(30), v: float64(0)}, + // Chunk 1 + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(35), v: float64(1)}, + sample{t: 
minutes(42), v: float64(1)}, + // Chunk 2 Head + sample{t: minutes(32), v: float64(2)}, + sample{t: minutes(50), v: float64(2)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [------------------------------------------------------------------------------------------] + // Chunk 0 [-----------------] + // Chunk 1 [--------------------] + // Chunk 2 Current Head [--------------] + // Output Graphically [-----------------------------------] + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(15), v: float64(0)}, + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(32), v: float64(2)}, + sample{t: minutes(35), v: float64(1)}, + sample{t: minutes(42), v: float64(1)}, + sample{t: minutes(50), v: float64(2)}, + }, + }, + }, + { + name: "Query interval partially overlaps with a triplet of chunks but still returns a single merged chunk", + queryMinT: minutes(12), + queryMaxT: minutes(33), + firstInOrderSampleAt: minutes(120), + inputSamples: tsdbutil.SampleSlice{ + // Chunk 0 + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(15), v: float64(0)}, + sample{t: minutes(20), v: float64(0)}, + sample{t: minutes(25), v: float64(0)}, + sample{t: minutes(30), v: float64(0)}, + // Chunk 1 + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(35), v: float64(1)}, + sample{t: minutes(42), v: float64(1)}, + // Chunk 2 Head + sample{t: minutes(32), v: float64(2)}, + sample{t: minutes(50), v: float64(2)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [------------------] + // Chunk 0 [-----------------] + // Chunk 1 [--------------------] + // Chunk 2 Current Head [--------------] + // Output Graphically 
[-----------------------------------] + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(10), v: float64(0)}, + sample{t: minutes(15), v: float64(0)}, + sample{t: minutes(20), v: float64(1)}, + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(30), v: float64(1)}, + sample{t: minutes(32), v: float64(2)}, + sample{t: minutes(35), v: float64(1)}, + sample{t: minutes(42), v: float64(1)}, + sample{t: minutes(50), v: float64(2)}, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + db := newTestDBWithOpts(t, opts) + + app := db.Appender(context.Background()) + s1Ref := appendSample(app, s1, tc.firstInOrderSampleAt, float64(tc.firstInOrderSampleAt/1*time.Minute.Milliseconds())) + require.NoError(t, app.Commit()) + + // OOO few samples for s1. + app = db.Appender(context.Background()) + for _, s := range tc.inputSamples { + appendSample(app, s1, s.T(), s.V()) + } + require.NoError(t, app.Commit()) + + // The Series method is the one that populates the chunk meta OOO + // markers like OOOLastRef. These are then used by the ChunkReader. 
+ ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT) + var chks []chunks.Meta + var respLset labels.Labels + err := ir.Series(s1Ref, &respLset, &chks) + require.NoError(t, err) + require.Equal(t, len(tc.expChunksSamples), len(chks)) + + cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT) + for i := 0; i < len(chks); i++ { + c, err := cr.Chunk(chks[i]) + require.NoError(t, err) + + var resultSamples tsdbutil.SampleSlice + it := c.Iterator(nil) + for it.Next() { + t, v := it.At() + resultSamples = append(resultSamples, sample{t: t, v: v}) + } + require.Equal(t, tc.expChunksSamples[i], resultSamples) + } + }) + } +} + +// TestOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding tests +// that if a query comes and performs a Series() call followed by a Chunks() call +// the response is consistent with the data seen by Series() even if the OOO +// head receives more samples before Chunks() is called. +// An example: +// - Response A comes from: Series() then Chunk() +// - Response B comes from : Series(), in parallel new samples added to the head, then Chunk() +// - A == B +func TestOOOHeadChunkReader_Chunk_ConsistentQueryResponseDespiteOfHeadExpanding(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderCapMax = 5 + opts.OutOfOrderTimeWindow = 120 * time.Minute.Milliseconds() + + s1 := labels.FromStrings("l", "v1") + minutes := func(m int64) int64 { return m * time.Minute.Milliseconds() } + + appendSample := func(app storage.Appender, l labels.Labels, timestamp int64, value float64) storage.SeriesRef { + ref, err := app.Append(0, l, timestamp, value) + require.NoError(t, err) + return ref + } + + tests := []struct { + name string + queryMinT int64 + queryMaxT int64 + firstInOrderSampleAt int64 + initialSamples tsdbutil.SampleSlice + samplesAfterSeriesCall tsdbutil.SampleSlice + expChunkError bool + expChunksSamples []tsdbutil.SampleSlice + }{ + { + name: "Current head gets old, new and in between sample after Series 
call, they all should be omitted from the result", + queryMinT: minutes(0), + queryMaxT: minutes(100), + firstInOrderSampleAt: minutes(120), + initialSamples: tsdbutil.SampleSlice{ + // Chunk 0 + sample{t: minutes(20), v: float64(0)}, + sample{t: minutes(22), v: float64(0)}, + sample{t: minutes(24), v: float64(0)}, + sample{t: minutes(26), v: float64(0)}, + sample{t: minutes(30), v: float64(0)}, + // Chunk 1 Head + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(35), v: float64(1)}, + }, + samplesAfterSeriesCall: tsdbutil.SampleSlice{ + sample{t: minutes(10), v: float64(1)}, + sample{t: minutes(32), v: float64(1)}, + sample{t: minutes(50), v: float64(1)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [-----------------------------------] + // Chunk 0: [--------] (5 samples) + // Chunk 1: Current Head [-------] (2 samples) + // New samples added after Series() + // Chunk 1: Current Head [-----------------------------------] (5 samples) + // Output Graphically [------------] (With 8 samples, samples newer than lastmint or older than lastmaxt are omitted but the ones in between are kept) + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(20), v: float64(0)}, + sample{t: minutes(22), v: float64(0)}, + sample{t: minutes(24), v: float64(0)}, + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(26), v: float64(0)}, + sample{t: minutes(30), v: float64(0)}, + sample{t: minutes(32), v: float64(1)}, // This sample was added after Series() but before Chunk() and its in between the lastmint and maxt so it should be kept + sample{t: minutes(35), v: float64(1)}, + }, + }, + }, + { + name: "After Series() previous head gets mmapped after getting samples, new head gets new samples also overlapping, none of these should appear in the response.", + queryMinT: minutes(0), + queryMaxT: minutes(100), + firstInOrderSampleAt: minutes(120), + initialSamples: tsdbutil.SampleSlice{ + // Chunk 0 + 
sample{t: minutes(20), v: float64(0)}, + sample{t: minutes(22), v: float64(0)}, + sample{t: minutes(24), v: float64(0)}, + sample{t: minutes(26), v: float64(0)}, + sample{t: minutes(30), v: float64(0)}, + // Chunk 1 Head + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(35), v: float64(1)}, + }, + samplesAfterSeriesCall: tsdbutil.SampleSlice{ + sample{t: minutes(10), v: float64(1)}, + sample{t: minutes(32), v: float64(1)}, + sample{t: minutes(50), v: float64(1)}, + // Chunk 1 gets mmapped and Chunk 2, the new head is born + sample{t: minutes(25), v: float64(2)}, + sample{t: minutes(31), v: float64(2)}, + }, + expChunkError: false, + // ts (in minutes) 0 10 20 30 40 50 60 70 80 90 100 + // Query Interval [-----------------------------------] + // Chunk 0: [--------] (5 samples) + // Chunk 1: Current Head [-------] (2 samples) + // New samples added after Series() + // Chunk 1 (mmapped) [-------------------------] (5 samples) + // Chunk 2: Current Head [-----------] (2 samples) + // Output Graphically [------------] (8 samples) It has 5 from Chunk 0 and 3 from Chunk 1 + expChunksSamples: []tsdbutil.SampleSlice{ + { + sample{t: minutes(20), v: float64(0)}, + sample{t: minutes(22), v: float64(0)}, + sample{t: minutes(24), v: float64(0)}, + sample{t: minutes(25), v: float64(1)}, + sample{t: minutes(26), v: float64(0)}, + sample{t: minutes(30), v: float64(0)}, + sample{t: minutes(32), v: float64(1)}, // This sample was added after Series() but before Chunk() and its in between the lastmint and maxt so it should be kept + sample{t: minutes(35), v: float64(1)}, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + db := newTestDBWithOpts(t, opts) + + app := db.Appender(context.Background()) + s1Ref := appendSample(app, s1, tc.firstInOrderSampleAt, float64(tc.firstInOrderSampleAt/1*time.Minute.Milliseconds())) + require.NoError(t, app.Commit()) + + // OOO few samples for s1. 
+ app = db.Appender(context.Background()) + for _, s := range tc.initialSamples { + appendSample(app, s1, s.T(), s.V()) + } + require.NoError(t, app.Commit()) + + // The Series method is the one that populates the chunk meta OOO + // markers like OOOLastRef. These are then used by the ChunkReader. + ir := NewOOOHeadIndexReader(db.head, tc.queryMinT, tc.queryMaxT) + var chks []chunks.Meta + var respLset labels.Labels + err := ir.Series(s1Ref, &respLset, &chks) + require.NoError(t, err) + require.Equal(t, len(tc.expChunksSamples), len(chks)) + + // Now we keep receiving ooo samples + // OOO few samples for s1. + app = db.Appender(context.Background()) + for _, s := range tc.samplesAfterSeriesCall { + appendSample(app, s1, s.T(), s.V()) + } + require.NoError(t, app.Commit()) + + cr := NewOOOHeadChunkReader(db.head, tc.queryMinT, tc.queryMaxT) + for i := 0; i < len(chks); i++ { + c, err := cr.Chunk(chks[i]) + require.NoError(t, err) + + var resultSamples tsdbutil.SampleSlice + it := c.Iterator(nil) + for it.Next() { + ts, v := it.At() + resultSamples = append(resultSamples, sample{t: ts, v: v}) + } + require.Equal(t, tc.expChunksSamples[i], resultSamples) + } + }) + } +} + +// TestSortByMinTimeAndMinRef tests that the sort function for chunk metas does sort +// by chunk meta MinTime and in case of same references by the lower reference. 
+func TestSortByMinTimeAndMinRef(t *testing.T) { + tests := []struct { + name string + input []chunkMetaAndChunkDiskMapperRef + exp []chunkMetaAndChunkDiskMapperRef + }{ + { + name: "chunks are ordered by min time", + input: []chunkMetaAndChunkDiskMapperRef{ + { + meta: chunks.Meta{ + Ref: 0, + MinTime: 0, + }, + ref: chunks.ChunkDiskMapperRef(0), + }, + { + meta: chunks.Meta{ + Ref: 1, + MinTime: 1, + }, + ref: chunks.ChunkDiskMapperRef(1), + }, + }, + exp: []chunkMetaAndChunkDiskMapperRef{ + { + meta: chunks.Meta{ + Ref: 0, + MinTime: 0, + }, + ref: chunks.ChunkDiskMapperRef(0), + }, + { + meta: chunks.Meta{ + Ref: 1, + MinTime: 1, + }, + ref: chunks.ChunkDiskMapperRef(1), + }, + }, + }, + { + name: "if same mintime, lower reference goes first", + input: []chunkMetaAndChunkDiskMapperRef{ + { + meta: chunks.Meta{ + Ref: 10, + MinTime: 0, + }, + ref: chunks.ChunkDiskMapperRef(0), + }, + { + meta: chunks.Meta{ + Ref: 5, + MinTime: 0, + }, + ref: chunks.ChunkDiskMapperRef(1), + }, + }, + exp: []chunkMetaAndChunkDiskMapperRef{ + { + meta: chunks.Meta{ + Ref: 5, + MinTime: 0, + }, + ref: chunks.ChunkDiskMapperRef(1), + }, + { + meta: chunks.Meta{ + Ref: 10, + MinTime: 0, + }, + ref: chunks.ChunkDiskMapperRef(0), + }, + }, + }, + } + + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + sort.Sort(byMinTimeAndMinRef(tc.input)) + require.Equal(t, tc.exp, tc.input) + }) + } +} + +// TestSortMetaByMinTimeAndMinRef tests that the sort function for chunk metas does sort +// by chunk meta MinTime and in case of same references by the lower reference. 
+func TestSortMetaByMinTimeAndMinRef(t *testing.T) { + tests := []struct { + name string + inputMetas []chunks.Meta + expMetas []chunks.Meta + }{ + { + name: "chunks are ordered by min time", + inputMetas: []chunks.Meta{ + { + Ref: 0, + MinTime: 0, + }, + { + Ref: 1, + MinTime: 1, + }, + }, + expMetas: []chunks.Meta{ + { + Ref: 0, + MinTime: 0, + }, + { + Ref: 1, + MinTime: 1, + }, + }, + }, + { + name: "if same mintime, lower reference goes first", + inputMetas: []chunks.Meta{ + { + Ref: 10, + MinTime: 0, + }, + { + Ref: 5, + MinTime: 0, + }, + }, + expMetas: []chunks.Meta{ + { + Ref: 5, + MinTime: 0, + }, + { + Ref: 10, + MinTime: 0, + }, + }, + }, + } + + for _, tc := range tests { + t.Run(fmt.Sprintf("name=%s", tc.name), func(t *testing.T) { + sort.Sort(metaByMinTimeAndMinRef(tc.inputMetas)) + require.Equal(t, tc.expMetas, tc.inputMetas) + }) + } +} + +func newTestDBWithOpts(t *testing.T, opts *Options) *DB { + dir := t.TempDir() + + db, err := Open(dir, nil, nil, opts, nil) + require.NoError(t, err) + + t.Cleanup(func() { + require.NoError(t, db.Close()) + }) + + return db +} diff --git a/tsdb/ooo_head_test.go b/tsdb/ooo_head_test.go new file mode 100644 index 0000000000..de078b94c4 --- /dev/null +++ b/tsdb/ooo_head_test.go @@ -0,0 +1,93 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package tsdb + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +const testMaxSize int = 32 + +// Formulas chosen to make testing easy: +func valEven(pos int) int { return pos*2 + 2 } // s[0]=2, s[1]=4, s[2]=6, ..., s[31]=64 - Predictable pre-existing values +func valOdd(pos int) int { return pos*2 + 1 } // s[0]=1, s[1]=3, s[2]=5, ..., s[31]=63 - New values will interject at chosen position because they sort before the pre-existing vals. + +func samplify(v int) sample { return sample{int64(v), float64(v)} } + +func makeEvenSampleSlice(n int) []sample { + s := make([]sample, n) + for i := 0; i < n; i++ { + s[i] = samplify(valEven(i)) + } + return s +} + +// TestOOOInsert tests the following cases: +// - Number of pre-existing samples anywhere from 0 to testMaxSize-1. +// - Insert new sample before first pre-existing samples, after the last, and anywhere in between. +// - With a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves. +// Note: In all samples used, t always equals v in numeric value. when we talk about 'value' we just refer to a value that will be used for both sample.t and sample.v. +func TestOOOInsert(t *testing.T) { + for numPreExisting := 0; numPreExisting <= testMaxSize; numPreExisting++ { + // For example, if we have numPreExisting 2, then: + // chunk.samples indexes filled 0 1 + // chunk.samples with these values 2 4 // valEven + // we want to test inserting at index 0 1 2 // insertPos=0..numPreExisting + // we can do this by using values 1, 3 5 // valOdd(insertPos) + + for insertPos := 0; insertPos <= numPreExisting; insertPos++ { + chunk := NewOOOChunk() + chunk.samples = makeEvenSampleSlice(numPreExisting) + newSample := samplify(valOdd(insertPos)) + chunk.Insert(newSample.t, newSample.v) + + var expSamples []sample + // Our expected new samples slice, will be first the original samples. 
+ for i := 0; i < insertPos; i++ { + expSamples = append(expSamples, samplify(valEven(i))) + } + // Then the new sample. + expSamples = append(expSamples, newSample) + // Followed by any original samples that were pushed back by the new one. + for i := insertPos; i < numPreExisting; i++ { + expSamples = append(expSamples, samplify(valEven(i))) + } + + require.Equal(t, expSamples, chunk.samples, "numPreExisting %d, insertPos %d", numPreExisting, insertPos) + } + } +} + +// TestOOOInsertDuplicate tests the correct behavior when inserting a sample that is a duplicate of any +// pre-existing samples, with between 1 and testMaxSize pre-existing samples and +// with a chunk initial capacity of testMaxSize/8 and testMaxSize, which lets us test non-full and full chunks, and chunks that need to expand themselves. +func TestOOOInsertDuplicate(t *testing.T) { + for num := 1; num <= testMaxSize; num++ { + for dupPos := 0; dupPos < num; dupPos++ { + chunk := NewOOOChunk() + chunk.samples = makeEvenSampleSlice(num) + + dupSample := chunk.samples[dupPos] + dupSample.v = 0.123 + + ok := chunk.Insert(dupSample.t, dupSample.v) + + expSamples := makeEvenSampleSlice(num) // We expect no change. 
+ require.False(t, ok) + require.Equal(t, expSamples, chunk.samples, "num %d, dupPos %d", num, dupPos) + } + } +} diff --git a/tsdb/querier.go b/tsdb/querier.go index 522adb87cd..5141a2c1d2 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -569,7 +569,7 @@ func (p *populateWithDelGenericSeriesIterator) next() bool { p.i++ p.currChkMeta = p.chks[p.i] - p.currChkMeta.Chunk, p.err = p.chunks.Chunk(p.currChkMeta.Ref) + p.currChkMeta.Chunk, p.err = p.chunks.Chunk(p.currChkMeta) if p.err != nil { p.err = errors.Wrapf(p.err, "cannot populate chunk %d", p.currChkMeta.Ref) return false @@ -898,7 +898,7 @@ func newNopChunkReader() ChunkReader { } } -func (cr nopChunkReader) Chunk(ref chunks.ChunkRef) (chunkenc.Chunk, error) { +func (cr nopChunkReader) Chunk(meta chunks.Meta) (chunkenc.Chunk, error) { return cr.emptyChunk, nil } diff --git a/tsdb/querier_bench_test.go b/tsdb/querier_bench_test.go index 0bd295fbe7..3cf4e19346 100644 --- a/tsdb/querier_bench_test.go +++ b/tsdb/querier_bench_test.go @@ -34,7 +34,7 @@ func BenchmarkQuerier(b *testing.B) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = chunkDir - h, err := NewHead(nil, nil, nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(b, err) defer func() { require.NoError(b, h.Close()) @@ -180,7 +180,7 @@ func BenchmarkQuerierSelect(b *testing.B) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = chunkDir - h, err := NewHead(nil, nil, nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(b, err) defer h.Close() app := h.Appender(context.Background()) diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 7d3cf2dc32..c0ba864510 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -458,7 +458,7 @@ func TestBlockQuerier_AgainstHeadWithOpenChunks(t *testing.T) { t.Run("", func(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkRange = 2 * time.Hour.Milliseconds() - h, err := NewHead(nil, nil, 
nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(t, err) defer h.Close() @@ -627,10 +627,10 @@ func createFakeReaderAndNotPopulatedChunks(s ...[]tsdbutil.Sample) (*fakeChunksR return f, chks } -func (r *fakeChunksReader) Chunk(ref chunks.ChunkRef) (chunkenc.Chunk, error) { - chk, ok := r.chks[ref] +func (r *fakeChunksReader) Chunk(meta chunks.Meta) (chunkenc.Chunk, error) { + chk, ok := r.chks[meta.Ref] if !ok { - return nil, errors.Errorf("chunk not found at ref %v", ref) + return nil, errors.Errorf("chunk not found at ref %v", meta.Ref) } return chk, nil } @@ -1016,8 +1016,8 @@ func BenchmarkMergedSeriesSet(b *testing.B) { type mockChunkReader map[chunks.ChunkRef]chunkenc.Chunk -func (cr mockChunkReader) Chunk(id chunks.ChunkRef) (chunkenc.Chunk, error) { - chk, ok := cr[id] +func (cr mockChunkReader) Chunk(meta chunks.Meta) (chunkenc.Chunk, error) { + chk, ok := cr[meta.Ref] if ok { return chk, nil } @@ -1643,7 +1643,7 @@ func TestPostingsForMatchers(t *testing.T) { opts := DefaultHeadOptions() opts.ChunkRange = 1000 opts.ChunkDirRoot = chunkDir - h, err := NewHead(nil, nil, nil, opts, nil) + h, err := NewHead(nil, nil, nil, nil, opts, nil) require.NoError(t, err) defer func() { require.NoError(t, h.Close()) @@ -1944,13 +1944,17 @@ func BenchmarkQueries(b *testing.B) { }, } - queryTypes := make(map[string]storage.Querier) + type qt struct { + typ string + querier storage.Querier + } + var queryTypes []qt // We use a slice instead of map to keep the order of test cases consistent. defer func() { for _, q := range queryTypes { // Can't run a check for error here as some of these will fail as // queryTypes is using the same slice for the different block queriers // and would have been closed in the previous iteration. 
- q.Close() + q.querier.Close() } }() @@ -1991,21 +1995,38 @@ func BenchmarkQueries(b *testing.B) { qs = append(qs, q) } - queryTypes["_1-Block"] = storage.NewMergeQuerier(qs[:1], nil, storage.ChainedSeriesMerge) - queryTypes["_3-Blocks"] = storage.NewMergeQuerier(qs[0:3], nil, storage.ChainedSeriesMerge) - queryTypes["_10-Blocks"] = storage.NewMergeQuerier(qs, nil, storage.ChainedSeriesMerge) + queryTypes = append(queryTypes, qt{"_1-Block", storage.NewMergeQuerier(qs[:1], nil, storage.ChainedSeriesMerge)}) + queryTypes = append(queryTypes, qt{"_3-Blocks", storage.NewMergeQuerier(qs[0:3], nil, storage.ChainedSeriesMerge)}) + queryTypes = append(queryTypes, qt{"_10-Blocks", storage.NewMergeQuerier(qs, nil, storage.ChainedSeriesMerge)}) chunkDir := b.TempDir() head := createHead(b, nil, series, chunkDir) - qHead, err := NewBlockQuerier(head, 1, nSamples) + qHead, err := NewBlockQuerier(NewRangeHead(head, 1, nSamples), 1, nSamples) require.NoError(b, err) - queryTypes["_Head"] = qHead + queryTypes = append(queryTypes, qt{"_Head", qHead}) - for qtype, querier := range queryTypes { - b.Run(title+qtype+"_nSeries:"+strconv.Itoa(nSeries)+"_nSamples:"+strconv.Itoa(int(nSamples)), func(b *testing.B) { + for _, oooPercentage := range []int{1, 3, 5, 10} { + chunkDir := b.TempDir() + totalOOOSamples := oooPercentage * int(nSamples) / 100 + oooSampleFrequency := int(nSamples) / totalOOOSamples + head := createHeadWithOOOSamples(b, nil, series, chunkDir, oooSampleFrequency) + + qHead, err := NewBlockQuerier(NewRangeHead(head, 1, nSamples), 1, nSamples) + require.NoError(b, err) + qOOOHead, err := NewBlockQuerier(NewOOORangeHead(head, 1, nSamples), 1, nSamples) + require.NoError(b, err) + + queryTypes = append(queryTypes, qt{ + fmt.Sprintf("_Head_oooPercent:%d", oooPercentage), + storage.NewMergeQuerier([]storage.Querier{qHead, qOOOHead}, nil, storage.ChainedSeriesMerge), + }) + } + + for _, q := range queryTypes { + 
b.Run(title+q.typ+"_nSeries:"+strconv.Itoa(nSeries)+"_nSamples:"+strconv.Itoa(int(nSamples)), func(b *testing.B) { expExpansions, err := strconv.Atoi(string(title[len(title)-1])) require.NoError(b, err) - benchQuery(b, expExpansions, querier, selectors) + benchQuery(b, expExpansions, q.querier, selectors) }) } require.NoError(b, head.Close()) @@ -2025,6 +2046,7 @@ func benchQuery(b *testing.B, expExpansions int, q storage.Querier, selectors la s.Labels() it := s.Iterator() for it.Next() { + _, _ = it.At() } actualExpansions++ } diff --git a/tsdb/record/record.go b/tsdb/record/record.go index ee7169a457..162414a3ce 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -43,6 +43,8 @@ const ( Tombstones Type = 3 // Exemplars is used to match WAL records of type Exemplars. Exemplars Type = 4 + // MmapMarkers is used to match OOO WBL records of type MmapMarkers. + MmapMarkers Type = 5 // Metadata is used to match WAL records of type Metadata. Metadata Type = 6 ) @@ -57,6 +59,8 @@ func (rt Type) String() string { return "exemplars" case Tombstones: return "tombstones" + case MmapMarkers: + return "mmapmarkers" case Metadata: return "metadata" default: @@ -157,6 +161,12 @@ type RefExemplar struct { Labels labels.Labels } +// RefMmapMarker marks that the all the samples of the given series until now have been m-mapped to disk. +type RefMmapMarker struct { + Ref chunks.HeadSeriesRef + MmapRef chunks.ChunkDiskMapperRef +} + // Decoder decodes series, sample, metadata and tombstone records. // The zero value is ready to use. 
type Decoder struct{} @@ -168,7 +178,7 @@ func (d *Decoder) Type(rec []byte) Type { return Unknown } switch t := Type(rec[0]); t { - case Series, Samples, Tombstones, Exemplars, Metadata: + case Series, Samples, Tombstones, Exemplars, MmapMarkers, Metadata: return t } return Unknown @@ -354,6 +364,34 @@ func (d *Decoder) ExemplarsFromBuffer(dec *encoding.Decbuf, exemplars []RefExemp return exemplars, nil } +func (d *Decoder) MmapMarkers(rec []byte, markers []RefMmapMarker) ([]RefMmapMarker, error) { + dec := encoding.Decbuf{B: rec} + t := Type(dec.Byte()) + if t != MmapMarkers { + return nil, errors.New("invalid record type") + } + + if dec.Len() == 0 { + return markers, nil + } + for len(dec.B) > 0 && dec.Err() == nil { + ref := chunks.HeadSeriesRef(dec.Be64()) + mmapRef := chunks.ChunkDiskMapperRef(dec.Be64()) + markers = append(markers, RefMmapMarker{ + Ref: ref, + MmapRef: mmapRef, + }) + } + + if dec.Err() != nil { + return nil, errors.Wrapf(dec.Err(), "decode error after %d mmap markers", len(markers)) + } + if len(dec.B) > 0 { + return nil, errors.Errorf("unexpected %d bytes left in entry", len(dec.B)) + } + return markers, nil +} + // Encoder encodes series, sample, and tombstones records. // The zero value is ready to use. 
type Encoder struct{} @@ -467,3 +505,15 @@ func (e *Encoder) EncodeExemplarsIntoBuffer(exemplars []RefExemplar, buf *encodi EncodeLabels(buf, ex.Labels) } } + +func (e *Encoder) MmapMarkers(markers []RefMmapMarker, b []byte) []byte { + buf := encoding.Encbuf{B: b} + buf.PutByte(byte(MmapMarkers)) + + for _, s := range markers { + buf.PutBE64(uint64(s.Ref)) + buf.PutBE64(uint64(s.MmapRef)) + } + + return buf.Get() +} diff --git a/tsdb/wal/wal.go b/tsdb/wal/wal.go index ace6a99566..191b09ed99 100644 --- a/tsdb/wal/wal.go +++ b/tsdb/wal/wal.go @@ -40,6 +40,7 @@ const ( DefaultSegmentSize = 128 * 1024 * 1024 // 128 MB pageSize = 32 * 1024 // 32KB recordHeaderSize = 7 + WblDirName = "wbl" ) // The table gets initialized with sync.Once but may still cause a race @@ -204,32 +205,32 @@ func newWALMetrics(r prometheus.Registerer) *walMetrics { m := &walMetrics{} m.fsyncDuration = prometheus.NewSummary(prometheus.SummaryOpts{ - Name: "prometheus_tsdb_wal_fsync_duration_seconds", + Name: "fsync_duration_seconds", Help: "Duration of WAL fsync.", Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, }) m.pageFlushes = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_wal_page_flushes_total", + Name: "page_flushes_total", Help: "Total number of page flushes.", }) m.pageCompletions = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_wal_completed_pages_total", + Name: "completed_pages_total", Help: "Total number of completed pages.", }) m.truncateFail = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_wal_truncations_failed_total", + Name: "truncations_failed_total", Help: "Total number of WAL truncations that failed.", }) m.truncateTotal = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_wal_truncations_total", + Name: "truncations_total", Help: "Total number of WAL truncations attempted.", }) m.currentSegment = prometheus.NewGauge(prometheus.GaugeOpts{ - Name: 
"prometheus_tsdb_wal_segment_current", + Name: "segment_current", Help: "WAL segment index that TSDB is currently writing to.", }) m.writesFailed = prometheus.NewCounter(prometheus.CounterOpts{ - Name: "prometheus_tsdb_wal_writes_failed_total", + Name: "writes_failed_total", Help: "Total number of WAL writes that failed.", }) @@ -274,7 +275,11 @@ func NewSize(logger log.Logger, reg prometheus.Registerer, dir string, segmentSi stopc: make(chan chan struct{}), compress: compress, } - w.metrics = newWALMetrics(reg) + prefix := "prometheus_tsdb_wal_" + if filepath.Base(dir) == WblDirName { + prefix = "prometheus_tsdb_out_of_order_wal_" + } + w.metrics = newWALMetrics(prometheus.WrapRegistererWithPrefix(prefix, reg)) _, last, err := Segments(w.Dir()) if err != nil { @@ -459,36 +464,46 @@ func SegmentName(dir string, i int) string { return filepath.Join(dir, fmt.Sprintf("%08d", i)) } -// NextSegment creates the next segment and closes the previous one. -func (w *WAL) NextSegment() error { +// NextSegment creates the next segment and closes the previous one asynchronously. +// It returns the file number of the new file. +func (w *WAL) NextSegment() (int, error) { w.mtx.Lock() defer w.mtx.Unlock() - return w.nextSegment() + return w.nextSegment(true) +} + +// NextSegmentSync creates the next segment and closes the previous one in sync. +// It returns the file number of the new file. +func (w *WAL) NextSegmentSync() (int, error) { + w.mtx.Lock() + defer w.mtx.Unlock() + return w.nextSegment(false) } // nextSegment creates the next segment and closes the previous one. -func (w *WAL) nextSegment() error { +// It returns the file number of the new file. +func (w *WAL) nextSegment(async bool) (int, error) { if w.closed { - return errors.New("wal is closed") + return 0, errors.New("wal is closed") } // Only flush the current page if it actually holds data. 
if w.page.alloc > 0 { if err := w.flushPage(true); err != nil { - return err + return 0, err } } next, err := CreateSegment(w.Dir(), w.segment.Index()+1) if err != nil { - return errors.Wrap(err, "create new segment file") + return 0, errors.Wrap(err, "create new segment file") } prev := w.segment if err := w.setSegment(next); err != nil { - return err + return 0, err } // Don't block further writes by fsyncing the last segment. - w.actorc <- func() { + f := func() { if err := w.fsync(prev); err != nil { level.Error(w.logger).Log("msg", "sync previous segment", "err", err) } @@ -496,7 +511,12 @@ func (w *WAL) nextSegment() error { level.Error(w.logger).Log("msg", "close previous segment", "err", err) } } - return nil + if async { + w.actorc <- f + } else { + f() + } + return next.Index(), nil } func (w *WAL) setSegment(segment *Segment) error { @@ -638,7 +658,7 @@ func (w *WAL) log(rec []byte, final bool) error { left += (pageSize - recordHeaderSize) * (w.pagesPerSegment() - w.donePages - 1) // Free pages in the active segment. if len(rec) > left { - if err := w.nextSegment(); err != nil { + if _, err := w.nextSegment(true); err != nil { return err } } @@ -745,6 +765,13 @@ func (w *WAL) fsync(f *Segment) error { return err } +// Sync forces a file sync on the current wal segment. This function is meant +// to be used only on tests due to different behaviour on Operating Systems +// like windows and linux +func (w *WAL) Sync() error { + return w.fsync(w.segment) +} + // Close flushes all writes and closes active segment. 
func (w *WAL) Close() (err error) { w.mtx.Lock() diff --git a/tsdb/wal/watcher_test.go b/tsdb/wal/watcher_test.go index 0892d972c3..b89f8bead9 100644 --- a/tsdb/wal/watcher_test.go +++ b/tsdb/wal/watcher_test.go @@ -364,14 +364,16 @@ func TestReadCheckpoint(t *testing.T) { err := os.Mkdir(wdir, 0o777) require.NoError(t, err) - os.Create(SegmentName(wdir, 30)) + f, err := os.Create(SegmentName(wdir, 30)) + require.NoError(t, err) + require.NoError(t, f.Close()) enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) require.NoError(t, err) - defer func() { + t.Cleanup(func() { require.NoError(t, w.Close()) - }() + }) // Write to the initial segment then checkpoint. for i := 0; i < seriesCount; i++ { @@ -396,8 +398,11 @@ func TestReadCheckpoint(t *testing.T) { require.NoError(t, w.Log(sample)) } } - Checkpoint(log.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) - w.Truncate(32) + _, err = w.NextSegmentSync() + require.NoError(t, err) + _, err = Checkpoint(log.NewNopLogger(), w, 30, 31, func(x chunks.HeadSeriesRef) bool { return true }, 0) + require.NoError(t, err) + require.NoError(t, w.Truncate(32)) // Start read after checkpoint, no more data written. _, _, err = Segments(w.Dir()) diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index a904e47db3..d672807d3f 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -2314,7 +2314,7 @@ func (f *fakeDB) Stats(statsByLabelName string) (_ *tsdb.Stats, retErr error) { }() opts := tsdb.DefaultHeadOptions() opts.ChunkRange = 1000 - h, _ := tsdb.NewHead(nil, nil, nil, opts, nil) + h, _ := tsdb.NewHead(nil, nil, nil, nil, opts, nil) return h.Stats(statsByLabelName), nil }