storage: Recover from corrupted indices for archived series

An unopenable archived_fingerprint_to_timerange is simply deleted and
will be rebuilt during crash recovery (which can then take quite some time).

An unopenable archived_fingerprint_to_metric is not deleted but
instructions to the user are logged. A deletion has to be done by the
user explicitly as it means losing all archived series (and a repair
with a 3rd party tool might still be possible).
This commit is contained in:
beorn7 2017-04-06 19:26:18 +02:00
parent 9775ad4754
commit 4fcc73a04c
2 changed files with 32 additions and 11 deletions

View file

@ -26,11 +26,12 @@ import (
"github.com/prometheus/prometheus/storage/local/codable" "github.com/prometheus/prometheus/storage/local/codable"
) )
// Directory names for LevelDB indices.
const ( const (
fingerprintToMetricDir = "archived_fingerprint_to_metric" FingerprintToMetricDir = "archived_fingerprint_to_metric"
fingerprintTimeRangeDir = "archived_fingerprint_to_timerange" FingerprintTimeRangeDir = "archived_fingerprint_to_timerange"
labelNameToLabelValuesDir = "labelname_to_labelvalues" LabelNameToLabelValuesDir = "labelname_to_labelvalues"
labelPairToFingerprintsDir = "labelpair_to_fingerprints" LabelPairToFingerprintsDir = "labelpair_to_fingerprints"
) )
// LevelDB cache sizes, changeable via flags. // LevelDB cache sizes, changeable via flags.
@ -96,7 +97,7 @@ func (i *FingerprintMetricIndex) Lookup(fp model.Fingerprint) (metric model.Metr
// ready to use. // ready to use.
func NewFingerprintMetricIndex(basePath string) (*FingerprintMetricIndex, error) { func NewFingerprintMetricIndex(basePath string) (*FingerprintMetricIndex, error) {
fingerprintToMetricDB, err := NewLevelDB(LevelDBOptions{ fingerprintToMetricDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, fingerprintToMetricDir), Path: filepath.Join(basePath, FingerprintToMetricDir),
CacheSizeBytes: FingerprintMetricCacheSize, CacheSizeBytes: FingerprintMetricCacheSize,
}) })
if err != nil { if err != nil {
@ -168,7 +169,7 @@ func (i *LabelNameLabelValuesIndex) LookupSet(l model.LabelName) (values map[mod
// LabelNameLabelValuesIndex ready to use. // LabelNameLabelValuesIndex ready to use.
func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex, error) { func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex, error) {
labelNameToLabelValuesDB, err := NewLevelDB(LevelDBOptions{ labelNameToLabelValuesDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, labelNameToLabelValuesDir), Path: filepath.Join(basePath, LabelNameToLabelValuesDir),
CacheSizeBytes: LabelNameLabelValuesCacheSize, CacheSizeBytes: LabelNameLabelValuesCacheSize,
}) })
if err != nil { if err != nil {
@ -182,7 +183,7 @@ func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex,
// DeleteLabelNameLabelValuesIndex deletes the LevelDB-backed // DeleteLabelNameLabelValuesIndex deletes the LevelDB-backed
// LabelNameLabelValuesIndex. Use only for a not yet opened index. // LabelNameLabelValuesIndex. Use only for a not yet opened index.
func DeleteLabelNameLabelValuesIndex(basePath string) error { func DeleteLabelNameLabelValuesIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, labelNameToLabelValuesDir)) return os.RemoveAll(path.Join(basePath, LabelNameToLabelValuesDir))
} }
// LabelPairFingerprintsMapping is an in-memory map of label pairs to // LabelPairFingerprintsMapping is an in-memory map of label pairs to
@ -246,7 +247,7 @@ func (i *LabelPairFingerprintIndex) LookupSet(p model.LabelPair) (fps map[model.
// LabelPairFingerprintIndex ready to use. // LabelPairFingerprintIndex ready to use.
func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex, error) { func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex, error) {
labelPairToFingerprintsDB, err := NewLevelDB(LevelDBOptions{ labelPairToFingerprintsDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, labelPairToFingerprintsDir), Path: filepath.Join(basePath, LabelPairToFingerprintsDir),
CacheSizeBytes: LabelPairFingerprintsCacheSize, CacheSizeBytes: LabelPairFingerprintsCacheSize,
}) })
if err != nil { if err != nil {
@ -260,7 +261,7 @@ func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex,
// DeleteLabelPairFingerprintIndex deletes the LevelDB-backed // DeleteLabelPairFingerprintIndex deletes the LevelDB-backed
// LabelPairFingerprintIndex. Use only for a not yet opened index. // LabelPairFingerprintIndex. Use only for a not yet opened index.
func DeleteLabelPairFingerprintIndex(basePath string) error { func DeleteLabelPairFingerprintIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, labelPairToFingerprintsDir)) return os.RemoveAll(path.Join(basePath, LabelPairToFingerprintsDir))
} }
// FingerprintTimeRangeIndex models a database tracking the time ranges // FingerprintTimeRangeIndex models a database tracking the time ranges
@ -284,7 +285,7 @@ func (i *FingerprintTimeRangeIndex) Lookup(fp model.Fingerprint) (firstTime, las
// FingerprintTimeRangeIndex ready to use. // FingerprintTimeRangeIndex ready to use.
func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex, error) { func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex, error) {
fingerprintTimeRangeDB, err := NewLevelDB(LevelDBOptions{ fingerprintTimeRangeDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, fingerprintTimeRangeDir), Path: filepath.Join(basePath, FingerprintTimeRangeDir),
CacheSizeBytes: FingerprintTimeRangeCacheSize, CacheSizeBytes: FingerprintTimeRangeCacheSize,
}) })
if err != nil { if err != nil {
@ -294,3 +295,9 @@ func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex,
KeyValueStore: fingerprintTimeRangeDB, KeyValueStore: fingerprintTimeRangeDB,
}, nil }, nil
} }
// DeleteFingerprintTimeRangeIndex deletes the LevelDB-backed
// FingerprintTimeRangeIndex. Use only for a not yet opened index.
func DeleteFingerprintTimeRangeIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, FingerprintTimeRangeDir))
}

View file

@ -205,11 +205,25 @@ func newPersistence(
archivedFingerprintToMetrics, err := index.NewFingerprintMetricIndex(basePath) archivedFingerprintToMetrics, err := index.NewFingerprintMetricIndex(basePath)
if err != nil { if err != nil {
// At this point, we could simply blow away the archived
// fingerprint-to-metric index. However, then we would lose
// _all_ archived metrics. So better give the user an
// opportunity to repair the LevelDB with a 3rd party tool.
log.Errorf("Could not open the fingerprint-to-metric index for archived series. Please try a 3rd party tool to repair LevelDB in directory %q. If unsuccessful or undesired, delete the whole directory and restart Prometheus for crash recovery. You will lose all archived time series.", filepath.Join(basePath, index.FingerprintToMetricDir))
return nil, err return nil, err
} }
archivedFingerprintToTimeRange, err := index.NewFingerprintTimeRangeIndex(basePath) archivedFingerprintToTimeRange, err := index.NewFingerprintTimeRangeIndex(basePath)
if err != nil { if err != nil {
return nil, err // We can recover the archived fingerprint-to-timerange index,
// so blow it away and set ourselves dirty. Then re-open the now
// empty index.
if err := index.DeleteFingerprintTimeRangeIndex(basePath); err != nil {
return nil, err
}
dirty = true
if archivedFingerprintToTimeRange, err = index.NewFingerprintTimeRangeIndex(basePath); err != nil {
return nil, err
}
} }
p := &persistence{ p := &persistence{