Merge pull request #2593 from prometheus/beorn7/storage2

storage: Recover from corrupted indices for archived series
This commit is contained in:
Björn Rabenstein 2017-04-07 12:55:35 +02:00 committed by GitHub
commit 934d86b936
2 changed files with 32 additions and 11 deletions

View file

@ -26,11 +26,12 @@ import (
"github.com/prometheus/prometheus/storage/local/codable"
)
// Directory names for LevelDB indices.
const (
fingerprintToMetricDir = "archived_fingerprint_to_metric"
fingerprintTimeRangeDir = "archived_fingerprint_to_timerange"
labelNameToLabelValuesDir = "labelname_to_labelvalues"
labelPairToFingerprintsDir = "labelpair_to_fingerprints"
FingerprintToMetricDir = "archived_fingerprint_to_metric"
FingerprintTimeRangeDir = "archived_fingerprint_to_timerange"
LabelNameToLabelValuesDir = "labelname_to_labelvalues"
LabelPairToFingerprintsDir = "labelpair_to_fingerprints"
)
// LevelDB cache sizes, changeable via flags.
@ -96,7 +97,7 @@ func (i *FingerprintMetricIndex) Lookup(fp model.Fingerprint) (metric model.Metr
// ready to use.
func NewFingerprintMetricIndex(basePath string) (*FingerprintMetricIndex, error) {
fingerprintToMetricDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, fingerprintToMetricDir),
Path: filepath.Join(basePath, FingerprintToMetricDir),
CacheSizeBytes: FingerprintMetricCacheSize,
})
if err != nil {
@ -168,7 +169,7 @@ func (i *LabelNameLabelValuesIndex) LookupSet(l model.LabelName) (values map[mod
// LabelNameLabelValuesIndex ready to use.
func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex, error) {
labelNameToLabelValuesDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, labelNameToLabelValuesDir),
Path: filepath.Join(basePath, LabelNameToLabelValuesDir),
CacheSizeBytes: LabelNameLabelValuesCacheSize,
})
if err != nil {
@ -182,7 +183,7 @@ func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex,
// DeleteLabelNameLabelValuesIndex deletes the LevelDB-backed
// LabelNameLabelValuesIndex. Use only for a not yet opened index.
func DeleteLabelNameLabelValuesIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, labelNameToLabelValuesDir))
return os.RemoveAll(path.Join(basePath, LabelNameToLabelValuesDir))
}
// LabelPairFingerprintsMapping is an in-memory map of label pairs to
@ -246,7 +247,7 @@ func (i *LabelPairFingerprintIndex) LookupSet(p model.LabelPair) (fps map[model.
// LabelPairFingerprintIndex ready to use.
func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex, error) {
labelPairToFingerprintsDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, labelPairToFingerprintsDir),
Path: filepath.Join(basePath, LabelPairToFingerprintsDir),
CacheSizeBytes: LabelPairFingerprintsCacheSize,
})
if err != nil {
@ -260,7 +261,7 @@ func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex,
// DeleteLabelPairFingerprintIndex deletes the LevelDB-backed
// LabelPairFingerprintIndex. Use only for a not yet opened index.
func DeleteLabelPairFingerprintIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, labelPairToFingerprintsDir))
return os.RemoveAll(path.Join(basePath, LabelPairToFingerprintsDir))
}
// FingerprintTimeRangeIndex models a database tracking the time ranges
@ -284,7 +285,7 @@ func (i *FingerprintTimeRangeIndex) Lookup(fp model.Fingerprint) (firstTime, las
// FingerprintTimeRangeIndex ready to use.
func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex, error) {
fingerprintTimeRangeDB, err := NewLevelDB(LevelDBOptions{
Path: filepath.Join(basePath, fingerprintTimeRangeDir),
Path: filepath.Join(basePath, FingerprintTimeRangeDir),
CacheSizeBytes: FingerprintTimeRangeCacheSize,
})
if err != nil {
@ -294,3 +295,9 @@ func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex,
KeyValueStore: fingerprintTimeRangeDB,
}, nil
}
// DeleteFingerprintTimeRangeIndex deletes the LevelDB-backed
// FingerprintTimeRangeIndex. Use only for a not yet opened index.
func DeleteFingerprintTimeRangeIndex(basePath string) error {
return os.RemoveAll(path.Join(basePath, FingerprintTimeRangeDir))
}

View file

@ -205,11 +205,25 @@ func newPersistence(
archivedFingerprintToMetrics, err := index.NewFingerprintMetricIndex(basePath)
if err != nil {
// At this point, we could simply blow away the archived
// fingerprint-to-metric index. However, then we would lose
// _all_ archived metrics. So better give the user an
// opportunity to repair the LevelDB with a 3rd party tool.
log.Errorf("Could not open the fingerprint-to-metric index for archived series. Please try a 3rd party tool to repair LevelDB in directory %q. If unsuccessful or undesired, delete the whole directory and restart Prometheus for crash recovery. You will lose all archived time series.", filepath.Join(basePath, index.FingerprintToMetricDir))
return nil, err
}
archivedFingerprintToTimeRange, err := index.NewFingerprintTimeRangeIndex(basePath)
if err != nil {
return nil, err
// We can recover the archived fingerprint-to-timerange index,
// so blow it away and set ourselves dirty. Then re-open the now
// empty index.
if err := index.DeleteFingerprintTimeRangeIndex(basePath); err != nil {
return nil, err
}
dirty = true
if archivedFingerprintToTimeRange, err = index.NewFingerprintTimeRangeIndex(basePath); err != nil {
return nil, err
}
}
p := &persistence{