Fix a race in target.go.

Also, fix problems in shutdown.
Serving startup and shutdown still have to be cleaned up properly.
It's a mess.

Change-Id: I51061db12064e434066446e6fceac32741c4f84c
This commit is contained in:
Bjoern Rabenstein 2014-10-09 15:59:47 +02:00
parent fd6600850a
commit 4447708c9f
4 changed files with 32 additions and 22 deletions

View file

@ -86,8 +86,6 @@ func (p *prometheus) interruptHandler() {
glog.Warning("Received SIGINT/SIGTERM; Exiting gracefully...") glog.Warning("Received SIGINT/SIGTERM; Exiting gracefully...")
p.Close() p.Close()
os.Exit(0)
} }
func (p *prometheus) Close() { func (p *prometheus) Close() {
@ -138,7 +136,6 @@ func main() {
if err != nil { if err != nil {
glog.Fatal("Error opening memory series storage: ", err) glog.Fatal("Error opening memory series storage: ", err)
} }
defer memStorage.Close()
registry.MustRegister(memStorage) registry.MustRegister(memStorage)
var remoteTSDBQueue *remote.TSDBQueueManager var remoteTSDBQueue *remote.TSDBQueueManager
@ -220,7 +217,6 @@ func main() {
storage: memStorage, storage: memStorage,
remoteTSDBQueue: remoteTSDBQueue, remoteTSDBQueue: remoteTSDBQueue,
} }
defer prometheus.Close()
webService := &web.WebService{ webService := &web.WebService{
StatusHandler: prometheusStatus, StatusHandler: prometheusStatus,

View file

@ -126,10 +126,12 @@ type Target interface {
GlobalAddress() string GlobalAddress() string
// Return the target's base labels. // Return the target's base labels.
BaseLabels() clientmodel.LabelSet BaseLabels() clientmodel.LabelSet
// Merge a new externally supplied target definition (e.g. with changed base // SetBaseLabelsFrom queues a replacement of the current base labels by
// labels) into an old target definition for the same endpoint. Preserve // the labels of the given target. The method returns immediately after
// remaining information - like health state - from the old target. // queuing. The actual replacement of the base labels happens
Merge(newTarget Target) // asynchronously (but most likely before the next scrape for the target
// begins).
SetBaseLabelsFrom(Target)
// Scrape target at the specified interval. // Scrape target at the specified interval.
RunScraper(extraction.Ingester, time.Duration) RunScraper(extraction.Ingester, time.Duration)
// Stop scraping, synchronous. // Stop scraping, synchronous.
@ -149,6 +151,8 @@ type target struct {
// Channel to signal RunScraper should stop, holds a channel // Channel to signal RunScraper should stop, holds a channel
// to notify once stopped. // to notify once stopped.
stopScraper chan bool stopScraper chan bool
// Channel to queue base labels to be replaced.
newBaseLabels chan clientmodel.LabelSet
address string address string
// What is the deadline for the HTTP or HTTPS against this endpoint. // What is the deadline for the HTTP or HTTPS against this endpoint.
@ -167,6 +171,7 @@ func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.La
baseLabels: baseLabels, baseLabels: baseLabels,
httpClient: utility.NewDeadlineClient(deadline), httpClient: utility.NewDeadlineClient(deadline),
stopScraper: make(chan bool), stopScraper: make(chan bool),
newBaseLabels: make(chan clientmodel.LabelSet, 1),
} }
return target return target
@ -197,6 +202,7 @@ func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clie
}) })
} }
// RunScraper implements Target.
func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration) { func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration) {
jitterTimer := time.NewTimer(time.Duration(float64(interval) * rand.Float64())) jitterTimer := time.NewTimer(time.Duration(float64(interval) * rand.Float64()))
select { select {
@ -217,12 +223,15 @@ func (t *target) RunScraper(ingester extraction.Ingester, interval time.Duration
targetIntervalLength.WithLabelValues(interval.String()).Observe(float64(time.Since(t.lastScrape) / time.Second)) targetIntervalLength.WithLabelValues(interval.String()).Observe(float64(time.Since(t.lastScrape) / time.Second))
t.lastScrape = time.Now() t.lastScrape = time.Now()
t.scrape(ingester) t.scrape(ingester)
case newBaseLabels := <-t.newBaseLabels:
t.baseLabels = newBaseLabels
case <-t.stopScraper: case <-t.stopScraper:
return return
} }
} }
} }
// StopScraper implements Target.
func (t *target) StopScraper() { func (t *target) StopScraper() {
t.stopScraper <- true t.stopScraper <- true
} }
@ -270,7 +279,7 @@ func (t *target) scrape(ingester extraction.Ingester) (err error) {
return err return err
} }
// XXX: This is a wart; we need to handle this more gracefully down the // TODO: This is a wart; we need to handle this more gracefully down the
// road, especially once we have service discovery support. // road, especially once we have service discovery support.
baseLabels := clientmodel.LabelSet{InstanceLabel: clientmodel.LabelValue(t.Address())} baseLabels := clientmodel.LabelSet{InstanceLabel: clientmodel.LabelValue(t.Address())}
for baseLabel, baseValue := range t.baseLabels { for baseLabel, baseValue := range t.baseLabels {
@ -289,22 +298,27 @@ func (t *target) scrape(ingester extraction.Ingester) (err error) {
return processor.ProcessSingle(resp.Body, i, processOptions) return processor.ProcessSingle(resp.Body, i, processOptions)
} }
// LastError implements Target.
func (t *target) LastError() error { func (t *target) LastError() error {
return t.lastError return t.lastError
} }
// State implements Target.
func (t *target) State() TargetState { func (t *target) State() TargetState {
return t.state return t.state
} }
// LastScrape implements Target.
func (t *target) LastScrape() time.Time { func (t *target) LastScrape() time.Time {
return t.lastScrape return t.lastScrape
} }
// Address implements Target.
func (t *target) Address() string { func (t *target) Address() string {
return t.address return t.address
} }
// GlobalAddress implements Target.
func (t *target) GlobalAddress() string { func (t *target) GlobalAddress() string {
address := t.address address := t.address
hostname, err := os.Hostname() hostname, err := os.Hostname()
@ -318,18 +332,17 @@ func (t *target) GlobalAddress() string {
return address return address
} }
// BaseLabels implements Target.
func (t *target) BaseLabels() clientmodel.LabelSet { func (t *target) BaseLabels() clientmodel.LabelSet {
return t.baseLabels return t.baseLabels
} }
// Merge a new externally supplied target definition (e.g. with changed base // SetBaseLabelsFrom implements Target.
// labels) into an old target definition for the same endpoint. Preserve func (t *target) SetBaseLabelsFrom(newTarget Target) {
// remaining information - like health state - from the old target.
func (t *target) Merge(newTarget Target) {
if t.Address() != newTarget.Address() { if t.Address() != newTarget.Address() {
panic("targets don't refer to the same endpoint") panic("targets don't refer to the same endpoint")
} }
t.baseLabels = newTarget.BaseLabels() t.newBaseLabels <- newTarget.BaseLabels()
} }
type targets []Target type targets []Target

View file

@ -108,7 +108,7 @@ func (p *TargetPool) ReplaceTargets(newTargets []Target) {
newTargetAddresses.Add(newTarget.Address()) newTargetAddresses.Add(newTarget.Address())
oldTarget, ok := p.targetsByAddress[newTarget.Address()] oldTarget, ok := p.targetsByAddress[newTarget.Address()]
if ok { if ok {
oldTarget.Merge(newTarget) oldTarget.SetBaseLabelsFrom(newTarget)
} else { } else {
p.targetsByAddress[newTarget.Address()] = newTarget p.targetsByAddress[newTarget.Address()] = newTarget
go newTarget.RunScraper(p.ingester, p.interval) go newTarget.RunScraper(p.ingester, p.interval)

View file

@ -249,8 +249,8 @@ func (s *memorySeriesStorage) Close() error {
stopped := make(chan bool) stopped := make(chan bool)
glog.Info("Waiting for storage to stop serving...") glog.Info("Waiting for storage to stop serving...")
s.stopServing <- stopped s.stopServing <- stopped
glog.Info("Serving stopped.")
<-stopped <-stopped
glog.Info("Serving stopped.")
glog.Info("Stopping persist loop...") glog.Info("Stopping persist loop...")
close(s.persistQueue) close(s.persistQueue)
@ -276,6 +276,7 @@ func (s *memorySeriesStorage) purgePeriodically(stop <-chan bool) {
for { for {
select { select {
case <-stop: case <-stop:
glog.Info("Purging loop stopped.")
return return
case <-purgeTicker.C: case <-purgeTicker.C:
glog.Info("Purging old series data...") glog.Info("Purging old series data...")