mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-11 22:07:27 -08:00
1909686789
- Always spell out the time unit (e.g. milliseconds instead of ms). - Remove "_total" from the names of metrics that are not counters. - Make use of the "Namespace" and "Subsystem" fields in the options. - Removed the "capacity" facet from all metrics about channels/queues. These are all fixed via command line flags and will never change during the runtime of a process. Also, they should not be part of the same metric family. I have added separate metrics for the capacity of queues as convenience. (They will never change and are only set once.) - I left "metric_disk_latency_microseconds" unchanged, although that metric measures the latency of the storage device, even if it is not a spinning disk. "SSD" is read by many as "solid state disk", so it's not too far off. (It should be "solid state drive", of course, but "metric_drive_latency_microseconds" is probably confusing.) - Brian suggested to not mix "failure" and "success" outcome in the same metric family (distinguished by labels). For now, I left it as it is. We are touching some bigger issue here, especially as other parts in the Prometheus ecosystem are following the same principle. We still need to come to terms here and then change things consistently everywhere. Change-Id: If799458b450d18f78500f05990301c12525197d3
299 lines
8.4 KiB
Go
299 lines
8.4 KiB
Go
// Copyright 2013 Prometheus Team
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package retrieval
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/golang/glog"
|
|
"github.com/prometheus/client_golang/extraction"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
clientmodel "github.com/prometheus/client_golang/model"
|
|
|
|
"github.com/prometheus/prometheus/utility"
|
|
)
|
|
|
|
const (
|
|
InstanceLabel clientmodel.LabelName = "instance"
|
|
// The metric name for the synthetic health variable.
|
|
ScrapeHealthMetricName clientmodel.LabelValue = "up"
|
|
|
|
// Constants for instrumentation.
|
|
namespace = "prometheus"
|
|
job = "target_job"
|
|
instance = "target_instance"
|
|
failure = "failure"
|
|
outcome = "outcome"
|
|
success = "success"
|
|
)
|
|
|
|
var (
|
|
localhostRepresentations = []string{"http://127.0.0.1", "http://localhost"}
|
|
|
|
targetOperationLatencies = prometheus.NewSummaryVec(
|
|
prometheus.SummaryOpts{
|
|
Namespace: namespace,
|
|
Name: "target_operation_latency_milliseconds",
|
|
Help: "The latencies for target operations.",
|
|
Objectives: []float64{0.01, 0.05, 0.5, 0.90, 0.99},
|
|
},
|
|
[]string{job, instance, outcome},
|
|
)
|
|
)
|
|
|
|
func init() {
|
|
prometheus.MustRegister(targetOperationLatencies)
|
|
}
|
|
|
|
// The state of the given Target.
|
|
type TargetState int
|
|
|
|
func (t TargetState) String() string {
|
|
switch t {
|
|
case UNKNOWN:
|
|
return "UNKNOWN"
|
|
case ALIVE:
|
|
return "ALIVE"
|
|
case UNREACHABLE:
|
|
return "UNREACHABLE"
|
|
}
|
|
|
|
panic("unknown state")
|
|
}
|
|
|
|
const (
|
|
// The Target has not been seen; we know nothing about it, except that it is
|
|
// on our docket for examination.
|
|
UNKNOWN TargetState = iota
|
|
// The Target has been found and successfully queried.
|
|
ALIVE
|
|
// The Target was either historically found or not found and then determined
|
|
// to be unhealthy by either not responding or disappearing.
|
|
UNREACHABLE
|
|
)
|
|
|
|
// A Target represents an endpoint that should be interrogated for metrics.
|
|
//
|
|
// The protocol described by this type will likely change in future iterations,
|
|
// as it offers no good support for aggregated targets and fan out. Thusly,
|
|
// it is likely that the current Target and target uses will be
|
|
// wrapped with some resolver type.
|
|
//
|
|
// For the future, the Target protocol will abstract away the exact means that
|
|
// metrics are retrieved and deserialized from the given instance to which it
|
|
// refers.
|
|
type Target interface {
|
|
// Retrieve values from this target.
|
|
Scrape(ingester extraction.Ingester) error
|
|
// Return the last encountered scrape error, if any.
|
|
LastError() error
|
|
// Return the health of the target.
|
|
State() TargetState
|
|
// Return the last time a scrape was attempted.
|
|
LastScrape() time.Time
|
|
// The address to which the Target corresponds. Out of all of the available
|
|
// points in this interface, this one is the best candidate to change given
|
|
// the ways to express the endpoint.
|
|
Address() string
|
|
// The address as seen from other hosts. References to localhost are resolved
|
|
// to the address of the prometheus server.
|
|
GlobalAddress() string
|
|
// Return the target's base labels.
|
|
BaseLabels() clientmodel.LabelSet
|
|
// Merge a new externally supplied target definition (e.g. with changed base
|
|
// labels) into an old target definition for the same endpoint. Preserve
|
|
// remaining information - like health state - from the old target.
|
|
Merge(newTarget Target)
|
|
}
|
|
|
|
// target is a Target that refers to a singular HTTP or HTTPS endpoint.
|
|
type target struct {
|
|
// The current health state of the target.
|
|
state TargetState
|
|
// The last encountered scrape error, if any.
|
|
lastError error
|
|
// The last time a scrape was attempted.
|
|
lastScrape time.Time
|
|
|
|
address string
|
|
// What is the deadline for the HTTP or HTTPS against this endpoint.
|
|
Deadline time.Duration
|
|
// Any base labels that are added to this target and its metrics.
|
|
baseLabels clientmodel.LabelSet
|
|
// The HTTP client used to scrape the target's endpoint.
|
|
httpClient *http.Client
|
|
}
|
|
|
|
// Furnish a reasonably configured target for querying.
|
|
func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.LabelSet) Target {
|
|
target := &target{
|
|
address: address,
|
|
Deadline: deadline,
|
|
baseLabels: baseLabels,
|
|
httpClient: utility.NewDeadlineClient(deadline),
|
|
}
|
|
|
|
return target
|
|
}
|
|
|
|
func (t *target) recordScrapeHealth(ingester extraction.Ingester, timestamp clientmodel.Timestamp, healthy bool) {
|
|
metric := clientmodel.Metric{}
|
|
for label, value := range t.baseLabels {
|
|
metric[label] = value
|
|
}
|
|
metric[clientmodel.MetricNameLabel] = clientmodel.LabelValue(ScrapeHealthMetricName)
|
|
metric[InstanceLabel] = clientmodel.LabelValue(t.Address())
|
|
|
|
healthValue := clientmodel.SampleValue(0)
|
|
if healthy {
|
|
healthValue = clientmodel.SampleValue(1)
|
|
}
|
|
|
|
sample := &clientmodel.Sample{
|
|
Metric: metric,
|
|
Timestamp: timestamp,
|
|
Value: healthValue,
|
|
}
|
|
|
|
ingester.Ingest(&extraction.Result{
|
|
Err: nil,
|
|
Samples: clientmodel.Samples{sample},
|
|
})
|
|
}
|
|
|
|
func (t *target) Scrape(ingester extraction.Ingester) error {
|
|
now := clientmodel.Now()
|
|
err := t.scrape(now, ingester)
|
|
if err == nil {
|
|
t.state = ALIVE
|
|
t.recordScrapeHealth(ingester, now, true)
|
|
} else {
|
|
t.state = UNREACHABLE
|
|
t.recordScrapeHealth(ingester, now, false)
|
|
}
|
|
t.lastScrape = time.Now()
|
|
t.lastError = err
|
|
return err
|
|
}
|
|
|
|
const acceptHeader = `application/vnd.google.protobuf;proto=io.prometheus.client.MetricFamily;encoding=delimited;q=0.7,text/plain;version=0.0.4;q=0.3,application/json;schema=prometheus/telemetry;version=0.0.2;q=0.2,*/*;q=0.1`
|
|
|
|
func (t *target) scrape(timestamp clientmodel.Timestamp, ingester extraction.Ingester) (err error) {
|
|
defer func(start time.Time) {
|
|
ms := float64(time.Since(start)) / float64(time.Millisecond)
|
|
labels := prometheus.Labels{
|
|
job: string(t.baseLabels[clientmodel.JobLabel]),
|
|
instance: t.Address(),
|
|
outcome: success,
|
|
}
|
|
if err != nil {
|
|
labels[outcome] = failure
|
|
}
|
|
|
|
targetOperationLatencies.With(labels).Observe(ms)
|
|
}(time.Now())
|
|
|
|
req, err := http.NewRequest("GET", t.Address(), nil)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
req.Header.Add("Accept", acceptHeader)
|
|
|
|
resp, err := t.httpClient.Do(req)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode != http.StatusOK {
|
|
return fmt.Errorf("server returned HTTP status %s", resp.Status)
|
|
}
|
|
|
|
processor, err := extraction.ProcessorForRequestHeader(resp.Header)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// XXX: This is a wart; we need to handle this more gracefully down the
|
|
// road, especially once we have service discovery support.
|
|
baseLabels := clientmodel.LabelSet{InstanceLabel: clientmodel.LabelValue(t.Address())}
|
|
for baseLabel, baseValue := range t.baseLabels {
|
|
baseLabels[baseLabel] = baseValue
|
|
}
|
|
|
|
i := &MergeLabelsIngester{
|
|
Labels: baseLabels,
|
|
CollisionPrefix: clientmodel.ExporterLabelPrefix,
|
|
|
|
Ingester: ingester,
|
|
}
|
|
processOptions := &extraction.ProcessOptions{
|
|
Timestamp: timestamp,
|
|
}
|
|
return processor.ProcessSingle(resp.Body, i, processOptions)
|
|
}
|
|
|
|
func (t *target) LastError() error {
|
|
return t.lastError
|
|
}
|
|
|
|
func (t *target) State() TargetState {
|
|
return t.state
|
|
}
|
|
|
|
func (t *target) LastScrape() time.Time {
|
|
return t.lastScrape
|
|
}
|
|
|
|
func (t *target) Address() string {
|
|
return t.address
|
|
}
|
|
|
|
func (t *target) GlobalAddress() string {
|
|
address := t.address
|
|
hostname, err := os.Hostname()
|
|
if err != nil {
|
|
glog.Warningf("Couldn't get hostname: %s, returning target.Address()", err)
|
|
return address
|
|
}
|
|
for _, localhostRepresentation := range localhostRepresentations {
|
|
address = strings.Replace(address, localhostRepresentation, fmt.Sprintf("http://%s", hostname), -1)
|
|
}
|
|
return address
|
|
}
|
|
|
|
func (t *target) BaseLabels() clientmodel.LabelSet {
|
|
return t.baseLabels
|
|
}
|
|
|
|
// Merge a new externally supplied target definition (e.g. with changed base
|
|
// labels) into an old target definition for the same endpoint. Preserve
|
|
// remaining information - like health state - from the old target.
|
|
func (t *target) Merge(newTarget Target) {
|
|
if t.Address() != newTarget.Address() {
|
|
panic("targets don't refer to the same endpoint")
|
|
}
|
|
t.baseLabels = newTarget.BaseLabels()
|
|
}
|
|
|
|
type targets []Target
|
|
|
|
func (t targets) Len() int {
|
|
return len(t)
|
|
}
|