prometheus/web/api/v1/api.go
beorn7 836f1db04c Improve MetricsForLabelMatchers
WIP: This needs more tests.

It now gets a from and through value, which it may opportunistically
use to optimize the retrieval. With possible future range indices,
this could be used in a very efficient way. This change merely applies
some easy checks, which should nevertheless solve the use case of
heavy rule evaluations on servers with a lot of series churn.

Idea is the following:

- Only archive series that are at least as old as the headChunkTimeout
  (which was already extremely unlikely to happen).

- Then maintain a high watermark for the last archival, i.e. no
  archived series has a sample more recent than that watermark.

- Any query that doesn't reach to a time before that watermark doesn't
  have to touch the archive index at all. (A production server at
  Soundcloud with the aforementioned series churn and heavy rule
  evaluations spends 50% of its CPU time in archive index
  lookups. Since rule evaluations usually only touch very recent
  values, most of those lookup should disappear with this change.)

- Federation with a very broad label matcher will profit from this,
  too.

As a byproduct, the un-needed MetricForFingerprint method was removed
from the Storage interface.
2016-03-09 00:25:59 +01:00

337 lines
8.3 KiB
Go

package v1
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"sort"
"strconv"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/common/route"
"golang.org/x/net/context"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/storage/local"
"github.com/prometheus/prometheus/storage/metric"
"github.com/prometheus/prometheus/util/httputil"
)
type status string
const (
statusSuccess status = "success"
statusError = "error"
)
type errorType string
const (
errorNone errorType = ""
errorTimeout = "timeout"
errorCanceled = "canceled"
errorExec = "execution"
errorBadData = "bad_data"
)
var corsHeaders = map[string]string{
"Access-Control-Allow-Headers": "Accept, Authorization, Content-Type, Origin",
"Access-Control-Allow-Methods": "GET, OPTIONS",
"Access-Control-Allow-Origin": "*",
"Access-Control-Expose-Headers": "Date",
}
type apiError struct {
typ errorType
err error
}
func (e *apiError) Error() string {
return fmt.Sprintf("%s: %s", e.typ, e.err)
}
type response struct {
Status status `json:"status"`
Data interface{} `json:"data,omitempty"`
ErrorType errorType `json:"errorType,omitempty"`
Error string `json:"error,omitempty"`
}
// Enables cross-site script calls.
func setCORS(w http.ResponseWriter) {
for h, v := range corsHeaders {
w.Header().Set(h, v)
}
}
type apiFunc func(r *http.Request) (interface{}, *apiError)
// API can register a set of endpoints in a router and handle
// them using the provided storage and query engine.
type API struct {
Storage local.Storage
QueryEngine *promql.Engine
context func(r *http.Request) context.Context
now func() model.Time
}
// NewAPI returns an initialized API type.
func NewAPI(qe *promql.Engine, st local.Storage) *API {
return &API{
QueryEngine: qe,
Storage: st,
context: route.Context,
now: model.Now,
}
}
// Register the API's endpoints in the given router.
func (api *API) Register(r *route.Router) {
instr := func(name string, f apiFunc) http.HandlerFunc {
hf := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
setCORS(w)
if data, err := f(r); err != nil {
respondError(w, err, data)
} else if data != nil {
respond(w, data)
} else {
w.WriteHeader(http.StatusNoContent)
}
})
return prometheus.InstrumentHandler(name, httputil.CompressionHandler{
Handler: hf,
})
}
r.Options("/*path", instr("options", api.options))
r.Get("/query", instr("query", api.query))
r.Get("/query_range", instr("query_range", api.queryRange))
r.Get("/label/:name/values", instr("label_values", api.labelValues))
r.Get("/series", instr("series", api.series))
r.Del("/series", instr("drop_series", api.dropSeries))
}
type queryData struct {
ResultType model.ValueType `json:"resultType"`
Result model.Value `json:"result"`
}
func (api *API) options(r *http.Request) (interface{}, *apiError) {
return nil, nil
}
func (api *API) query(r *http.Request) (interface{}, *apiError) {
var ts model.Time
if t := r.FormValue("time"); t != "" {
var err error
ts, err = parseTime(t)
if err != nil {
return nil, &apiError{errorBadData, err}
}
} else {
ts = api.now()
}
qry, err := api.QueryEngine.NewInstantQuery(r.FormValue("query"), ts)
if err != nil {
return nil, &apiError{errorBadData, err}
}
res := qry.Exec()
if res.Err != nil {
switch res.Err.(type) {
case promql.ErrQueryCanceled:
return nil, &apiError{errorCanceled, res.Err}
case promql.ErrQueryTimeout:
return nil, &apiError{errorTimeout, res.Err}
}
return nil, &apiError{errorExec, res.Err}
}
return &queryData{
ResultType: res.Value.Type(),
Result: res.Value,
}, nil
}
func (api *API) queryRange(r *http.Request) (interface{}, *apiError) {
start, err := parseTime(r.FormValue("start"))
if err != nil {
return nil, &apiError{errorBadData, err}
}
end, err := parseTime(r.FormValue("end"))
if err != nil {
return nil, &apiError{errorBadData, err}
}
step, err := parseDuration(r.FormValue("step"))
if err != nil {
return nil, &apiError{errorBadData, err}
}
// For safety, limit the number of returned points per timeseries.
// This is sufficient for 60s resolution for a week or 1h resolution for a year.
if end.Sub(start)/step > 11000 {
err := errors.New("exceeded maximum resolution of 11,000 points per timeseries. Try decreasing the query resolution (?step=XX)")
return nil, &apiError{errorBadData, err}
}
qry, err := api.QueryEngine.NewRangeQuery(r.FormValue("query"), start, end, step)
if err != nil {
return nil, &apiError{errorBadData, err}
}
res := qry.Exec()
if res.Err != nil {
switch res.Err.(type) {
case promql.ErrQueryCanceled:
return nil, &apiError{errorCanceled, res.Err}
case promql.ErrQueryTimeout:
return nil, &apiError{errorTimeout, res.Err}
}
return nil, &apiError{errorExec, res.Err}
}
return &queryData{
ResultType: res.Value.Type(),
Result: res.Value,
}, nil
}
func (api *API) labelValues(r *http.Request) (interface{}, *apiError) {
name := route.Param(api.context(r), "name")
if !model.LabelNameRE.MatchString(name) {
return nil, &apiError{errorBadData, fmt.Errorf("invalid label name: %q", name)}
}
vals := api.Storage.LabelValuesForLabelName(model.LabelName(name))
sort.Sort(vals)
return vals, nil
}
func (api *API) series(r *http.Request) (interface{}, *apiError) {
r.ParseForm()
if len(r.Form["match[]"]) == 0 {
return nil, &apiError{errorBadData, fmt.Errorf("no match[] parameter provided")}
}
res := map[model.Fingerprint]metric.Metric{}
for _, lm := range r.Form["match[]"] {
matchers, err := promql.ParseMetricSelector(lm)
if err != nil {
return nil, &apiError{errorBadData, err}
}
for fp, met := range api.Storage.MetricsForLabelMatchers(
model.Earliest, model.Latest, // Get every series.
matchers...,
) {
res[fp] = met
}
}
metrics := make([]model.Metric, 0, len(res))
for _, met := range res {
metrics = append(metrics, met.Metric)
}
return metrics, nil
}
func (api *API) dropSeries(r *http.Request) (interface{}, *apiError) {
r.ParseForm()
if len(r.Form["match[]"]) == 0 {
return nil, &apiError{errorBadData, fmt.Errorf("no match[] parameter provided")}
}
fps := map[model.Fingerprint]struct{}{}
for _, lm := range r.Form["match[]"] {
matchers, err := promql.ParseMetricSelector(lm)
if err != nil {
return nil, &apiError{errorBadData, err}
}
for fp := range api.Storage.MetricsForLabelMatchers(
model.Earliest, model.Latest, // Get every series.
matchers...,
) {
fps[fp] = struct{}{}
}
}
for fp := range fps {
api.Storage.DropMetricsForFingerprints(fp)
}
res := struct {
NumDeleted int `json:"numDeleted"`
}{
NumDeleted: len(fps),
}
return res, nil
}
func respond(w http.ResponseWriter, data interface{}) {
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
b, err := json.Marshal(&response{
Status: statusSuccess,
Data: data,
})
if err != nil {
return
}
w.Write(b)
}
func respondError(w http.ResponseWriter, apiErr *apiError, data interface{}) {
w.Header().Set("Content-Type", "application/json")
var code int
switch apiErr.typ {
case errorBadData:
code = http.StatusBadRequest
case errorExec:
code = 422
case errorCanceled, errorTimeout:
code = http.StatusServiceUnavailable
default:
code = http.StatusInternalServerError
}
w.WriteHeader(code)
b, err := json.Marshal(&response{
Status: statusError,
ErrorType: apiErr.typ,
Error: apiErr.err.Error(),
Data: data,
})
if err != nil {
return
}
w.Write(b)
}
func parseTime(s string) (model.Time, error) {
if t, err := strconv.ParseFloat(s, 64); err == nil {
ts := int64(t * float64(time.Second))
return model.TimeFromUnixNano(ts), nil
}
if t, err := time.Parse(time.RFC3339Nano, s); err == nil {
return model.TimeFromUnixNano(t.UnixNano()), nil
}
return 0, fmt.Errorf("cannot parse %q to a valid timestamp", s)
}
func parseDuration(s string) (time.Duration, error) {
if d, err := strconv.ParseFloat(s, 64); err == nil {
return time.Duration(d * float64(time.Second)), nil
}
if d, err := model.ParseDuration(s); err == nil {
return time.Duration(d), nil
}
return 0, fmt.Errorf("cannot parse %q to a valid duration", s)
}