prometheus/retrieval/discovery/kubernetes/discovery.go

// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package kubernetes

import (
	"fmt"
	"io/ioutil"
	"net"
	"net/http"
	"os"
	"sync"
	"time"

	"github.com/prometheus/common/log"
	"github.com/prometheus/common/model"
	"golang.org/x/net/context"

	"github.com/prometheus/prometheus/config"
	"github.com/prometheus/prometheus/util/httputil"
)

const (
	// kubernetesMetaLabelPrefix is the meta prefix used for all meta labels.
	// in this discovery.
	metaLabelPrefix = model.MetaLabelPrefix + "kubernetes_"

	// roleLabel is the name for the label containing a target's role.
	roleLabel = metaLabelPrefix + "role"

	sourcePodPrefix = "pods"
	// podsTargetGroupNAme is the name given to the target group for pods
	podsTargetGroupName = "pods"
	// podNamespaceLabel is the name for the label containing a target pod's namespace
	podNamespaceLabel = metaLabelPrefix + "pod_namespace"
	// podNameLabel is the name for the label containing a target pod's name
	podNameLabel = metaLabelPrefix + "pod_name"
	// podAddressLabel is the name for the label containing a target pod's IP address (the PodIP)
	podAddressLabel = metaLabelPrefix + "pod_address"
	// podContainerNameLabel is the name for the label containing a target's container name
	podContainerNameLabel = metaLabelPrefix + "pod_container_name"
	// podContainerPortNameLabel is the name for the label containing the name of the port selected for a target
	podContainerPortNameLabel = metaLabelPrefix + "pod_container_port_name"
	// PodContainerPortListLabel is the name for the label containing a list of all TCP ports on the target container
	podContainerPortListLabel = metaLabelPrefix + "pod_container_port_list"
	// PodContainerPortMapPrefix is the prefix used to create the names of labels that associate container port names to port values
	// Such labels will be named (podContainerPortMapPrefix)_(PortName) = (ContainerPort)
	podContainerPortMapPrefix = metaLabelPrefix + "pod_container_port_map_"
	// podReadyLabel is the name for the label containing the 'Ready' status (true/false/unknown) for a target
	podReadyLabel = metaLabelPrefix + "pod_ready"
	// podLabelPrefix is the prefix for prom label names corresponding to k8s labels for a target pod
	podLabelPrefix = metaLabelPrefix + "pod_label_"
	// podAnnotationPrefix is the prefix for prom label names corresponding to k8s annotations for a target pod
	podAnnotationPrefix = metaLabelPrefix + "pod_annotation_"

	sourceServicePrefix = "services"
	// serviceNamespaceLabel is the name for the label containing a target's service namespace.
	serviceNamespaceLabel = metaLabelPrefix + "service_namespace"
	// serviceNameLabel is the name for the label containing a target's service name.
	serviceNameLabel = metaLabelPrefix + "service_name"
	// serviceLabelPrefix is the prefix for the service labels.
	serviceLabelPrefix = metaLabelPrefix + "service_label_"
	// serviceAnnotationPrefix is the prefix for the service annotations.
	serviceAnnotationPrefix = metaLabelPrefix + "service_annotation_"

	// nodesTargetGroupName is the name given to the target group for nodes.
	nodesTargetGroupName = "nodes"
	// nodeLabelPrefix is the prefix for the node labels.
	nodeLabelPrefix = metaLabelPrefix + "node_label_"
	// nodeAddressPrefix is the prefix for the node addresses.
	nodeAddressPrefix = metaLabelPrefix + "node_address_"
	// nodePortLabel is the name of the label for the node port.
	nodePortLabel = metaLabelPrefix + "node_port"

	// apiServersTargetGroupName is the name given to the target group for API servers.
	apiServersTargetGroupName = "apiServers"

	serviceAccountToken  = "/var/run/secrets/kubernetes.io/serviceaccount/token"
	serviceAccountCACert = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"

	apiVersion          = "v1"
	apiPrefix           = "/api/" + apiVersion
	nodesURL            = apiPrefix + "/nodes"
	podsURL             = apiPrefix + "/pods"
	servicesURL         = apiPrefix + "/services"
	endpointsURL        = apiPrefix + "/endpoints"
	serviceEndpointsURL = apiPrefix + "/namespaces/%s/endpoints/%s"
)

// Discovery implements a TargetProvider for Kubernetes services.
type Discovery struct {
	client *http.Client
	Conf   *config.KubernetesSDConfig

	apiServers   []config.URL
	apiServersMu sync.RWMutex
	runDone      chan struct{}
}

// Initialize sets up the discovery for usage.
func (kd *Discovery) Initialize() error {
	client, err := newKubernetesHTTPClient(kd.Conf)

	if err != nil {
		return err
	}

	kd.apiServers = kd.Conf.APIServers
	kd.client = client
	kd.runDone = make(chan struct{})

	return nil
}

// Run implements the TargetProvider interface.
func (kd *Discovery) Run(ctx context.Context, ch chan<- []*config.TargetGroup) {
	log.Debugf("Kubernetes Discovery.Run beginning")
	defer close(ch)

	switch kd.Conf.Role {
	case config.KubernetesRolePod, config.KubernetesRoleContainer:
		pd := &podDiscovery{
			retryInterval: time.Duration(kd.Conf.RetryInterval),
			kd:            kd,
		}
		pd.run(ctx, ch)
	case config.KubernetesRoleNode:
		nd := &nodeDiscovery{
			retryInterval: time.Duration(kd.Conf.RetryInterval),
			kd:            kd,
		}
		nd.run(ctx, ch)
	case config.KubernetesRoleService, config.KubernetesRoleEndpoint:
		sd := &serviceDiscovery{
			retryInterval: time.Duration(kd.Conf.RetryInterval),
			kd:            kd,
		}
		sd.run(ctx, ch)
	case config.KubernetesRoleAPIServer:
		select {
		case ch <- []*config.TargetGroup{kd.updateAPIServersTargetGroup()}:
		case <-ctx.Done():
			return
		}
	default:
		log.Errorf("unknown Kubernetes discovery kind %q", kd.Conf.Role)
		return
	}
}

func (kd *Discovery) queryAPIServerPath(path string) (*http.Response, error) {
	req, err := http.NewRequest("GET", path, nil)
	if err != nil {
		return nil, err
	}
	return kd.queryAPIServerReq(req)
}

func (kd *Discovery) queryAPIServerReq(req *http.Request) (*http.Response, error) {
	// Lock in case we need to rotate API servers to request.
	kd.apiServersMu.Lock()
	defer kd.apiServersMu.Unlock()
	var lastErr error
	for i := 0; i < len(kd.apiServers); i++ {
		cloneReq := *req
		cloneReq.URL.Host = kd.apiServers[0].Host
		cloneReq.URL.Scheme = kd.apiServers[0].Scheme
		res, err := kd.client.Do(&cloneReq)
		if err == nil {
			return res, nil
		}
		lastErr = err
		kd.rotateAPIServers()
	}
	return nil, fmt.Errorf("unable to query any API servers: %v", lastErr)
}

func (kd *Discovery) rotateAPIServers() {
	if len(kd.apiServers) > 1 {
		kd.apiServers = append(kd.apiServers[1:], kd.apiServers[0])
	}
}

func (kd *Discovery) updateAPIServersTargetGroup() *config.TargetGroup {
	tg := &config.TargetGroup{
		Source: apiServersTargetGroupName,
		Labels: model.LabelSet{
			roleLabel: model.LabelValue("apiserver"),
		},
	}

	for _, apiServer := range kd.apiServers {
		apiServerAddress := apiServer.Host
		_, _, err := net.SplitHostPort(apiServerAddress)
		// If error then no port is specified - use default for scheme.
		if err != nil {
			switch apiServer.Scheme {
			case "http":
				apiServerAddress = net.JoinHostPort(apiServerAddress, "80")
			case "https":
				apiServerAddress = net.JoinHostPort(apiServerAddress, "443")
			}
		}

		t := model.LabelSet{
			model.AddressLabel: model.LabelValue(apiServerAddress),
			model.SchemeLabel:  model.LabelValue(apiServer.Scheme),
		}
		tg.Targets = append(tg.Targets, t)
	}

	return tg
}

func newKubernetesHTTPClient(conf *config.KubernetesSDConfig) (*http.Client, error) {
	bearerTokenFile := conf.BearerTokenFile
	caFile := conf.TLSConfig.CAFile
	if conf.InCluster {
		if len(bearerTokenFile) == 0 {
			bearerTokenFile = serviceAccountToken
		}
		if len(caFile) == 0 {
			// With recent versions, the CA certificate is mounted as a secret
			// but we need to handle older versions too. In this case, don't
			// set the CAFile & the configuration will have to use InsecureSkipVerify.
			if _, err := os.Stat(serviceAccountCACert); err == nil {
				caFile = serviceAccountCACert
			}
		}
	}

	tlsOpts := httputil.TLSOptions{
		InsecureSkipVerify: conf.TLSConfig.InsecureSkipVerify,
		CAFile:             caFile,
		CertFile:           conf.TLSConfig.CertFile,
		KeyFile:            conf.TLSConfig.KeyFile,
	}
	tlsConfig, err := httputil.NewTLSConfig(tlsOpts)
	if err != nil {
		return nil, err
	}

	var rt http.RoundTripper = &http.Transport{
		Dial: func(netw, addr string) (c net.Conn, err error) {
			c, err = net.DialTimeout(netw, addr, time.Duration(conf.RequestTimeout))
			return
		},
		TLSClientConfig: tlsConfig,
	}

	// If a bearer token is provided, create a round tripper that will set the
	// Authorization header correctly on each request.
	bearerToken := conf.BearerToken
	if len(bearerToken) == 0 && len(bearerTokenFile) > 0 {
		b, err := ioutil.ReadFile(bearerTokenFile)
		if err != nil {
			return nil, fmt.Errorf("unable to read bearer token file %s: %s", bearerTokenFile, err)
		}
		bearerToken = string(b)
	}
	if len(bearerToken) > 0 {
		rt = httputil.NewBearerAuthRoundTripper(bearerToken, rt)
	}

	if conf.BasicAuth != nil {
		rt = httputil.NewBasicAuthRoundTripper(conf.BasicAuth.Username, conf.BasicAuth.Password, rt)
	}

	return &http.Client{
		Transport: rt,
	}, nil
}

// Until loops until stop channel is closed, running f every period.
// f may not be invoked if stop channel is already closed.
func until(f func(), period time.Duration, stopCh <-chan struct{}) {
	select {
	case <-stopCh:
		return
	default:
		f()
	}
	for {
		select {
		case <-stopCh:
			return
		case <-time.After(period):
			f()
		}
	}
}