mirror of
https://github.com/prometheus/prometheus.git
synced 2025-01-12 22:37:27 -08:00
Add discovery support for triton compute nodes (#7250)
Added optional configuration item role, defaults to 'container' (backwards-compatible).
Setting role to 'cn' will discover compute nodes instead.
Human-friendly compute node hostname discovery depends on cmon 1.7.0:
c1a2aeca36
Adjust testcases to use discovery config per case as two different types are now supported.
Updated documentation:
* new role setting
* clarify what the name 'container' covers as triton uses different names in different locations
Signed-off-by: jzinkweg <jzinkweg@gmail.com>
This commit is contained in:
parent
f4dd45609a
commit
1f69c38ba4
|
@ -590,8 +590,8 @@ var expectedConf = &Config{
|
||||||
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
|
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
|
||||||
TritonSDConfigs: []*triton.SDConfig{
|
TritonSDConfigs: []*triton.SDConfig{
|
||||||
{
|
{
|
||||||
|
|
||||||
Account: "testAccount",
|
Account: "testAccount",
|
||||||
|
Role: "container",
|
||||||
DNSSuffix: "triton.example.com",
|
DNSSuffix: "triton.example.com",
|
||||||
Endpoint: "triton.example.com",
|
Endpoint: "triton.example.com",
|
||||||
Port: 9163,
|
Port: 9163,
|
||||||
|
|
|
@ -46,6 +46,7 @@ const (
|
||||||
|
|
||||||
// DefaultSDConfig is the default Triton SD configuration.
|
// DefaultSDConfig is the default Triton SD configuration.
|
||||||
var DefaultSDConfig = SDConfig{
|
var DefaultSDConfig = SDConfig{
|
||||||
|
Role: "container",
|
||||||
Port: 9163,
|
Port: 9163,
|
||||||
RefreshInterval: model.Duration(60 * time.Second),
|
RefreshInterval: model.Duration(60 * time.Second),
|
||||||
Version: 1,
|
Version: 1,
|
||||||
|
@ -54,6 +55,7 @@ var DefaultSDConfig = SDConfig{
|
||||||
// SDConfig is the configuration for Triton based service discovery.
|
// SDConfig is the configuration for Triton based service discovery.
|
||||||
type SDConfig struct {
|
type SDConfig struct {
|
||||||
Account string `yaml:"account"`
|
Account string `yaml:"account"`
|
||||||
|
Role string `yaml:"role"`
|
||||||
DNSSuffix string `yaml:"dns_suffix"`
|
DNSSuffix string `yaml:"dns_suffix"`
|
||||||
Endpoint string `yaml:"endpoint"`
|
Endpoint string `yaml:"endpoint"`
|
||||||
Groups []string `yaml:"groups,omitempty"`
|
Groups []string `yaml:"groups,omitempty"`
|
||||||
|
@ -71,6 +73,9 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if c.Role != "container" && c.Role != "cn" {
|
||||||
|
return errors.New("triton SD configuration requires role to be 'container' or 'cn'")
|
||||||
|
}
|
||||||
if c.Account == "" {
|
if c.Account == "" {
|
||||||
return errors.New("triton SD configuration requires an account")
|
return errors.New("triton SD configuration requires an account")
|
||||||
}
|
}
|
||||||
|
@ -87,7 +92,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// DiscoveryResponse models a JSON response from the Triton discovery.
|
// DiscoveryResponse models a JSON response from the Triton discovery.
|
||||||
type discoveryResponse struct {
|
type DiscoveryResponse struct {
|
||||||
Containers []struct {
|
Containers []struct {
|
||||||
Groups []string `json:"groups"`
|
Groups []string `json:"groups"`
|
||||||
ServerUUID string `json:"server_uuid"`
|
ServerUUID string `json:"server_uuid"`
|
||||||
|
@ -98,6 +103,14 @@ type discoveryResponse struct {
|
||||||
} `json:"containers"`
|
} `json:"containers"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ComputeNodeDiscoveryResponse models a JSON response from the Triton discovery /gz/ endpoint.
|
||||||
|
type ComputeNodeDiscoveryResponse struct {
|
||||||
|
ComputeNodes []struct {
|
||||||
|
ServerUUID string `json:"server_uuid"`
|
||||||
|
ServerHostname string `json:"server_hostname"`
|
||||||
|
} `json:"cns"`
|
||||||
|
}
|
||||||
|
|
||||||
// Discovery periodically performs Triton-SD requests. It implements
|
// Discovery periodically performs Triton-SD requests. It implements
|
||||||
// the Discoverer interface.
|
// the Discoverer interface.
|
||||||
type Discovery struct {
|
type Discovery struct {
|
||||||
|
@ -137,17 +150,34 @@ func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
|
||||||
return d, nil
|
return d, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// triton-cmon has two discovery endpoints:
|
||||||
|
// https://github.com/joyent/triton-cmon/blob/master/lib/endpoints/discover.js
|
||||||
|
//
|
||||||
|
// The default endpoint exposes "containers", otherwise called "virtual machines" in triton,
|
||||||
|
// which are (branded) zones running on the triton platform.
|
||||||
|
//
|
||||||
|
// The /gz/ endpoint exposes "compute nodes", also known as "servers" or "global zones",
|
||||||
|
// on which the "containers" are running.
|
||||||
|
//
|
||||||
|
// As triton is not internally consistent in using these names,
|
||||||
|
// the terms as used in triton-cmon are used here.
|
||||||
|
|
||||||
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||||
var endpoint = fmt.Sprintf("https://%s:%d/v%d/discover", d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
|
var endpointFormat string
|
||||||
|
switch d.sdConfig.Role {
|
||||||
|
case "container":
|
||||||
|
endpointFormat = "https://%s:%d/v%d/discover"
|
||||||
|
case "cn":
|
||||||
|
endpointFormat = "https://%s:%d/v%d/gz/discover"
|
||||||
|
default:
|
||||||
|
return nil, errors.New(fmt.Sprintf("unknown role '%s' in configuration", d.sdConfig.Role))
|
||||||
|
}
|
||||||
|
var endpoint = fmt.Sprintf(endpointFormat, d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
|
||||||
if len(d.sdConfig.Groups) > 0 {
|
if len(d.sdConfig.Groups) > 0 {
|
||||||
groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ","))
|
groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ","))
|
||||||
endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups)
|
endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups)
|
||||||
}
|
}
|
||||||
|
|
||||||
tg := &targetgroup.Group{
|
|
||||||
Source: endpoint,
|
|
||||||
}
|
|
||||||
|
|
||||||
req, err := http.NewRequest("GET", endpoint, nil)
|
req, err := http.NewRequest("GET", endpoint, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -168,8 +198,24 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||||
return nil, errors.Wrap(err, "an error occurred when reading the response body")
|
return nil, errors.Wrap(err, "an error occurred when reading the response body")
|
||||||
}
|
}
|
||||||
|
|
||||||
dr := discoveryResponse{}
|
// The JSON response body is different so it needs to be processed/mapped separately.
|
||||||
err = json.Unmarshal(data, &dr)
|
switch d.sdConfig.Role {
|
||||||
|
case "container":
|
||||||
|
return d.processContainerResponse(data, endpoint)
|
||||||
|
case "cn":
|
||||||
|
return d.processComputeNodeResponse(data, endpoint)
|
||||||
|
default:
|
||||||
|
return nil, errors.New(fmt.Sprintf("unknown role '%s' in configuration", d.sdConfig.Role))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *Discovery) processContainerResponse(data []byte, endpoint string) ([]*targetgroup.Group, error) {
|
||||||
|
tg := &targetgroup.Group{
|
||||||
|
Source: endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
dr := DiscoveryResponse{}
|
||||||
|
err := json.Unmarshal(data, &dr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "an error occurred unmarshaling the discovery response json")
|
return nil, errors.Wrap(err, "an error occurred unmarshaling the discovery response json")
|
||||||
}
|
}
|
||||||
|
@ -195,3 +241,28 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
|
||||||
|
|
||||||
return []*targetgroup.Group{tg}, nil
|
return []*targetgroup.Group{tg}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (d *Discovery) processComputeNodeResponse(data []byte, endpoint string) ([]*targetgroup.Group, error) {
|
||||||
|
tg := &targetgroup.Group{
|
||||||
|
Source: endpoint,
|
||||||
|
}
|
||||||
|
|
||||||
|
dr := ComputeNodeDiscoveryResponse{}
|
||||||
|
err := json.Unmarshal(data, &dr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrap(err, "an error occurred unmarshaling the compute node discovery response json")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cn := range dr.ComputeNodes {
|
||||||
|
labels := model.LabelSet{
|
||||||
|
tritonLabelMachineID: model.LabelValue(cn.ServerUUID),
|
||||||
|
tritonLabelMachineAlias: model.LabelValue(cn.ServerHostname),
|
||||||
|
}
|
||||||
|
addr := fmt.Sprintf("%s.%s:%d", cn.ServerUUID, d.sdConfig.DNSSuffix, d.sdConfig.Port)
|
||||||
|
labels[model.AddressLabel] = model.LabelValue(addr)
|
||||||
|
|
||||||
|
tg.Targets = append(tg.Targets, labels)
|
||||||
|
}
|
||||||
|
|
||||||
|
return []*targetgroup.Group{tg}, nil
|
||||||
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ import (
|
||||||
var (
|
var (
|
||||||
conf = SDConfig{
|
conf = SDConfig{
|
||||||
Account: "testAccount",
|
Account: "testAccount",
|
||||||
|
Role: "container",
|
||||||
DNSSuffix: "triton.example.com",
|
DNSSuffix: "triton.example.com",
|
||||||
Endpoint: "127.0.0.1",
|
Endpoint: "127.0.0.1",
|
||||||
Port: 443,
|
Port: 443,
|
||||||
|
@ -42,6 +43,7 @@ var (
|
||||||
}
|
}
|
||||||
badconf = SDConfig{
|
badconf = SDConfig{
|
||||||
Account: "badTestAccount",
|
Account: "badTestAccount",
|
||||||
|
Role: "container",
|
||||||
DNSSuffix: "bad.triton.example.com",
|
DNSSuffix: "bad.triton.example.com",
|
||||||
Endpoint: "127.0.0.1",
|
Endpoint: "127.0.0.1",
|
||||||
Port: 443,
|
Port: 443,
|
||||||
|
@ -56,6 +58,7 @@ var (
|
||||||
}
|
}
|
||||||
groupsconf = SDConfig{
|
groupsconf = SDConfig{
|
||||||
Account: "testAccount",
|
Account: "testAccount",
|
||||||
|
Role: "container",
|
||||||
DNSSuffix: "triton.example.com",
|
DNSSuffix: "triton.example.com",
|
||||||
Endpoint: "127.0.0.1",
|
Endpoint: "127.0.0.1",
|
||||||
Groups: []string{"foo", "bar"},
|
Groups: []string{"foo", "bar"},
|
||||||
|
@ -64,6 +67,16 @@ var (
|
||||||
RefreshInterval: 1,
|
RefreshInterval: 1,
|
||||||
TLSConfig: config.TLSConfig{InsecureSkipVerify: true},
|
TLSConfig: config.TLSConfig{InsecureSkipVerify: true},
|
||||||
}
|
}
|
||||||
|
cnconf = SDConfig{
|
||||||
|
Account: "testAccount",
|
||||||
|
Role: "cn",
|
||||||
|
DNSSuffix: "triton.example.com",
|
||||||
|
Endpoint: "127.0.0.1",
|
||||||
|
Port: 443,
|
||||||
|
Version: 1,
|
||||||
|
RefreshInterval: 1,
|
||||||
|
TLSConfig: config.TLSConfig{InsecureSkipVerify: true},
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func newTritonDiscovery(c SDConfig) (*Discovery, error) {
|
func newTritonDiscovery(c SDConfig) (*Discovery, error) {
|
||||||
|
@ -103,8 +116,22 @@ func TestTritonSDNewGroupsConfig(t *testing.T) {
|
||||||
testutil.Equals(t, groupsconf.Port, td.sdConfig.Port)
|
testutil.Equals(t, groupsconf.Port, td.sdConfig.Port)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTritonSDNewCNConfig(t *testing.T) {
|
||||||
|
td, err := newTritonDiscovery(cnconf)
|
||||||
|
testutil.Ok(t, err)
|
||||||
|
testutil.Assert(t, td != nil, "")
|
||||||
|
testutil.Assert(t, td.client != nil, "")
|
||||||
|
testutil.Assert(t, td.interval != 0, "")
|
||||||
|
testutil.Assert(t, td.sdConfig != nil, "")
|
||||||
|
testutil.Equals(t, cnconf.Role, td.sdConfig.Role)
|
||||||
|
testutil.Equals(t, cnconf.Account, td.sdConfig.Account)
|
||||||
|
testutil.Equals(t, cnconf.DNSSuffix, td.sdConfig.DNSSuffix)
|
||||||
|
testutil.Equals(t, cnconf.Endpoint, td.sdConfig.Endpoint)
|
||||||
|
testutil.Equals(t, cnconf.Port, td.sdConfig.Port)
|
||||||
|
}
|
||||||
|
|
||||||
func TestTritonSDRefreshNoTargets(t *testing.T) {
|
func TestTritonSDRefreshNoTargets(t *testing.T) {
|
||||||
tgts := testTritonSDRefresh(t, "{\"containers\":[]}")
|
tgts := testTritonSDRefresh(t, conf, "{\"containers\":[]}")
|
||||||
testutil.Assert(t, tgts == nil, "")
|
testutil.Assert(t, tgts == nil, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,7 +156,7 @@ func TestTritonSDRefreshMultipleTargets(t *testing.T) {
|
||||||
}`
|
}`
|
||||||
)
|
)
|
||||||
|
|
||||||
tgts := testTritonSDRefresh(t, dstr)
|
tgts := testTritonSDRefresh(t, conf, dstr)
|
||||||
testutil.Assert(t, tgts != nil, "")
|
testutil.Assert(t, tgts != nil, "")
|
||||||
testutil.Equals(t, 2, len(tgts))
|
testutil.Equals(t, 2, len(tgts))
|
||||||
}
|
}
|
||||||
|
@ -156,9 +183,45 @@ func TestTritonSDRefreshCancelled(t *testing.T) {
|
||||||
testutil.Equals(t, strings.Contains(err.Error(), context.Canceled.Error()), true)
|
testutil.Equals(t, strings.Contains(err.Error(), context.Canceled.Error()), true)
|
||||||
}
|
}
|
||||||
|
|
||||||
func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet {
|
func TestTritonSDRefreshCNsUUIDOnly(t *testing.T) {
|
||||||
var (
|
var (
|
||||||
td, _ = newTritonDiscovery(conf)
|
dstr = `{"cns":[
|
||||||
|
{
|
||||||
|
"server_uuid":"44454c4c-5000-104d-8037-b7c04f5a5131"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"server_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6"
|
||||||
|
}]
|
||||||
|
}`
|
||||||
|
)
|
||||||
|
|
||||||
|
tgts := testTritonSDRefresh(t, cnconf, dstr)
|
||||||
|
testutil.Assert(t, tgts != nil, "")
|
||||||
|
testutil.Equals(t, 2, len(tgts))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTritonSDRefreshCNsWithHostname(t *testing.T) {
|
||||||
|
var (
|
||||||
|
dstr = `{"cns":[
|
||||||
|
{
|
||||||
|
"server_uuid":"44454c4c-5000-104d-8037-b7c04f5a5131",
|
||||||
|
"server_hostname": "server01"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"server_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6",
|
||||||
|
"server_hostname": "server02"
|
||||||
|
}]
|
||||||
|
}`
|
||||||
|
)
|
||||||
|
|
||||||
|
tgts := testTritonSDRefresh(t, cnconf, dstr)
|
||||||
|
testutil.Assert(t, tgts != nil, "")
|
||||||
|
testutil.Equals(t, 2, len(tgts))
|
||||||
|
}
|
||||||
|
|
||||||
|
func testTritonSDRefresh(t *testing.T, c SDConfig, dstr string) []model.LabelSet {
|
||||||
|
var (
|
||||||
|
td, _ = newTritonDiscovery(c)
|
||||||
s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
fmt.Fprintln(w, dstr)
|
fmt.Fprintln(w, dstr)
|
||||||
}))
|
}))
|
||||||
|
|
|
@ -1020,37 +1020,60 @@ Serverset data must be in the JSON format, the Thrift format is not currently su
|
||||||
scrape targets from [Container Monitor](https://github.com/joyent/rfd/blob/master/rfd/0027/README.md)
|
scrape targets from [Container Monitor](https://github.com/joyent/rfd/blob/master/rfd/0027/README.md)
|
||||||
discovery endpoints.
|
discovery endpoints.
|
||||||
|
|
||||||
The following meta labels are available on targets during relabeling:
|
One of the following `<triton_role>` types can be configured to discover targets:
|
||||||
|
|
||||||
|
#### `container`
|
||||||
|
|
||||||
|
The `container` role discovers one target per "virtual machine" owned by the `account`.
|
||||||
|
These are SmartOS zones or lx/KVM/bhyve branded zones.
|
||||||
|
|
||||||
|
The following meta labels are available on targets during [relabeling](#relabel_config):
|
||||||
|
|
||||||
* `__meta_triton_groups`: the list of groups belonging to the target joined by a comma separator
|
* `__meta_triton_groups`: the list of groups belonging to the target joined by a comma separator
|
||||||
* `__meta_triton_machine_alias`: the alias of the target container
|
* `__meta_triton_machine_alias`: the alias of the target container
|
||||||
* `__meta_triton_machine_brand`: the brand of the target container
|
* `__meta_triton_machine_brand`: the brand of the target container
|
||||||
* `__meta_triton_machine_id`: the UUID of the target container
|
* `__meta_triton_machine_id`: the UUID of the target container
|
||||||
* `__meta_triton_machine_image`: the target containers image type
|
* `__meta_triton_machine_image`: the target container's image type
|
||||||
* `__meta_triton_server_id`: the server UUID for the target container
|
* `__meta_triton_server_id`: the server UUID the target container is running on
|
||||||
|
|
||||||
|
#### `cn`
|
||||||
|
|
||||||
|
The `cn` role discovers one target per compute node (also known as "server" or "global zone") making up the Triton infrastructure.
|
||||||
|
The `account` must be a Triton operator and is currently required to own at least one `container`.
|
||||||
|
|
||||||
|
The following meta labels are available on targets during [relabeling](#relabel_config):
|
||||||
|
|
||||||
|
* `__meta_triton_machine_alias`: the hostname of the target (requires triton-cmon 1.7.0 or newer)
|
||||||
|
* `__meta_triton_machine_id`: the UUID of the target
|
||||||
|
|
||||||
|
See below for the configuration options for Triton discovery:
|
||||||
```yaml
|
```yaml
|
||||||
# The information to access the Triton discovery API.
|
# The information to access the Triton discovery API.
|
||||||
|
|
||||||
# The account to use for discovering new target containers.
|
# The account to use for discovering new targets.
|
||||||
account: <string>
|
account: <string>
|
||||||
|
|
||||||
# The DNS suffix which should be applied to target containers.
|
# The type of targets to discover, can be set to:
|
||||||
|
# * "container" to discover virtual machines (SmartOS zones, lx/KVM/bhyve branded zones) running on Triton
|
||||||
|
# * "cn" to discover compute nodes (servers/global zones) making up the Triton infrastructure
|
||||||
|
[ role: <string> | default = "container" ]
|
||||||
|
|
||||||
|
# The DNS suffix which should be applied to targets.
|
||||||
dns_suffix: <string>
|
dns_suffix: <string>
|
||||||
|
|
||||||
# The Triton discovery endpoint (e.g. 'cmon.us-east-3b.triton.zone'). This is
|
# The Triton discovery endpoint (e.g. 'cmon.us-east-3b.triton.zone'). This is
|
||||||
# often the same value as dns_suffix.
|
# often the same value as dns_suffix.
|
||||||
endpoint: <string>
|
endpoint: <string>
|
||||||
|
|
||||||
# A list of groups for which targets are retrieved. If omitted, all containers
|
# A list of groups for which targets are retrieved, only supported when `role` == `container`.
|
||||||
# available to the requesting account are scraped.
|
# If omitted, all containers owned by the requesting account are scraped.
|
||||||
groups:
|
groups:
|
||||||
[ - <string> ... ]
|
[ - <string> ... ]
|
||||||
|
|
||||||
# The port to use for discovery and metric scraping.
|
# The port to use for discovery and metric scraping.
|
||||||
[ port: <int> | default = 9163 ]
|
[ port: <int> | default = 9163 ]
|
||||||
|
|
||||||
# The interval which should be used for refreshing target containers.
|
# The interval which should be used for refreshing targets.
|
||||||
[ refresh_interval: <duration> | default = 60s ]
|
[ refresh_interval: <duration> | default = 60s ]
|
||||||
|
|
||||||
# The Triton discovery API version.
|
# The Triton discovery API version.
|
||||||
|
|
Loading…
Reference in a new issue