Add discovery support for triton compute nodes (#7250)

Added optional configuration item role, defaults to 'container' (backwards-compatible).
Setting role to 'cn' will discover compute nodes instead.

Human-friendly compute node hostname discovery depends on cmon 1.7.0:
c1a2aeca36

Adjust testcases to use discovery config per case as two different types are now supported.

Updated documentation:
* new role setting
* clarify what the name 'container' covers as triton uses different names in different locations

Signed-off-by: jzinkweg <jzinkweg@gmail.com>
This commit is contained in:
Jop Zinkweg 2020-05-22 17:19:21 +02:00 committed by GitHub
parent f4dd45609a
commit 1f69c38ba4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 178 additions and 21 deletions

View file

@ -590,8 +590,8 @@ var expectedConf = &Config{
ServiceDiscoveryConfig: sd_config.ServiceDiscoveryConfig{
TritonSDConfigs: []*triton.SDConfig{
{
Account: "testAccount",
Role: "container",
DNSSuffix: "triton.example.com",
Endpoint: "triton.example.com",
Port: 9163,

View file

@ -46,6 +46,7 @@ const (
// DefaultSDConfig is the default Triton SD configuration.
var DefaultSDConfig = SDConfig{
Role: "container",
Port: 9163,
RefreshInterval: model.Duration(60 * time.Second),
Version: 1,
@ -54,6 +55,7 @@ var DefaultSDConfig = SDConfig{
// SDConfig is the configuration for Triton based service discovery.
type SDConfig struct {
Account string `yaml:"account"`
Role string `yaml:"role"`
DNSSuffix string `yaml:"dns_suffix"`
Endpoint string `yaml:"endpoint"`
Groups []string `yaml:"groups,omitempty"`
@ -71,6 +73,9 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
if err != nil {
return err
}
if c.Role != "container" && c.Role != "cn" {
return errors.New("triton SD configuration requires role to be 'container' or 'cn'")
}
if c.Account == "" {
return errors.New("triton SD configuration requires an account")
}
@ -87,7 +92,7 @@ func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
}
// DiscoveryResponse models a JSON response from the Triton discovery.
type discoveryResponse struct {
type DiscoveryResponse struct {
Containers []struct {
Groups []string `json:"groups"`
ServerUUID string `json:"server_uuid"`
@ -98,6 +103,14 @@ type discoveryResponse struct {
} `json:"containers"`
}
// ComputeNodeDiscoveryResponse models the JSON response returned by the
// triton-cmon /gz/ discovery endpoint, which is the endpoint queried when the
// configured role is "cn".
type ComputeNodeDiscoveryResponse struct {
// ComputeNodes is decoded from the top-level "cns" array of the response.
ComputeNodes []struct {
// ServerUUID is the UUID of the compute node.
ServerUUID string `json:"server_uuid"`
// ServerHostname is the hostname of the compute node. It may be missing
// from the response (it requires triton-cmon 1.7.0 or newer), in which
// case it decodes to the empty string.
ServerHostname string `json:"server_hostname"`
} `json:"cns"`
}
// Discovery periodically performs Triton-SD requests. It implements
// the Discoverer interface.
type Discovery struct {
@ -137,17 +150,34 @@ func New(logger log.Logger, conf *SDConfig) (*Discovery, error) {
return d, nil
}
// triton-cmon has two discovery endpoints:
// https://github.com/joyent/triton-cmon/blob/master/lib/endpoints/discover.js
//
// The default endpoint exposes "containers", otherwise called "virtual machines" in triton,
// which are (branded) zones running on the triton platform.
//
// The /gz/ endpoint exposes "compute nodes", also known as "servers" or "global zones",
// on which the "containers" are running.
//
// As triton is not internally consistent in using these names,
// the terms as used in triton-cmon are used here.
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
var endpoint = fmt.Sprintf("https://%s:%d/v%d/discover", d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
var endpointFormat string
switch d.sdConfig.Role {
case "container":
endpointFormat = "https://%s:%d/v%d/discover"
case "cn":
endpointFormat = "https://%s:%d/v%d/gz/discover"
default:
return nil, errors.New(fmt.Sprintf("unknown role '%s' in configuration", d.sdConfig.Role))
}
var endpoint = fmt.Sprintf(endpointFormat, d.sdConfig.Endpoint, d.sdConfig.Port, d.sdConfig.Version)
if len(d.sdConfig.Groups) > 0 {
groups := url.QueryEscape(strings.Join(d.sdConfig.Groups, ","))
endpoint = fmt.Sprintf("%s?groups=%s", endpoint, groups)
}
tg := &targetgroup.Group{
Source: endpoint,
}
req, err := http.NewRequest("GET", endpoint, nil)
if err != nil {
return nil, err
@ -168,8 +198,24 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
return nil, errors.Wrap(err, "an error occurred when reading the response body")
}
dr := discoveryResponse{}
err = json.Unmarshal(data, &dr)
// The JSON response body is different so it needs to be processed/mapped separately.
switch d.sdConfig.Role {
case "container":
return d.processContainerResponse(data, endpoint)
case "cn":
return d.processComputeNodeResponse(data, endpoint)
default:
return nil, errors.New(fmt.Sprintf("unknown role '%s' in configuration", d.sdConfig.Role))
}
}
func (d *Discovery) processContainerResponse(data []byte, endpoint string) ([]*targetgroup.Group, error) {
tg := &targetgroup.Group{
Source: endpoint,
}
dr := DiscoveryResponse{}
err := json.Unmarshal(data, &dr)
if err != nil {
return nil, errors.Wrap(err, "an error occurred unmarshaling the discovery response json")
}
@ -195,3 +241,28 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
return []*targetgroup.Group{tg}, nil
}
// processComputeNodeResponse decodes the body of a compute node ("cn")
// discovery response and turns every listed compute node into one target.
//
// data is the raw JSON response body and endpoint is the URL it was fetched
// from, which is used as the Source of the resulting target group. Each target
// carries the machine ID and machine alias labels plus an address of the form
// "<server_uuid>.<dns_suffix>:<port>". An error is returned when data cannot
// be unmarshaled.
func (d *Discovery) processComputeNodeResponse(data []byte, endpoint string) ([]*targetgroup.Group, error) {
	var resp ComputeNodeDiscoveryResponse
	if err := json.Unmarshal(data, &resp); err != nil {
		return nil, errors.Wrap(err, "an error occurred unmarshaling the compute node discovery response json")
	}

	group := &targetgroup.Group{Source: endpoint}
	for _, node := range resp.ComputeNodes {
		target := model.LabelSet{
			tritonLabelMachineID:    model.LabelValue(node.ServerUUID),
			tritonLabelMachineAlias: model.LabelValue(node.ServerHostname),
		}

		// Compute nodes are addressed by UUID underneath the configured DNS suffix.
		address := fmt.Sprintf("%s.%s:%d", node.ServerUUID, d.sdConfig.DNSSuffix, d.sdConfig.Port)
		target[model.AddressLabel] = model.LabelValue(address)

		group.Targets = append(group.Targets, target)
	}

	return []*targetgroup.Group{group}, nil
}

View file

@ -33,6 +33,7 @@ import (
var (
conf = SDConfig{
Account: "testAccount",
Role: "container",
DNSSuffix: "triton.example.com",
Endpoint: "127.0.0.1",
Port: 443,
@ -42,6 +43,7 @@ var (
}
badconf = SDConfig{
Account: "badTestAccount",
Role: "container",
DNSSuffix: "bad.triton.example.com",
Endpoint: "127.0.0.1",
Port: 443,
@ -56,6 +58,7 @@ var (
}
groupsconf = SDConfig{
Account: "testAccount",
Role: "container",
DNSSuffix: "triton.example.com",
Endpoint: "127.0.0.1",
Groups: []string{"foo", "bar"},
@ -64,6 +67,16 @@ var (
RefreshInterval: 1,
TLSConfig: config.TLSConfig{InsecureSkipVerify: true},
}
cnconf = SDConfig{
Account: "testAccount",
Role: "cn",
DNSSuffix: "triton.example.com",
Endpoint: "127.0.0.1",
Port: 443,
Version: 1,
RefreshInterval: 1,
TLSConfig: config.TLSConfig{InsecureSkipVerify: true},
}
)
func newTritonDiscovery(c SDConfig) (*Discovery, error) {
@ -103,8 +116,22 @@ func TestTritonSDNewGroupsConfig(t *testing.T) {
testutil.Equals(t, groupsconf.Port, td.sdConfig.Port)
}
// TestTritonSDNewCNConfig checks that a Discovery created from the compute
// node ("cn") configuration is fully initialised and keeps the configured
// values.
func TestTritonSDNewCNConfig(t *testing.T) {
	disc, err := newTritonDiscovery(cnconf)
	testutil.Ok(t, err)

	// The discovery itself and everything it depends on must be set up.
	testutil.Assert(t, disc != nil, "")
	testutil.Assert(t, disc.client != nil, "")
	testutil.Assert(t, disc.interval != 0, "")
	testutil.Assert(t, disc.sdConfig != nil, "")

	// The stored configuration must mirror the one passed in.
	got := disc.sdConfig
	testutil.Equals(t, cnconf.Role, got.Role)
	testutil.Equals(t, cnconf.Account, got.Account)
	testutil.Equals(t, cnconf.DNSSuffix, got.DNSSuffix)
	testutil.Equals(t, cnconf.Endpoint, got.Endpoint)
	testutil.Equals(t, cnconf.Port, got.Port)
}
func TestTritonSDRefreshNoTargets(t *testing.T) {
tgts := testTritonSDRefresh(t, "{\"containers\":[]}")
tgts := testTritonSDRefresh(t, conf, "{\"containers\":[]}")
testutil.Assert(t, tgts == nil, "")
}
@ -129,7 +156,7 @@ func TestTritonSDRefreshMultipleTargets(t *testing.T) {
}`
)
tgts := testTritonSDRefresh(t, dstr)
tgts := testTritonSDRefresh(t, conf, dstr)
testutil.Assert(t, tgts != nil, "")
testutil.Equals(t, 2, len(tgts))
}
@ -156,9 +183,45 @@ func TestTritonSDRefreshCancelled(t *testing.T) {
testutil.Equals(t, strings.Contains(err.Error(), context.Canceled.Error()), true)
}
func testTritonSDRefresh(t *testing.T, dstr string) []model.LabelSet {
func TestTritonSDRefreshCNsUUIDOnly(t *testing.T) {
var (
td, _ = newTritonDiscovery(conf)
dstr = `{"cns":[
{
"server_uuid":"44454c4c-5000-104d-8037-b7c04f5a5131"
},
{
"server_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6"
}]
}`
)
tgts := testTritonSDRefresh(t, cnconf, dstr)
testutil.Assert(t, tgts != nil, "")
testutil.Equals(t, 2, len(tgts))
}
// TestTritonSDRefreshCNsWithHostname checks that compute nodes are discovered
// when the response carries both a server UUID and a server hostname for each
// node.
func TestTritonSDRefreshCNsWithHostname(t *testing.T) {
	// Response body served by the fake discovery endpoint: two compute nodes.
	const payload = `{"cns":[
{
"server_uuid":"44454c4c-5000-104d-8037-b7c04f5a5131",
"server_hostname": "server01"
},
{
"server_uuid":"a5894692-bd32-4ca1-908a-e2dda3c3a5e6",
"server_hostname": "server02"
}]
}`

	targets := testTritonSDRefresh(t, cnconf, payload)
	testutil.Assert(t, targets != nil, "")
	testutil.Equals(t, 2, len(targets))
}
func testTritonSDRefresh(t *testing.T, c SDConfig, dstr string) []model.LabelSet {
var (
td, _ = newTritonDiscovery(c)
s = httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, dstr)
}))

View file

@ -1020,37 +1020,60 @@ Serverset data must be in the JSON format, the Thrift format is not currently su
scrape targets from [Container Monitor](https://github.com/joyent/rfd/blob/master/rfd/0027/README.md)
discovery endpoints.
The following meta labels are available on targets during relabeling:
One of the following `<triton_role>` types can be configured to discover targets:
#### `container`
The `container` role discovers one target per "virtual machine" owned by the `account`.
These are SmartOS zones or lx/KVM/bhyve branded zones.
The following meta labels are available on targets during [relabeling](#relabel_config):
* `__meta_triton_groups`: the list of groups belonging to the target joined by a comma separator
* `__meta_triton_machine_alias`: the alias of the target container
* `__meta_triton_machine_brand`: the brand of the target container
* `__meta_triton_machine_id`: the UUID of the target container
* `__meta_triton_machine_image`: the target containers image type
* `__meta_triton_server_id`: the server UUID for the target container
* `__meta_triton_machine_image`: the target container's image type
* `__meta_triton_server_id`: the server UUID the target container is running on
#### `cn`
The `cn` role discovers one target per compute node (also known as "server" or "global zone") making up the Triton infrastructure.
The `account` must be a Triton operator and is currently required to own at least one `container`.
The following meta labels are available on targets during [relabeling](#relabel_config):
* `__meta_triton_machine_alias`: the hostname of the target (requires triton-cmon 1.7.0 or newer)
* `__meta_triton_machine_id`: the UUID of the target
See below for the configuration options for Triton discovery:
```yaml
# The information to access the Triton discovery API.
# The account to use for discovering new target containers.
# The account to use for discovering new targets.
account: <string>
# The DNS suffix which should be applied to target containers.
# The type of targets to discover, can be set to:
# * "container" to discover virtual machines (SmartOS zones, lx/KVM/bhyve branded zones) running on Triton
# * "cn" to discover compute nodes (servers/global zones) making up the Triton infrastructure
[ role: <string> | default = "container" ]
# The DNS suffix which should be applied to targets.
dns_suffix: <string>
# The Triton discovery endpoint (e.g. 'cmon.us-east-3b.triton.zone'). This is
# often the same value as dns_suffix.
endpoint: <string>
# A list of groups for which targets are retrieved. If omitted, all containers
# available to the requesting account are scraped.
# A list of groups for which targets are retrieved, only supported when `role` == `container`.
# If omitted, all containers owned by the requesting account are scraped.
groups:
[ - <string> ... ]
# The port to use for discovery and metric scraping.
[ port: <int> | default = 9163 ]
# The interval which should be used for refreshing target containers.
# The interval which should be used for refreshing targets.
[ refresh_interval: <duration> | default = 60s ]
# The Triton discovery API version.