Kubernetes SD: Refactor to handle missing Kubernetes events

This commit is contained in:
Jimmi Dyson 2016-01-15 12:36:55 +00:00
parent 308d447cd7
commit 9faa7515c6

View file

@ -70,16 +70,13 @@ type Discovery struct {
client *http.Client client *http.Client
Conf *config.KubernetesSDConfig Conf *config.KubernetesSDConfig
apiServers []config.URL apiServers []config.URL
apiServersMu sync.RWMutex apiServersMu sync.RWMutex
nodesResourceVersion string nodes map[string]*Node
servicesResourceVersion string services map[string]map[string]*Service
endpointsResourceVersion string nodesMu sync.RWMutex
nodes map[string]*Node servicesMu sync.RWMutex
services map[string]map[string]*Service runDone chan struct{}
nodesMu sync.RWMutex
servicesMu sync.RWMutex
runDone chan struct{}
} }
// Initialize sets up the discovery for usage. // Initialize sets up the discovery for usage.
@ -92,8 +89,6 @@ func (kd *Discovery) Initialize() error {
kd.apiServers = kd.Conf.APIServers kd.apiServers = kd.Conf.APIServers
kd.client = client kd.client = client
kd.nodes = map[string]*Node{}
kd.services = map[string]map[string]*Service{}
kd.runDone = make(chan struct{}) kd.runDone = make(chan struct{})
return nil return nil
@ -106,67 +101,42 @@ func (kd *Discovery) Sources() []string {
sourceNames = append(sourceNames, apiServersTargetGroupName+":"+apiServer.Host) sourceNames = append(sourceNames, apiServersTargetGroupName+":"+apiServer.Host)
} }
res, err := kd.queryAPIServerPath(nodesURL) nodes, _, err := kd.getNodes()
if err != nil { if err != nil {
// If we can't list nodes then we can't watch them. Assume this is a misconfiguration // If we can't list nodes then we can't watch them. Assume this is a misconfiguration
// & log & return empty. // & log & return empty.
log.Errorf("Unable to list Kubernetes nodes: %s", err) log.Errorf("Unable to initialize Kubernetes nodes: %s", err)
return []string{}
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
log.Errorf("Unable to list Kubernetes nodes. Unexpected response: %d %s", res.StatusCode, res.Status)
return []string{} return []string{}
} }
sourceNames = append(sourceNames, kd.nodeSources(nodes)...)
var nodes NodeList services, _, err := kd.getServices()
if err := json.NewDecoder(res.Body).Decode(&nodes); err != nil {
body, _ := ioutil.ReadAll(res.Body)
log.Errorf("Unable to list Kubernetes nodes. Unexpected response body: %s", string(body))
return []string{}
}
kd.nodesMu.Lock()
defer kd.nodesMu.Unlock()
kd.nodesResourceVersion = nodes.ResourceVersion
for idx, node := range nodes.Items {
sourceNames = append(sourceNames, nodesTargetGroupName+":"+node.ObjectMeta.Name)
kd.nodes[node.ObjectMeta.Name] = &nodes.Items[idx]
}
res, err = kd.queryAPIServerPath(servicesURL)
if err != nil { if err != nil {
// If we can't list services then we can't watch them. Assume this is a misconfiguration // If we can't list services then we can't watch them. Assume this is a misconfiguration
// & log & return empty. // & log & return empty.
log.Errorf("Unable to list Kubernetes services: %s", err) log.Errorf("Unable to initialize Kubernetes services: %s", err)
return []string{} return []string{}
} }
defer res.Body.Close() sourceNames = append(sourceNames, kd.serviceSources(services)...)
if res.StatusCode != http.StatusOK {
log.Errorf("Unable to list Kubernetes services. Unexpected response: %d %s", res.StatusCode, res.Status)
return []string{}
}
var services ServiceList
if err := json.NewDecoder(res.Body).Decode(&services); err != nil {
body, _ := ioutil.ReadAll(res.Body)
log.Errorf("Unable to list Kubernetes services. Unexpected response body: %s", string(body))
return []string{}
}
kd.servicesMu.Lock()
defer kd.servicesMu.Unlock()
kd.servicesResourceVersion = services.ResourceVersion return sourceNames
for idx, service := range services.Items { }
sourceNames = append(sourceNames, serviceSource(&service))
namespace, ok := kd.services[service.ObjectMeta.Namespace] func (kd *Discovery) nodeSources(nodes map[string]*Node) []string {
if !ok { var sourceNames []string
namespace = map[string]*Service{} for name := range nodes {
kd.services[service.ObjectMeta.Namespace] = namespace sourceNames = append(sourceNames, nodesTargetGroupName+":"+name)
}
return sourceNames
}
func (kd *Discovery) serviceSources(services map[string]map[string]*Service) []string {
var sourceNames []string
for _, ns := range services {
for _, service := range ns {
sourceNames = append(sourceNames, serviceSource(service))
} }
namespace[service.ObjectMeta.Name] = &services.Items[idx]
} }
return sourceNames return sourceNames
} }
@ -182,37 +152,12 @@ func (kd *Discovery) Run(ch chan<- config.TargetGroup, done <-chan struct{}) {
} }
} }
if tg := kd.updateNodesTargetGroup(); tg != nil {
select {
case ch <- *tg:
case <-done:
return
}
}
for _, ns := range kd.services {
for _, service := range ns {
tg := kd.addService(service)
if tg == nil {
continue
}
select {
case ch <- *tg:
case <-done:
return
}
}
}
retryInterval := time.Duration(kd.Conf.RetryInterval) retryInterval := time.Duration(kd.Conf.RetryInterval)
update := make(chan interface{}, 10) update := make(chan interface{}, 10)
go kd.watchNodes(update, done, retryInterval) go kd.watchNodes(update, done, retryInterval)
go kd.watchServices(update, done, retryInterval) go kd.startServiceWatch(update, done, retryInterval)
go kd.watchServiceEndpoints(update, done, retryInterval)
var tg *config.TargetGroup var tg *config.TargetGroup
for { for {
@ -308,8 +253,8 @@ func (kd *Discovery) updateAPIServersTargetGroup() *config.TargetGroup {
} }
func (kd *Discovery) updateNodesTargetGroup() *config.TargetGroup { func (kd *Discovery) updateNodesTargetGroup() *config.TargetGroup {
kd.nodesMu.Lock() kd.nodesMu.RLock()
defer kd.nodesMu.Unlock() defer kd.nodesMu.RUnlock()
tg := &config.TargetGroup{ tg := &config.TargetGroup{
Source: nodesTargetGroupName, Source: nodesTargetGroupName,
@ -350,17 +295,86 @@ func (kd *Discovery) updateNode(node *Node, eventType EventType) {
} }
} }
func (kd *Discovery) getNodes() (map[string]*Node, string, error) {
res, err := kd.queryAPIServerPath(nodesURL)
if err != nil {
// If we can't list nodes then we can't watch them. Assume this is a misconfiguration
// & return error.
return nil, "", fmt.Errorf("Unable to list Kubernetes nodes: %s", err)
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, "", fmt.Errorf("Unable to list Kubernetes nodes. Unexpected response: %d %s", res.StatusCode, res.Status)
}
var nodes NodeList
if err := json.NewDecoder(res.Body).Decode(&nodes); err != nil {
body, _ := ioutil.ReadAll(res.Body)
return nil, "", fmt.Errorf("Unable to list Kubernetes nodes. Unexpected response body: %s", string(body))
}
nodeMap := map[string]*Node{}
for idx, node := range nodes.Items {
nodeMap[node.ObjectMeta.Name] = &nodes.Items[idx]
}
return nodeMap, nodes.ResourceVersion, nil
}
func (kd *Discovery) getServices() (map[string]map[string]*Service, string, error) {
res, err := kd.queryAPIServerPath(servicesURL)
if err != nil {
// If we can't list services then we can't watch them. Assume this is a misconfiguration
// & return error.
return nil, "", fmt.Errorf("Unable to list Kubernetes services: %s", err)
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, "", fmt.Errorf("Unable to list Kubernetes services. Unexpected response: %d %s", res.StatusCode, res.Status)
}
var services ServiceList
if err := json.NewDecoder(res.Body).Decode(&services); err != nil {
body, _ := ioutil.ReadAll(res.Body)
return nil, "", fmt.Errorf("Unable to list Kubernetes services. Unexpected response body: %s", string(body))
}
serviceMap := map[string]map[string]*Service{}
for idx, service := range services.Items {
namespace, ok := serviceMap[service.ObjectMeta.Namespace]
if !ok {
namespace = map[string]*Service{}
serviceMap[service.ObjectMeta.Namespace] = namespace
}
namespace[service.ObjectMeta.Name] = &services.Items[idx]
}
return serviceMap, services.ResourceVersion, nil
}
// watchNodes watches nodes as they come & go. // watchNodes watches nodes as they come & go.
func (kd *Discovery) watchNodes(events chan interface{}, done <-chan struct{}, retryInterval time.Duration) { func (kd *Discovery) watchNodes(events chan interface{}, done <-chan struct{}, retryInterval time.Duration) {
until(func() { until(func() {
nodes, resourceVersion, err := kd.getNodes()
if err != nil {
log.Errorf("Cannot initialize nodes collection: %s", err)
return
}
// Reset the known nodes.
kd.nodes = map[string]*Node{}
for _, node := range nodes {
events <- &nodeEvent{added, node}
}
req, err := http.NewRequest("GET", nodesURL, nil) req, err := http.NewRequest("GET", nodesURL, nil)
if err != nil { if err != nil {
log.Errorf("Failed to watch nodes: %s", err) log.Errorf("Cannot create nodes request: %s", err)
return return
} }
values := req.URL.Query() values := req.URL.Query()
values.Add("watch", "true") values.Add("watch", "true")
values.Add("resourceVersion", kd.nodesResourceVersion) values.Add("resourceVersion", resourceVersion)
req.URL.RawQuery = values.Encode() req.URL.RawQuery = values.Encode()
res, err := kd.queryAPIServerReq(req) res, err := kd.queryAPIServerReq(req)
if err != nil { if err != nil {
@ -378,10 +392,9 @@ func (kd *Discovery) watchNodes(events chan interface{}, done <-chan struct{}, r
for { for {
var event nodeEvent var event nodeEvent
if err := d.Decode(&event); err != nil { if err := d.Decode(&event); err != nil {
log.Errorf("Failed to watch nodes: %s", err) log.Errorf("Watch nodes unexpectedly closed: %s", err)
return return
} }
kd.nodesResourceVersion = event.Node.ObjectMeta.ResourceVersion
select { select {
case events <- &event: case events <- &event:
@ -392,62 +405,146 @@ func (kd *Discovery) watchNodes(events chan interface{}, done <-chan struct{}, r
} }
// watchServices watches services as they come & go. // watchServices watches services as they come & go.
func (kd *Discovery) watchServices(events chan interface{}, done <-chan struct{}, retryInterval time.Duration) { func (kd *Discovery) startServiceWatch(events chan<- interface{}, done <-chan struct{}, retryInterval time.Duration) {
until(func() { until(func() {
req, err := http.NewRequest("GET", servicesURL, nil) // We use separate target groups for each discovered service so we'll need to clean up any if they've been deleted
// in Kubernetes while we couldn't connect - small chance of this, but worth dealing with.
existingServices := kd.services
// Reset the known services.
kd.services = map[string]map[string]*Service{}
services, resourceVersion, err := kd.getServices()
if err != nil { if err != nil {
log.Errorf("Failed to watch services: %s", err) log.Errorf("Cannot initialize services collection: %s", err)
return
}
values := req.URL.Query()
values.Add("watch", "true")
values.Add("resourceVersion", kd.servicesResourceVersion)
req.URL.RawQuery = values.Encode()
res, err := kd.queryAPIServerReq(req)
if err != nil {
log.Errorf("Failed to watch services: %s", err)
return
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
log.Errorf("Failed to watch services: %d", res.StatusCode)
return return
} }
d := json.NewDecoder(res.Body) // Now let's loop through the old services & see if they still exist in here
for oldNSName, oldNS := range existingServices {
for { if ns, ok := services[oldNSName]; !ok {
var event serviceEvent for _, service := range existingServices[oldNSName] {
if err := d.Decode(&event); err != nil { events <- &serviceEvent{deleted, service}
log.Errorf("Unable to watch services: %s", err) }
return } else {
} for oldServiceName, oldService := range oldNS {
kd.servicesResourceVersion = event.Service.ObjectMeta.ResourceVersion if _, ok := ns[oldServiceName]; !ok {
events <- &serviceEvent{deleted, oldService}
select { }
case events <- &event: }
case <-done:
} }
} }
// Discard the existing services map for GC.
existingServices = nil
for _, ns := range services {
for _, service := range ns {
events <- &serviceEvent{added, service}
}
}
var wg sync.WaitGroup
wg.Add(2)
go func() {
kd.watchServices(resourceVersion, events, done)
wg.Done()
}()
go func() {
kd.watchServiceEndpoints(resourceVersion, events, done)
wg.Done()
}()
wg.Wait()
}, retryInterval, done) }, retryInterval, done)
} }
func (kd *Discovery) watchServices(resourceVersion string, events chan<- interface{}, done <-chan struct{}) {
req, err := http.NewRequest("GET", servicesURL, nil)
if err != nil {
log.Errorf("Failed to create services request: %s", err)
return
}
values := req.URL.Query()
values.Add("watch", "true")
values.Add("resourceVersion", resourceVersion)
req.URL.RawQuery = values.Encode()
res, err := kd.queryAPIServerReq(req)
if err != nil {
log.Errorf("Failed to watch services: %s", err)
return
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
log.Errorf("Failed to watch services: %d", res.StatusCode)
return
}
d := json.NewDecoder(res.Body)
for {
var event serviceEvent
if err := d.Decode(&event); err != nil {
log.Errorf("Watch services unexpectedly closed: %s", err)
return
}
select {
case events <- &event:
case <-done:
return
}
}
}
// watchServiceEndpoints watches service endpoints as they come & go.
func (kd *Discovery) watchServiceEndpoints(resourceVersion string, events chan<- interface{}, done <-chan struct{}) {
req, err := http.NewRequest("GET", endpointsURL, nil)
if err != nil {
log.Errorf("Failed to create service endpoints request: %s", err)
return
}
values := req.URL.Query()
values.Add("watch", "true")
values.Add("resourceVersion", resourceVersion)
req.URL.RawQuery = values.Encode()
res, err := kd.queryAPIServerReq(req)
if err != nil {
log.Errorf("Failed to watch service endpoints: %s", err)
return
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
log.Errorf("Failed to watch service endpoints: %d", res.StatusCode)
return
}
d := json.NewDecoder(res.Body)
for {
var event endpointsEvent
if err := d.Decode(&event); err != nil {
log.Errorf("Watch service endpoints unexpectedly closed: %s", err)
return
}
select {
case events <- &event:
case <-done:
}
}
}
func (kd *Discovery) updateService(service *Service, eventType EventType) *config.TargetGroup { func (kd *Discovery) updateService(service *Service, eventType EventType) *config.TargetGroup {
kd.servicesMu.Lock() kd.servicesMu.Lock()
defer kd.servicesMu.Unlock() defer kd.servicesMu.Unlock()
var (
name = service.ObjectMeta.Name
namespace = service.ObjectMeta.Namespace
_, exists = kd.services[namespace][name]
)
switch eventType { switch eventType {
case deleted: case deleted:
if exists { return kd.deleteService(service)
return kd.deleteService(service)
}
case added, modified: case added, modified:
return kd.addService(service) return kd.addService(service)
} }
@ -553,48 +650,6 @@ func (kd *Discovery) updateServiceTargetGroup(service *Service, eps *Endpoints)
return tg return tg
} }
// watchServiceEndpoints watches service endpoints as they come & go.
func (kd *Discovery) watchServiceEndpoints(events chan interface{}, done <-chan struct{}, retryInterval time.Duration) {
until(func() {
req, err := http.NewRequest("GET", endpointsURL, nil)
if err != nil {
log.Errorf("Failed to watch service endpoints: %s", err)
return
}
values := req.URL.Query()
values.Add("watch", "true")
values.Add("resourceVersion", kd.servicesResourceVersion)
req.URL.RawQuery = values.Encode()
res, err := kd.queryAPIServerReq(req)
if err != nil {
log.Errorf("Failed to watch service endpoints: %s", err)
return
}
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
log.Errorf("Failed to watch service endpoints: %d", res.StatusCode)
return
}
d := json.NewDecoder(res.Body)
for {
var event endpointsEvent
if err := d.Decode(&event); err != nil {
log.Errorf("Unable to watch service endpoints: %s", err)
return
}
kd.servicesResourceVersion = event.Endpoints.ObjectMeta.ResourceVersion
select {
case events <- &event:
case <-done:
}
}
}, retryInterval, done)
}
func (kd *Discovery) updateServiceEndpoints(endpoints *Endpoints, eventType EventType) *config.TargetGroup { func (kd *Discovery) updateServiceEndpoints(endpoints *Endpoints, eventType EventType) *config.TargetGroup {
kd.servicesMu.Lock() kd.servicesMu.Lock()
defer kd.servicesMu.Unlock() defer kd.servicesMu.Unlock()