// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !nontp
// +build !nontp

package collector

import (
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/beevik/ntp"
	"github.com/go-kit/log"
	"github.com/prometheus/client_golang/prometheus"
	"gopkg.in/alecthomas/kingpin.v2"
)

const (
	hour24       = 24 * time.Hour // `time` does not export `Day` as Day != 24h because of DST
	ntpSubsystem = "ntp"
)

var (
	ntpServer          = kingpin.Flag("collector.ntp.server", "NTP server to use for ntp collector").Default("127.0.0.1").String()
	ntpProtocolVersion = kingpin.Flag("collector.ntp.protocol-version", "NTP protocol version").Default("4").Int()
	ntpServerIsLocal   = kingpin.Flag("collector.ntp.server-is-local", "Certify that collector.ntp.server address is not a public ntp server").Default("false").Bool()
	ntpIPTTL           = kingpin.Flag("collector.ntp.ip-ttl", "IP TTL to use while sending NTP query").Default("1").Int()
	// 3.46608s ~ 1.5s + PHI * (1 << maxPoll), where 1.5s is MAXDIST from ntp.org, it is 1.0 in RFC5905
	// max-distance option is used as-is without phi*(1<<poll)
	ntpMaxDistance     = kingpin.Flag("collector.ntp.max-distance", "Max accumulated distance to the root").Default("3.46608s").Duration()
	ntpOffsetTolerance = kingpin.Flag("collector.ntp.local-offset-tolerance", "Offset between local clock and local ntpd time to tolerate").Default("1ms").Duration()

	leapMidnight      time.Time
	leapMidnightMutex = &sync.Mutex{}
)

type ntpCollector struct {
	stratum, leap, rtt, offset, reftime, rootDelay, rootDispersion, sanity typedDesc
	logger                                                                 log.Logger
}

func init() {
	registerCollector("ntp", defaultDisabled, NewNtpCollector)
}

// NewNtpCollector returns a new Collector exposing sanity of local NTP server.
// Default definition of "local" is:
// - collector.ntp.server address is a loopback address (or collector.ntp.server-is-mine flag is turned on)
// - the server is reachable with outgoin IP_TTL = 1
func NewNtpCollector(logger log.Logger) (Collector, error) {
	ipaddr := net.ParseIP(*ntpServer)
	if !*ntpServerIsLocal && (ipaddr == nil || !ipaddr.IsLoopback()) {
		return nil, fmt.Errorf("only IP address of local NTP server is valid for --collector.ntp.server")
	}

	if *ntpProtocolVersion < 2 || *ntpProtocolVersion > 4 {
		return nil, fmt.Errorf("invalid NTP protocol version %d; must be 2, 3, or 4", *ntpProtocolVersion)
	}

	if *ntpOffsetTolerance < 0 {
		return nil, fmt.Errorf("offset tolerance must be non-negative")
	}

	return &ntpCollector{
		stratum: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "stratum"),
			"NTPD stratum.",
			nil, nil,
		), prometheus.GaugeValue},
		leap: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "leap"),
			"NTPD leap second indicator, 2 bits.",
			nil, nil,
		), prometheus.GaugeValue},
		rtt: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "rtt_seconds"),
			"RTT to NTPD.",
			nil, nil,
		), prometheus.GaugeValue},
		offset: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "offset_seconds"),
			"ClockOffset between NTP and local clock.",
			nil, nil,
		), prometheus.GaugeValue},
		reftime: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "reference_timestamp_seconds"),
			"NTPD ReferenceTime, UNIX timestamp.",
			nil, nil,
		), prometheus.GaugeValue},
		rootDelay: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "root_delay_seconds"),
			"NTPD RootDelay.",
			nil, nil,
		), prometheus.GaugeValue},
		rootDispersion: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "root_dispersion_seconds"),
			"NTPD RootDispersion.",
			nil, nil,
		), prometheus.GaugeValue},
		sanity: typedDesc{prometheus.NewDesc(
			prometheus.BuildFQName(namespace, ntpSubsystem, "sanity"),
			"NTPD sanity according to RFC5905 heuristics and configured limits.",
			nil, nil,
		), prometheus.GaugeValue},
		logger: logger,
	}, nil
}

func (c *ntpCollector) Update(ch chan<- prometheus.Metric) error {
	resp, err := ntp.QueryWithOptions(*ntpServer, ntp.QueryOptions{
		Version: *ntpProtocolVersion,
		TTL:     *ntpIPTTL,
		Timeout: time.Second, // default `ntpdate` timeout
	})
	if err != nil {
		return fmt.Errorf("couldn't get SNTP reply: %w", err)
	}

	ch <- c.stratum.mustNewConstMetric(float64(resp.Stratum))
	ch <- c.leap.mustNewConstMetric(float64(resp.Leap))
	ch <- c.rtt.mustNewConstMetric(resp.RTT.Seconds())
	ch <- c.offset.mustNewConstMetric(resp.ClockOffset.Seconds())
	if resp.ReferenceTime.Unix() > 0 {
		// Go Zero is   0001-01-01 00:00:00 UTC
		// NTP Zero is  1900-01-01 00:00:00 UTC
		// UNIX Zero is 1970-01-01 00:00:00 UTC
		// so let's keep ALL ancient `reftime` values as zero
		ch <- c.reftime.mustNewConstMetric(float64(resp.ReferenceTime.UnixNano()) / 1e9)
	} else {
		ch <- c.reftime.mustNewConstMetric(0)
	}
	ch <- c.rootDelay.mustNewConstMetric(resp.RootDelay.Seconds())
	ch <- c.rootDispersion.mustNewConstMetric(resp.RootDispersion.Seconds())

	// Here is SNTP packet sanity check that is exposed to move burden of
	// configuration from node_exporter user to the developer.

	maxerr := *ntpOffsetTolerance
	leapMidnightMutex.Lock()
	if resp.Leap == ntp.LeapAddSecond || resp.Leap == ntp.LeapDelSecond {
		// state of leapMidnight is cached as leap flag is dropped right after midnight
		leapMidnight = resp.Time.Truncate(hour24).Add(hour24)
	}
	if leapMidnight.Add(-hour24).Before(resp.Time) && resp.Time.Before(leapMidnight.Add(hour24)) {
		// tolerate leap smearing
		maxerr += time.Second
	}
	leapMidnightMutex.Unlock()

	if resp.Validate() == nil && resp.RootDistance <= *ntpMaxDistance && resp.MinError <= maxerr {
		ch <- c.sanity.mustNewConstMetric(1)
	} else {
		ch <- c.sanity.mustNewConstMetric(0)
	}

	return nil
}