2015-09-26 08:36:40 -07:00
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2021-10-03 04:35:24 -07:00
//go:build !nontp
2014-07-28 03:36:28 -07:00
// +build !nontp
package collector
import (
"fmt"
2017-09-19 01:36:14 -07:00
"net"
2018-07-22 05:36:33 -07:00
"sync"
2017-09-19 01:36:14 -07:00
"time"
2014-07-28 03:36:28 -07:00
"github.com/beevik/ntp"
2020-11-14 02:53:51 -08:00
"github.com/go-kit/log"
2014-07-28 03:36:28 -07:00
"github.com/prometheus/client_golang/prometheus"
2017-08-12 06:07:24 -07:00
"gopkg.in/alecthomas/kingpin.v2"
2014-07-28 03:36:28 -07:00
)
2017-09-19 01:36:14 -07:00
const (
	// hour24 is a fixed 24-hour span. `time` does not export `Day` as
	// Day != 24h because of DST.
	hour24 = 24 * time.Hour

	// ntpSubsystem is the subsystem part of every metric name built by this
	// collector.
	ntpSubsystem = "ntp"
)
2014-07-28 03:36:28 -07:00
var (
2017-09-19 01:36:14 -07:00
ntpServer = kingpin . Flag ( "collector.ntp.server" , "NTP server to use for ntp collector" ) . Default ( "127.0.0.1" ) . String ( )
2022-07-27 06:41:59 -07:00
ntpServerPort = kingpin . Flag ( "collector.ntp.server-port" , "UDP port number to connect to on NTP server" ) . Default ( "123" ) . Int ( )
2017-08-12 06:07:24 -07:00
ntpProtocolVersion = kingpin . Flag ( "collector.ntp.protocol-version" , "NTP protocol version" ) . Default ( "4" ) . Int ( )
2020-04-17 14:51:11 -07:00
ntpServerIsLocal = kingpin . Flag ( "collector.ntp.server-is-local" , "Certify that collector.ntp.server address is not a public ntp server" ) . Default ( "false" ) . Bool ( )
2017-09-19 01:36:14 -07:00
ntpIPTTL = kingpin . Flag ( "collector.ntp.ip-ttl" , "IP TTL to use while sending NTP query" ) . Default ( "1" ) . Int ( )
// 3.46608s ~ 1.5s + PHI * (1 << maxPoll), where 1.5s is MAXDIST from ntp.org, it is 1.0 in RFC5905
// max-distance option is used as-is without phi*(1<<poll)
ntpMaxDistance = kingpin . Flag ( "collector.ntp.max-distance" , "Max accumulated distance to the root" ) . Default ( "3.46608s" ) . Duration ( )
ntpOffsetTolerance = kingpin . Flag ( "collector.ntp.local-offset-tolerance" , "Offset between local clock and local ntpd time to tolerate" ) . Default ( "1ms" ) . Duration ( )
2018-07-22 05:36:33 -07:00
leapMidnight time . Time
leapMidnightMutex = & sync . Mutex { }
2014-07-28 03:36:28 -07:00
)
type ntpCollector struct {
2017-09-19 01:36:14 -07:00
stratum , leap , rtt , offset , reftime , rootDelay , rootDispersion , sanity typedDesc
2019-12-31 08:19:37 -08:00
logger log . Logger
2014-07-28 03:36:28 -07:00
}
func init ( ) {
2017-09-28 06:06:26 -07:00
registerCollector ( "ntp" , defaultDisabled , NewNtpCollector )
2014-07-28 03:36:28 -07:00
}
2017-09-19 01:36:14 -07:00
// NewNtpCollector returns a new Collector exposing sanity of local NTP server.
// Default definition of "local" is:
// - collector.ntp.server address is a loopback address (or collector.ntp.server-is-mine flag is turned on)
// - the server is reachable with outgoin IP_TTL = 1
2019-12-31 08:19:37 -08:00
func NewNtpCollector ( logger log . Logger ) ( Collector , error ) {
2017-09-19 01:36:14 -07:00
ipaddr := net . ParseIP ( * ntpServer )
if ! * ntpServerIsLocal && ( ipaddr == nil || ! ipaddr . IsLoopback ( ) ) {
2017-09-28 06:06:26 -07:00
return nil , fmt . Errorf ( "only IP address of local NTP server is valid for --collector.ntp.server" )
2014-07-28 03:36:28 -07:00
}
2017-09-19 01:36:14 -07:00
2015-11-10 01:07:30 -08:00
if * ntpProtocolVersion < 2 || * ntpProtocolVersion > 4 {
2015-11-13 07:09:11 -08:00
return nil , fmt . Errorf ( "invalid NTP protocol version %d; must be 2, 3, or 4" , * ntpProtocolVersion )
2015-11-10 01:07:30 -08:00
}
2014-07-28 03:36:28 -07:00
2017-09-19 01:36:14 -07:00
if * ntpOffsetTolerance < 0 {
2019-01-04 07:58:53 -08:00
return nil , fmt . Errorf ( "offset tolerance must be non-negative" )
2017-09-19 01:36:14 -07:00
}
2022-07-27 06:41:59 -07:00
if * ntpServerPort < 1 || * ntpServerPort > 65535 {
return nil , fmt . Errorf ( "invalid NTP port number %d; must be between 1 and 65535 inclusive" , * ntpServerPort )
}
2014-11-24 18:00:17 -08:00
return & ntpCollector {
2017-09-19 01:36:14 -07:00
stratum : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "stratum" ) ,
2017-09-19 01:36:14 -07:00
"NTPD stratum." ,
2016-12-28 06:21:31 -08:00
nil , nil ,
) , prometheus . GaugeValue } ,
2017-09-19 01:36:14 -07:00
leap : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "leap" ) ,
2017-09-19 01:36:14 -07:00
"NTPD leap second indicator, 2 bits." ,
nil , nil ,
) , prometheus . GaugeValue } ,
rtt : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "rtt_seconds" ) ,
2017-09-19 01:36:14 -07:00
"RTT to NTPD." ,
nil , nil ,
) , prometheus . GaugeValue } ,
offset : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "offset_seconds" ) ,
2017-09-19 01:36:14 -07:00
"ClockOffset between NTP and local clock." ,
nil , nil ,
) , prometheus . GaugeValue } ,
reftime : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "reference_timestamp_seconds" ) ,
2017-09-19 01:36:14 -07:00
"NTPD ReferenceTime, UNIX timestamp." ,
nil , nil ,
) , prometheus . GaugeValue } ,
rootDelay : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "root_delay_seconds" ) ,
2017-09-19 01:36:14 -07:00
"NTPD RootDelay." ,
nil , nil ,
) , prometheus . GaugeValue } ,
rootDispersion : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "root_dispersion_seconds" ) ,
2017-09-19 01:36:14 -07:00
"NTPD RootDispersion." ,
nil , nil ,
) , prometheus . GaugeValue } ,
sanity : typedDesc { prometheus . NewDesc (
2017-09-28 06:06:26 -07:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "sanity" ) ,
2017-09-19 01:36:14 -07:00
"NTPD sanity according to RFC5905 heuristics and configured limits." ,
2016-12-28 06:21:31 -08:00
nil , nil ,
) , prometheus . GaugeValue } ,
2019-12-31 08:19:37 -08:00
logger : logger ,
2014-11-24 18:00:17 -08:00
} , nil
2014-07-28 03:36:28 -07:00
}
2017-02-28 10:47:20 -08:00
func ( c * ntpCollector ) Update ( ch chan <- prometheus . Metric ) error {
2017-09-19 01:36:14 -07:00
resp , err := ntp . QueryWithOptions ( * ntpServer , ntp . QueryOptions {
Version : * ntpProtocolVersion ,
TTL : * ntpIPTTL ,
Timeout : time . Second , // default `ntpdate` timeout
2022-07-27 06:41:59 -07:00
Port : * ntpServerPort ,
2017-09-19 01:36:14 -07:00
} )
2014-07-28 03:36:28 -07:00
if err != nil {
2020-06-15 13:27:14 -07:00
return fmt . Errorf ( "couldn't get SNTP reply: %w" , err )
2017-09-19 01:36:14 -07:00
}
ch <- c . stratum . mustNewConstMetric ( float64 ( resp . Stratum ) )
ch <- c . leap . mustNewConstMetric ( float64 ( resp . Leap ) )
ch <- c . rtt . mustNewConstMetric ( resp . RTT . Seconds ( ) )
ch <- c . offset . mustNewConstMetric ( resp . ClockOffset . Seconds ( ) )
if resp . ReferenceTime . Unix ( ) > 0 {
// Go Zero is 0001-01-01 00:00:00 UTC
// NTP Zero is 1900-01-01 00:00:00 UTC
// UNIX Zero is 1970-01-01 00:00:00 UTC
// so let's keep ALL ancient `reftime` values as zero
ch <- c . reftime . mustNewConstMetric ( float64 ( resp . ReferenceTime . UnixNano ( ) ) / 1e9 )
} else {
ch <- c . reftime . mustNewConstMetric ( 0 )
}
ch <- c . rootDelay . mustNewConstMetric ( resp . RootDelay . Seconds ( ) )
ch <- c . rootDispersion . mustNewConstMetric ( resp . RootDispersion . Seconds ( ) )
// Here is SNTP packet sanity check that is exposed to move burden of
// configuration from node_exporter user to the developer.
maxerr := * ntpOffsetTolerance
2018-07-22 05:36:33 -07:00
leapMidnightMutex . Lock ( )
2017-09-19 01:36:14 -07:00
if resp . Leap == ntp . LeapAddSecond || resp . Leap == ntp . LeapDelSecond {
// state of leapMidnight is cached as leap flag is dropped right after midnight
leapMidnight = resp . Time . Truncate ( hour24 ) . Add ( hour24 )
}
if leapMidnight . Add ( - hour24 ) . Before ( resp . Time ) && resp . Time . Before ( leapMidnight . Add ( hour24 ) ) {
// tolerate leap smearing
maxerr += time . Second
}
2018-07-22 05:36:33 -07:00
leapMidnightMutex . Unlock ( )
2017-09-19 01:36:14 -07:00
2017-10-03 23:33:49 -07:00
if resp . Validate ( ) == nil && resp . RootDistance <= * ntpMaxDistance && resp . MinError <= maxerr {
2017-09-19 01:36:14 -07:00
ch <- c . sanity . mustNewConstMetric ( 1 )
} else {
ch <- c . sanity . mustNewConstMetric ( 0 )
2014-07-28 03:36:28 -07:00
}
2016-06-03 03:25:30 -07:00
return nil
2014-07-28 03:36:28 -07:00
}