diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 30514699..01b943b9 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -26,7 +26,7 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install Go - uses: actions/setup-go@41dfa10bad2bb2ae585af6ee5bb4d7d973ad74ed # v5.1.0 + uses: actions/setup-go@3041bf56c941b39c61721a86cd11f3bb1338122a # v5.2.0 with: go-version: 1.23.x - name: Install snmp_exporter/generator dependencies @@ -36,4 +36,4 @@ jobs: uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1 with: args: --verbose - version: v1.61.0 + version: v1.62.0 diff --git a/.golangci.yml b/.golangci.yml index 1f1b0f63..8731aefd 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,17 +1,9 @@ linters: enable: - depguard + - goimports - misspell - revive - disable: - # Disable soon to deprecated[1] linters that lead to false - # positives when build tags disable certain files[2] - # 1: https://github.com/golangci/golangci-lint/issues/1841 - # 2: https://github.com/prometheus/node_exporter/issues/1545 - - deadcode - - unused - - structcheck - - varcheck issues: exclude-rules: diff --git a/Makefile.common b/Makefile.common index 09e5bff8..fc47bdbb 100644 --- a/Makefile.common +++ b/Makefile.common @@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_ SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= -GOLANGCI_LINT_VERSION ?= v1.61.0 +GOLANGCI_LINT_VERSION ?= v1.62.0 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) diff --git a/README.md b/README.md index 44a9d2b1..ff5d27ea 100644 --- a/README.md +++ b/README.md @@ -99,8 +99,8 @@ cpu | flags | --collector.cpu.info.flags-include | N/A diskstats | device | --collector.diskstats.device-include | --collector.diskstats.device-exclude ethtool | device | --collector.ethtool.device-include | --collector.ethtool.device-exclude ethtool | metrics | --collector.ethtool.metrics-include | N/A -filesystem | fs-types | N/A | --collector.filesystem.fs-types-exclude -filesystem | mount-points | N/A | --collector.filesystem.mount-points-exclude +filesystem | fs-types | --collector.filesystem.fs-types-include | --collector.filesystem.fs-types-exclude +filesystem | mount-points | --collector.filesystem.mount-points-include | --collector.filesystem.mount-points-exclude hwmon | chip | --collector.hwmon.chip-include | --collector.hwmon.chip-exclude hwmon | sensor | --collector.hwmon.sensor-include | --collector.hwmon.sensor-exclude interrupts | name | --collector.interrupts.name-include | --collector.interrupts.name-exclude diff --git a/collector/arp_linux.go b/collector/arp_linux.go index 29fa22ba..dfa3b431 100644 --- a/collector/arp_linux.go +++ b/collector/arp_linux.go @@ -17,13 +17,11 @@ package collector import ( - "errors" "fmt" "log/slog" - "net" "github.com/alecthomas/kingpin/v2" - "github.com/jsimonetti/rtnetlink/v2" + "github.com/jsimonetti/rtnetlink/v2/rtnl" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" "golang.org/x/sys/unix" @@ -97,53 +95,41 @@ func getTotalArpEntries(deviceEntries []procfs.ARPEntry) map[string]uint32 { } func getArpEntriesRTNL() (map[string]uint32, map[string]map[string]int, error) { - conn, err := rtnetlink.Dial(nil) + conn, err := rtnl.Dial(nil) if err != nil { return nil, nil, err } defer conn.Close() - neighbors, err := conn.Neigh.List() + // Neighbors will also contain IPv6 neighbors, but since this is purely an ARP collector, + // restrict to AF_INET. + neighbors, err := conn.Neighbours(nil, unix.AF_INET) if err != nil { return nil, nil, err } - ifIndexEntries := make(map[uint32]uint32) - ifIndexStates := make(map[uint32]map[string]int) + // Map of interface name to ARP neighbor count. + entries := make(map[string]uint32) + // Map of map[InterfaceName]map[StateName]int + states := make(map[string]map[string]int) for _, n := range neighbors { - // Neighbors will also contain IPv6 neighbors, but since this is purely an ARP collector, - // restrict to AF_INET. Also skip entries which have state NUD_NOARP to conform to output - // of /proc/net/arp. - if n.Family == unix.AF_INET && n.State&unix.NUD_NOARP == 0 { - ifIndexEntries[n.Index]++ - - _, ok := ifIndexStates[n.Index] - if !ok { - ifIndexStates[n.Index] = make(map[string]int) - } - ifIndexStates[n.Index][neighborStatesMap[n.State]]++ - } - } - - enumEntries := make(map[string]uint32) - enumStates := make(map[string]map[string]int) - - // Convert interface indexes to names. - for ifIndex, entryCount := range ifIndexEntries { - iface, err := net.InterfaceByIndex(int(ifIndex)) - if err != nil { - if errors.Unwrap(err).Error() == "no such network interface" { - continue - } - return nil, nil, err + // Skip entries which have state NUD_NOARP to conform to output of /proc/net/arp. + if n.State&unix.NUD_NOARP != unix.NUD_NOARP { + continue } - enumEntries[iface.Name] = entryCount - enumStates[iface.Name] = ifIndexStates[ifIndex] + entries[n.Interface.Name]++ + + _, ok := states[n.Interface.Name] + if !ok { + states[n.Interface.Name] = make(map[string]int) + } + + states[n.Interface.Name][neighborStatesMap[n.State]]++ } - return enumEntries, enumStates, nil + return entries, states, nil } func (c *arpCollector) Update(ch chan<- prometheus.Metric) error { diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 1ee7b94d..8ca70365 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -17,6 +17,7 @@ package collector import ( + "errors" "fmt" "log/slog" "os" @@ -26,15 +27,17 @@ import ( "strconv" "sync" + "golang.org/x/exp/maps" + "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" "github.com/prometheus/procfs/sysfs" - "golang.org/x/exp/maps" ) type cpuCollector struct { - fs procfs.FS + procfs procfs.FS + sysfs sysfs.FS cpu *prometheus.Desc cpuInfo *prometheus.Desc cpuFrequencyHz *prometheus.Desc @@ -45,6 +48,7 @@ type cpuCollector struct { cpuPackageThrottle *prometheus.Desc cpuIsolated *prometheus.Desc logger *slog.Logger + cpuOnline *prometheus.Desc cpuStats map[int64]procfs.CPUStat cpuStatsMutex sync.Mutex isolatedCpus []uint16 @@ -70,17 +74,17 @@ func init() { // NewCPUCollector returns a new Collector exposing kernel/system statistics. func NewCPUCollector(logger *slog.Logger) (Collector, error) { - fs, err := procfs.NewFS(*procPath) + pfs, err := procfs.NewFS(*procPath) if err != nil { return nil, fmt.Errorf("failed to open procfs: %w", err) } - sysfs, err := sysfs.NewFS(*sysPath) + sfs, err := sysfs.NewFS(*sysPath) if err != nil { return nil, fmt.Errorf("failed to open sysfs: %w", err) } - isolcpus, err := sysfs.IsolatedCPUs() + isolcpus, err := sfs.IsolatedCPUs() if err != nil { if !os.IsNotExist(err) { return nil, fmt.Errorf("Unable to get isolated cpus: %w", err) @@ -89,8 +93,9 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) { } c := &cpuCollector{ - fs: fs, - cpu: nodeCPUSecondsDesc, + procfs: pfs, + sysfs: sfs, + cpu: nodeCPUSecondsDesc, cpuInfo: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"), "CPU information from /proc/cpuinfo.", @@ -131,6 +136,11 @@ func NewCPUCollector(logger *slog.Logger) (Collector, error) { "Whether each core is isolated, information from /sys/devices/system/cpu/isolated.", []string{"cpu"}, nil, ), + cpuOnline: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "online"), + "CPUs that are online and being scheduled.", + []string{"cpu"}, nil, + ), logger: logger, isolatedCpus: isolcpus, cpuStats: make(map[int64]procfs.CPUStat), @@ -177,12 +187,21 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { if c.isolatedCpus != nil { c.updateIsolated(ch) } - return c.updateThermalThrottle(ch) + err := c.updateThermalThrottle(ch) + if err != nil { + return err + } + err = c.updateOnline(ch) + if err != nil { + return err + } + + return nil } // updateInfo reads /proc/cpuinfo func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error { - info, err := c.fs.CPUInfo() + info, err := c.procfs.CPUInfo() if err != nil { return err } @@ -333,9 +352,31 @@ func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) { } } +// updateOnline reads /sys/devices/system/cpu/cpu*/online through sysfs and exports online status metrics. +func (c *cpuCollector) updateOnline(ch chan<- prometheus.Metric) error { + cpus, err := c.sysfs.CPUs() + if err != nil { + return err + } + // No-op if the system does not support CPU online stats. + cpu0 := cpus[0] + if _, err := cpu0.Online(); err != nil && errors.Is(err, os.ErrNotExist) { + return nil + } + for _, cpu := range cpus { + setOnline := float64(0) + if online, _ := cpu.Online(); online { + setOnline = 1 + } + ch <- prometheus.MustNewConstMetric(c.cpuOnline, prometheus.GaugeValue, setOnline, cpu.Number()) + } + + return nil +} + // updateStat reads /proc/stat through procfs and exports CPU-related metrics. func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { - stats, err := c.fs.Stat() + stats, err := c.procfs.Stat() if err != nil { return err } diff --git a/collector/cpufreq_linux.go b/collector/cpufreq_linux.go index d6b3e42d..79e2308c 100644 --- a/collector/cpufreq_linux.go +++ b/collector/cpufreq_linux.go @@ -18,10 +18,11 @@ package collector import ( "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/procfs/sysfs" "log/slog" "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/procfs/sysfs" ) type cpuFreqCollector struct { diff --git a/collector/filesystem_aix.go b/collector/filesystem_aix.go index 863a26a0..a1314a0f 100644 --- a/collector/filesystem_aix.go +++ b/collector/filesystem_aix.go @@ -32,12 +32,12 @@ func (c *filesystemCollector) GetStats() (stats []filesystemStats, err error) { return nil, err } for _, stat := range fsStat { - if c.excludedMountPointsPattern.MatchString(stat.MountPoint) { + if c.mountPointFilter.ignored(stat.MountPoint) { c.logger.Debug("Ignoring mount point", "mountpoint", stat.MountPoint) continue } fstype := stat.TypeString() - if c.excludedFSTypesPattern.MatchString(fstype) { + if c.fsTypeFilter.ignored(fstype) { c.logger.Debug("Ignoring fs type", "type", fstype) continue } diff --git a/collector/filesystem_bsd.go b/collector/filesystem_bsd.go index 2db3fb8f..d586c9ca 100644 --- a/collector/filesystem_bsd.go +++ b/collector/filesystem_bsd.go @@ -48,14 +48,14 @@ func (c *filesystemCollector) GetStats() (stats []filesystemStats, err error) { stats = []filesystemStats{} for i := 0; i < int(count); i++ { mountpoint := C.GoString(&mnt[i].f_mntonname[0]) - if c.excludedMountPointsPattern.MatchString(mountpoint) { + if c.mountPointFilter.ignored(mountpoint) { c.logger.Debug("Ignoring mount point", "mountpoint", mountpoint) continue } device := C.GoString(&mnt[i].f_mntfromname[0]) fstype := C.GoString(&mnt[i].f_fstypename[0]) - if c.excludedFSTypesPattern.MatchString(fstype) { + if c.fsTypeFilter.ignored(fstype) { c.logger.Debug("Ignoring fs type", "type", fstype) continue } diff --git a/collector/filesystem_common.go b/collector/filesystem_common.go index 0ec55e8a..41d439c8 100644 --- a/collector/filesystem_common.go +++ b/collector/filesystem_common.go @@ -19,8 +19,8 @@ package collector import ( "errors" + "fmt" "log/slog" - "regexp" "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" @@ -36,7 +36,7 @@ var ( mountPointsExcludeSet bool mountPointsExclude = kingpin.Flag( "collector.filesystem.mount-points-exclude", - "Regexp of mount points to exclude for filesystem collector.", + "Regexp of mount points to exclude for filesystem collector. (mutually exclusive to mount-points-include)", ).Default(defMountPointsExcluded).PreAction(func(c *kingpin.ParseContext) error { mountPointsExcludeSet = true return nil @@ -45,11 +45,15 @@ var ( "collector.filesystem.ignored-mount-points", "Regexp of mount points to ignore for filesystem collector.", ).Hidden().String() + mountPointsInclude = kingpin.Flag( + "collector.filesystem.mount-points-include", + "Regexp of mount points to include for filesystem collector. (mutually exclusive to mount-points-exclude)", + ).String() fsTypesExcludeSet bool fsTypesExclude = kingpin.Flag( "collector.filesystem.fs-types-exclude", - "Regexp of filesystem types to exclude for filesystem collector.", + "Regexp of filesystem types to exclude for filesystem collector. (mutually exclusive to fs-types-include)", ).Default(defFSTypesExcluded).PreAction(func(c *kingpin.ParseContext) error { fsTypesExcludeSet = true return nil @@ -58,13 +62,17 @@ var ( "collector.filesystem.ignored-fs-types", "Regexp of filesystem types to ignore for filesystem collector.", ).Hidden().String() + fsTypesInclude = kingpin.Flag( + "collector.filesystem.fs-types-include", + "Regexp of filesystem types to exclude for filesystem collector. (mutually exclusive to fs-types-exclude)", + ).String() filesystemLabelNames = []string{"device", "mountpoint", "fstype", "device_error"} ) type filesystemCollector struct { - excludedMountPointsPattern *regexp.Regexp - excludedFSTypesPattern *regexp.Regexp + mountPointFilter deviceFilter + fsTypeFilter deviceFilter sizeDesc, freeDesc, availDesc *prometheus.Desc filesDesc, filesFreeDesc *prometheus.Desc roDesc, deviceErrorDesc *prometheus.Desc @@ -89,29 +97,7 @@ func init() { // NewFilesystemCollector returns a new Collector exposing filesystems stats. func NewFilesystemCollector(logger *slog.Logger) (Collector, error) { - if *oldMountPointsExcluded != "" { - if !mountPointsExcludeSet { - logger.Warn("--collector.filesystem.ignored-mount-points is DEPRECATED and will be removed in 2.0.0, use --collector.filesystem.mount-points-exclude") - *mountPointsExclude = *oldMountPointsExcluded - } else { - return nil, errors.New("--collector.filesystem.ignored-mount-points and --collector.filesystem.mount-points-exclude are mutually exclusive") - } - } - - if *oldFSTypesExcluded != "" { - if !fsTypesExcludeSet { - logger.Warn("--collector.filesystem.ignored-fs-types is DEPRECATED and will be removed in 2.0.0, use --collector.filesystem.fs-types-exclude") - *fsTypesExclude = *oldFSTypesExcluded - } else { - return nil, errors.New("--collector.filesystem.ignored-fs-types and --collector.filesystem.fs-types-exclude are mutually exclusive") - } - } - - subsystem := "filesystem" - logger.Info("Parsed flag --collector.filesystem.mount-points-exclude", "flag", *mountPointsExclude) - mountPointPattern := regexp.MustCompile(*mountPointsExclude) - logger.Info("Parsed flag --collector.filesystem.fs-types-exclude", "flag", *fsTypesExclude) - filesystemsTypesPattern := regexp.MustCompile(*fsTypesExclude) + const subsystem = "filesystem" sizeDesc := prometheus.NewDesc( prometheus.BuildFQName(namespace, subsystem, "size_bytes"), @@ -162,18 +148,28 @@ func NewFilesystemCollector(logger *slog.Logger) (Collector, error) { nil, ) + mountPointFilter, err := newMountPointsFilter(logger) + if err != nil { + return nil, fmt.Errorf("unable to parse mount points filter flags: %w", err) + } + + fsTypeFilter, err := newFSTypeFilter(logger) + if err != nil { + return nil, fmt.Errorf("unable to parse fs types filter flags: %w", err) + } + return &filesystemCollector{ - excludedMountPointsPattern: mountPointPattern, - excludedFSTypesPattern: filesystemsTypesPattern, - sizeDesc: sizeDesc, - freeDesc: freeDesc, - availDesc: availDesc, - filesDesc: filesDesc, - filesFreeDesc: filesFreeDesc, - roDesc: roDesc, - deviceErrorDesc: deviceErrorDesc, - mountInfoDesc: mountInfoDesc, - logger: logger, + mountPointFilter: mountPointFilter, + fsTypeFilter: fsTypeFilter, + sizeDesc: sizeDesc, + freeDesc: freeDesc, + availDesc: availDesc, + filesDesc: filesDesc, + filesFreeDesc: filesFreeDesc, + roDesc: roDesc, + deviceErrorDesc: deviceErrorDesc, + mountInfoDesc: mountInfoDesc, + logger: logger, }, nil } @@ -230,3 +226,61 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { } return nil } + +func newMountPointsFilter(logger *slog.Logger) (deviceFilter, error) { + if *oldMountPointsExcluded != "" { + if !mountPointsExcludeSet { + logger.Warn("--collector.filesystem.ignored-mount-points is DEPRECATED and will be removed in 2.0.0, use --collector.filesystem.mount-points-exclude") + *mountPointsExclude = *oldMountPointsExcluded + } else { + return deviceFilter{}, errors.New("--collector.filesystem.ignored-mount-points and --collector.filesystem.mount-points-exclude are mutually exclusive") + } + } + + if *mountPointsInclude != "" && !mountPointsExcludeSet { + logger.Debug("mount-points-exclude flag not set when mount-points-include flag is set, assuming include is desired") + *mountPointsExclude = "" + } + + if *mountPointsExclude != "" && *mountPointsInclude != "" { + return deviceFilter{}, errors.New("--collector.filesystem.mount-points-exclude and --collector.filesystem.mount-points-include are mutually exclusive") + } + + if *mountPointsExclude != "" { + logger.Info("Parsed flag --collector.filesystem.mount-points-exclude", "flag", *mountPointsExclude) + } + if *mountPointsInclude != "" { + logger.Info("Parsed flag --collector.filesystem.mount-points-include", "flag", *mountPointsInclude) + } + + return newDeviceFilter(*mountPointsExclude, *mountPointsInclude), nil +} + +func newFSTypeFilter(logger *slog.Logger) (deviceFilter, error) { + if *oldFSTypesExcluded != "" { + if !fsTypesExcludeSet { + logger.Warn("--collector.filesystem.ignored-fs-types is DEPRECATED and will be removed in 2.0.0, use --collector.filesystem.fs-types-exclude") + *fsTypesExclude = *oldFSTypesExcluded + } else { + return deviceFilter{}, errors.New("--collector.filesystem.ignored-fs-types and --collector.filesystem.fs-types-exclude are mutually exclusive") + } + } + + if *fsTypesInclude != "" && !fsTypesExcludeSet { + logger.Debug("fs-types-exclude flag not set when fs-types-include flag is set, assuming include is desired") + *fsTypesExclude = "" + } + + if *fsTypesExclude != "" && *fsTypesInclude != "" { + return deviceFilter{}, errors.New("--collector.filesystem.fs-types-exclude and --collector.filesystem.fs-types-include are mutually exclusive") + } + + if *fsTypesExclude != "" { + logger.Info("Parsed flag --collector.filesystem.fs-types-exclude", "flag", *fsTypesExclude) + } + if *fsTypesInclude != "" { + logger.Info("Parsed flag --collector.filesystem.fs-types-include", "flag", *fsTypesInclude) + } + + return newDeviceFilter(*fsTypesExclude, *fsTypesInclude), nil +} diff --git a/collector/filesystem_freebsd.go b/collector/filesystem_freebsd.go index 2a6026d3..502ae0a8 100644 --- a/collector/filesystem_freebsd.go +++ b/collector/filesystem_freebsd.go @@ -39,14 +39,14 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stats := []filesystemStats{} for _, fs := range buf { mountpoint := unix.ByteSliceToString(fs.Mntonname[:]) - if c.excludedMountPointsPattern.MatchString(mountpoint) { + if c.mountPointFilter.ignored(mountpoint) { c.logger.Debug("Ignoring mount point", "mountpoint", mountpoint) continue } device := unix.ByteSliceToString(fs.Mntfromname[:]) fstype := unix.ByteSliceToString(fs.Fstypename[:]) - if c.excludedFSTypesPattern.MatchString(fstype) { + if c.fsTypeFilter.ignored(fstype) { c.logger.Debug("Ignoring fs type", "type", fstype) continue } diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 19c39717..3a7bda4d 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -73,12 +73,12 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { go func() { for _, labels := range mps { - if c.excludedMountPointsPattern.MatchString(labels.mountPoint) { + if c.mountPointFilter.ignored(labels.mountPoint) { c.logger.Debug("Ignoring mount point", "mountpoint", labels.mountPoint) continue } - if c.excludedFSTypesPattern.MatchString(labels.fsType) { - c.logger.Debug("Ignoring fs", "type", labels.fsType) + if c.fsTypeFilter.ignored(labels.fsType) { + c.logger.Debug("Ignoring fs type", "type", labels.fsType) continue } diff --git a/collector/filesystem_netbsd.go b/collector/filesystem_netbsd.go index ba6a9f55..c7395893 100644 --- a/collector/filesystem_netbsd.go +++ b/collector/filesystem_netbsd.go @@ -97,14 +97,14 @@ func (c *filesystemCollector) GetStats() (stats []filesystemStats, err error) { stats = []filesystemStats{} for _, v := range mnt { mountpoint := unix.ByteSliceToString(v.F_mntonname[:]) - if c.excludedMountPointsPattern.MatchString(mountpoint) { + if c.mountPointFilter.ignored(mountpoint) { c.logger.Debug("msg", "Ignoring mount point", "mountpoint", mountpoint) continue } device := unix.ByteSliceToString(v.F_mntfromname[:]) fstype := unix.ByteSliceToString(v.F_fstypename[:]) - if c.excludedFSTypesPattern.MatchString(fstype) { + if c.fsTypeFilter.ignored(fstype) { c.logger.Debug("msg", "Ignoring fs type", "type", fstype) continue } diff --git a/collector/filesystem_openbsd.go b/collector/filesystem_openbsd.go index d7489364..fa9f8eb8 100644 --- a/collector/filesystem_openbsd.go +++ b/collector/filesystem_openbsd.go @@ -41,14 +41,14 @@ func (c *filesystemCollector) GetStats() (stats []filesystemStats, err error) { stats = []filesystemStats{} for _, v := range mnt { mountpoint := unix.ByteSliceToString(v.F_mntonname[:]) - if c.excludedMountPointsPattern.MatchString(mountpoint) { + if c.mountPointFilter.ignored(mountpoint) { c.logger.Debug("Ignoring mount point", "mountpoint", mountpoint) continue } device := unix.ByteSliceToString(v.F_mntfromname[:]) fstype := unix.ByteSliceToString(v.F_fstypename[:]) - if c.excludedFSTypesPattern.MatchString(fstype) { + if c.fsTypeFilter.ignored(fstype) { c.logger.Debug("Ignoring fs type", "type", fstype) continue } diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 605149da..55583bad 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -871,6 +871,10 @@ node_hwmon_fan_target_rpm{chip="nct6779",sensor="fan2"} 27000 # HELP node_hwmon_fan_tolerance Hardware monitor fan element tolerance # TYPE node_hwmon_fan_tolerance gauge node_hwmon_fan_tolerance{chip="nct6779",sensor="fan2"} 0 +# HELP node_hwmon_freq_freq_mhz Hardware monitor for GPU frequency in MHz +# TYPE node_hwmon_freq_freq_mhz gauge +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="mclk"} 300 +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="sclk"} 214 # HELP node_hwmon_in_alarm Hardware sensor alarm status (in) # TYPE node_hwmon_in_alarm gauge node_hwmon_in_alarm{chip="nct6779",sensor="in0"} 0 @@ -984,8 +988,10 @@ node_hwmon_pwm_weight_temp_step_tol{chip="nct6779",sensor="pwm1"} 0 # TYPE node_hwmon_sensor_label gauge node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp1"} 1 node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp2"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side ",sensor="fan1"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side ",sensor="fan2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="mclk",sensor="freq2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="sclk",sensor="freq1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side",sensor="fan1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side",sensor="fan2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 0",sensor="temp2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 1",sensor="temp3"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 2",sensor="temp4"} 1 diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index a80f6b63..eb6be483 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -893,6 +893,10 @@ node_hwmon_fan_target_rpm{chip="nct6779",sensor="fan2"} 27000 # HELP node_hwmon_fan_tolerance Hardware monitor fan element tolerance # TYPE node_hwmon_fan_tolerance gauge node_hwmon_fan_tolerance{chip="nct6779",sensor="fan2"} 0 +# HELP node_hwmon_freq_freq_mhz Hardware monitor for GPU frequency in MHz +# TYPE node_hwmon_freq_freq_mhz gauge +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="mclk"} 300 +node_hwmon_freq_freq_mhz{chip="hwmon4",sensor="sclk"} 214 # HELP node_hwmon_in_alarm Hardware sensor alarm status (in) # TYPE node_hwmon_in_alarm gauge node_hwmon_in_alarm{chip="nct6779",sensor="in0"} 0 @@ -1006,8 +1010,10 @@ node_hwmon_pwm_weight_temp_step_tol{chip="nct6779",sensor="pwm1"} 0 # TYPE node_hwmon_sensor_label gauge node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp1"} 1 node_hwmon_sensor_label{chip="hwmon4",label="foosensor",sensor="temp2"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side ",sensor="fan1"} 1 -node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side ",sensor="fan2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="mclk",sensor="freq2"} 1 +node_hwmon_sensor_label{chip="hwmon4",label="sclk",sensor="freq1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Left side",sensor="fan1"} 1 +node_hwmon_sensor_label{chip="platform_applesmc_768",label="Right side",sensor="fan2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 0",sensor="temp2"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 1",sensor="temp3"} 1 node_hwmon_sensor_label{chip="platform_coretemp_0",label="Core 2",sensor="temp4"} 1 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index 6fcf094d..7b2f27b8 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -437,6 +437,26 @@ Lines: 1 100000 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq1_input +Lines: 1 +214000000 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq1_label +Lines: 1 +sclk +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq2_input +Lines: 1 +300000000 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/class/hwmon/hwmon4/freq2_label +Lines: 1 +mclk +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/hwmon/hwmon5 SymlinkTo: ../../devices/platform/bogus.0/hwmon/hwmon5/ # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -1337,7 +1357,7 @@ Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/nvme/nvme0/model Lines: 1 -Samsung SSD 970 PRO 512GB +Samsung SSD 970 PRO 512GB Mode: 444 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/class/nvme/nvme0/serial @@ -2750,7 +2770,7 @@ Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan1_label Lines: 1 -Left side +Left side Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan1_manual @@ -2784,7 +2804,7 @@ Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan2_label Lines: 1 -Right side +Right side Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: sys/devices/platform/applesmc.768/fan2_manual diff --git a/collector/hwmon_linux.go b/collector/hwmon_linux.go index 9c0e065c..ad8da41b 100644 --- a/collector/hwmon_linux.go +++ b/collector/hwmon_linux.go @@ -44,7 +44,7 @@ var ( hwmonSensorTypes = []string{ "vrm", "beep_enable", "update_interval", "in", "cpu", "fan", "pwm", "temp", "curr", "power", "energy", "humidity", - "intrusion", + "intrusion", "freq", } ) @@ -357,6 +357,15 @@ func (c *hwMonCollector) updateHwmon(ch chan<- prometheus.Metric, dir string) er continue } + if sensorType == "freq" && element == "input" { + if label, ok := sensorData["label"]; ok { + sensorLabel := cleanMetricName(label) + desc := prometheus.NewDesc(name+"_freq_mhz", "Hardware monitor for GPU frequency in MHz", hwmonLabelDesc, nil) + ch <- prometheus.MustNewConstMetric( + desc, prometheus.GaugeValue, parsedValue/1000000.0, append(labels[:len(labels)-1], sensorLabel)...) + } + continue + } // fallback, just dump the metric as is desc := prometheus.NewDesc(name, "Hardware monitor "+sensorType+" element "+element, hwmonLabelDesc, nil) diff --git a/collector/interrupts_common.go b/collector/interrupts_common.go index 08baa106..80cd7171 100644 --- a/collector/interrupts_common.go +++ b/collector/interrupts_common.go @@ -18,9 +18,10 @@ package collector import ( + "log/slog" + "github.com/alecthomas/kingpin/v2" "github.com/prometheus/client_golang/prometheus" - "log/slog" ) type interruptsCollector struct { diff --git a/collector/runit.go b/collector/runit.go index 8065d90c..2813bfd0 100644 --- a/collector/runit.go +++ b/collector/runit.go @@ -17,10 +17,11 @@ package collector import ( + "log/slog" + "github.com/alecthomas/kingpin/v2" "github.com/prometheus-community/go-runit/runit" "github.com/prometheus/client_golang/prometheus" - "log/slog" ) var runitServiceDir = kingpin.Flag("collector.runit.servicedir", "Path to runit service directory.").Default("/etc/service").String() diff --git a/collector/selinux_linux.go b/collector/selinux_linux.go index 9000fe0d..f10b43fa 100644 --- a/collector/selinux_linux.go +++ b/collector/selinux_linux.go @@ -17,9 +17,10 @@ package collector import ( + "log/slog" + "github.com/opencontainers/selinux/go-selinux" "github.com/prometheus/client_golang/prometheus" - "log/slog" ) type selinuxCollector struct { diff --git a/collector/softirqs_common.go b/collector/softirqs_common.go index cd9ccb94..73a9f7be 100644 --- a/collector/softirqs_common.go +++ b/collector/softirqs_common.go @@ -18,9 +18,10 @@ package collector import ( "fmt" + "log/slog" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" - "log/slog" ) type softirqsCollector struct { diff --git a/collector/uname.go b/collector/uname.go index 6b4f06e5..c3e96b0b 100644 --- a/collector/uname.go +++ b/collector/uname.go @@ -18,8 +18,9 @@ package collector import ( - "github.com/prometheus/client_golang/prometheus" "log/slog" + + "github.com/prometheus/client_golang/prometheus" ) var unameDesc = prometheus.NewDesc( diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet index c3464809..dc93c9a8 100644 --- a/docs/node-mixin/alerts/alerts.libsonnet +++ b/docs/node-mixin/alerts/alerts.libsonnet @@ -10,7 +10,7 @@ ( node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d and - predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[%(fsSpaceFillingUpPredictionWindow)s], 24*60*60) < 0 + predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[%(fsSpaceFillingUpPredictionWindow)s], %(nodeWarningWindowHours)s*60*60) < 0 and node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 ) @@ -20,7 +20,7 @@ severity: 'warning', }, annotations: { - summary: 'Filesystem is predicted to run out of space within the next 24 hours.', + summary: 'Filesystem is predicted to run out of space within the next %(nodeWarningWindowHours)s hours.' % $._config, description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.', }, }, @@ -30,7 +30,7 @@ ( node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d and - predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0 + predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], %(nodeCriticalWindowHours)s*60*60) < 0 and node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 ) @@ -40,7 +40,7 @@ severity: '%(nodeCriticalSeverity)s' % $._config, }, annotations: { - summary: 'Filesystem is predicted to run out of space within the next 4 hours.', + summary: 'Filesystem is predicted to run out of space within the next %(nodeCriticalWindowHours)s hours.' % $._config, description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.', }, }, @@ -86,7 +86,7 @@ ( node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 40 and - predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 24*60*60) < 0 + predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], %(nodeWarningWindowHours)s*60*60) < 0 and node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 ) @@ -96,7 +96,7 @@ severity: 'warning', }, annotations: { - summary: 'Filesystem is predicted to run out of inodes within the next 24 hours.', + summary: 'Filesystem is predicted to run out of inodes within the next %(nodeWarningWindowHours)s hours.' % $._config, description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.', }, }, @@ -106,7 +106,7 @@ ( node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 20 and - predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0 + predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], %(nodeCriticalWindowHours)s*60*60) < 0 and node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 ) @@ -116,7 +116,7 @@ severity: '%(nodeCriticalSeverity)s' % $._config, }, annotations: { - summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.', + summary: 'Filesystem is predicted to run out of inodes within the next %(nodeCriticalWindowHours)s hours.' % $._config, description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.', }, }, @@ -191,7 +191,7 @@ ||| % $._config, annotations: { summary: 'Number of conntrack are getting close to the limit.', - description: '{{ $value | humanizePercentage }} of conntrack entries are used.', + description: '{{ $labels.instance }} {{ $value | humanizePercentage }} of conntrack entries are used.', }, labels: { severity: 'warning', @@ -312,7 +312,7 @@ { alert: 'NodeCPUHighUsage', expr: ||| - sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode!="idle"}[2m]))) * 100 > %(cpuHighUsageThreshold)d + sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode!~"idle|iowait"}[2m]))) * 100 > %(cpuHighUsageThreshold)d ||| % $._config, 'for': '15m', labels: { diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index df2af23f..1a4b3caa 100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet @@ -50,6 +50,16 @@ // 'NodeSystemSaturation' alert. systemSaturationPerCoreThreshold: 2, + // Some of the alerts use predict_linear() to fire alerts ahead of time to + // prevent unrecoverable situations (eg. no more disk space). However, the + // node may have automatic processes (cronjobs) in place to prevent that + // within a certain time window, this may not align with the default time + // window of these alerts. This can cause these alerts to start flapping. + // By reducing the time window, the system gets more time to + // resolve this before problems occur. + nodeWarningWindowHours: '24', + nodeCriticalWindowHours: '4', + // Available disk space (%) thresholds on which to trigger the // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk // usage grows in a way that it is predicted to run out in 4h or 1d diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index fce411ab..f9d9e07c 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -1,469 +1,480 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; local dashboard = grafana.dashboard; -local row = grafana.row; -local prometheus = grafana.prometheus; -local template = grafana.template; -local graphPanel = grafana.graphPanel; +local variable = dashboard.variable; +local row = grafana.panel.row; +local prometheus = grafana.query.prometheus; + +local timeSeriesPanel = grafana.panel.timeSeries; +local tsOptions = timeSeriesPanel.options; +local tsStandardOptions = timeSeriesPanel.standardOptions; +local tsQueryOptions = timeSeriesPanel.queryOptions; +local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; +local tsLegend = tsOptions.legend; local c = import '../config.libsonnet'; -local datasourceTemplate = { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: 'Data Source', - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', -}; +local datasource = variable.datasource.new( + 'datasource', 'prometheus' +); + +local tsCommonPanelOptions = + variable.query.withDatasourceFromVariable(datasource) + + tsCustom.stacking.withMode('normal') + + tsCustom.withFillOpacity(100) + + tsCustom.withShowPoints('never') + + tsLegend.withShowLegend(false) + + tsOptions.tooltip.withMode('multi') + + tsOptions.tooltip.withSort('desc'); local CPUUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'CPU Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local CPUSaturation = // TODO: Is this a useful panel? At least there should be some explanation how load // average relates to the "CPU saturation" in the title. - graphPanel.new( + timeSeriesPanel.new( 'CPU Saturation (Load1 per CPU)', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local memoryUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Memory Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local memorySaturation = - graphPanel.new( + timeSeriesPanel.new( 'Memory Saturation (Major Page Faults)', - datasource='$datasource', - span=6, - format='rds', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('rds'); + +local networkOverrides = tsStandardOptions.withOverrides( + [ + tsStandardOptions.override.byRegexp.new('/Transmit/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions( + tsCustom.withTransform('negative-Y') + ), + ] +); local networkUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Network Utilisation (Bytes Receive/Transmit)', - datasource='$datasource', - span=6, - format='Bps', - stack=true, - fill=10, - legend_show=false, ) - .addSeriesOverride({ alias: '/Receive/', stack: 'A' }) - .addSeriesOverride({ alias: '/Transmit/', stack: 'B', transform: 'negative-Y' }) - { tooltip+: { sort: 2 } }; + + tsCommonPanelOptions + + tsStandardOptions.withUnit('Bps') + + networkOverrides; local networkSaturation = - graphPanel.new( + timeSeriesPanel.new( 'Network Saturation (Drops Receive/Transmit)', - datasource='$datasource', - span=6, - format='Bps', - stack=true, - fill=10, - legend_show=false, ) - .addSeriesOverride({ alias: '/ Receive/', stack: 'A' }) - .addSeriesOverride({ alias: '/ Transmit/', stack: 'B', transform: 'negative-Y' }) - { tooltip+: { sort: 2 } }; + + tsCommonPanelOptions + + tsStandardOptions.withUnit('Bps') + + networkOverrides; local diskIOUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Disk IO Utilisation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local diskIOSaturation = - graphPanel.new( + timeSeriesPanel.new( 'Disk IO Saturation', - datasource='$datasource', - span=6, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); local diskSpaceUtilisation = - graphPanel.new( + timeSeriesPanel.new( 'Disk Space Utilisation', - datasource='$datasource', - span=12, - format='percentunit', - stack=true, - fill=10, - legend_show=false, - ) { tooltip+: { sort: 2 } }; + ) + + tsCommonPanelOptions + + tsStandardOptions.withUnit('percentunit'); { - _clusterTemplate:: template.new( - name='cluster', - datasource='$datasource', - query='label_values(node_time_seconds, %s)' % $._config.clusterLabel, - current='', - hide=if $._config.showMultiCluster then '' else '2', - refresh=2, - includeAll=false, - sort=1 - ), + _clusterVariable:: + variable.query.new('cluster') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues( + $._config.clusterLabel, + 'node_time_seconds', + ) + + (if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + + variable.query.refresh.onTime() + + variable.query.selectionOptions.withIncludeAll(false) + + variable.query.withSort(asc=true), grafanaDashboards+:: { 'node-rsrc-use.json': - dashboard.new( '%sUSE Method / Node' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-rsrc-use.json') ) - .addTemplate(datasourceTemplate) - .addTemplate($._clusterTemplate) - .addTemplate( - template.new( + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.new('instance') + + variable.query.withDatasourceFromVariable(datasource) + + variable.query.queryTypes.withLabelValues( 'instance', - '$datasource', - 'label_values(node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}, instance)' % $._config, - refresh='time', - sort=1 - ) - ) - .addRow( - row.new('CPU') - .addPanel(CPUUtilisation.addTarget(prometheus.target('instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) - .addPanel(CPUSaturation.addTarget(prometheus.target('instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Saturation'))) - ) - .addRow( - row.new('Memory') - .addPanel(memoryUtilisation.addTarget(prometheus.target('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) - .addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Major page Faults'))) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel(diskIOUtilisation.addTarget(prometheus.target('instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{device}}'))) - .addPanel(diskIOSaturation.addTarget(prometheus.target('instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{device}}'))) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation.addTarget(prometheus.target( - ||| - sort_desc(1 - - ( - max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - / - max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) - ) != 0 - ) - ||| % $._config, legendFormat='{{device}}' - )) + 'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, ) + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), + ]) + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]), + memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'), + prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]), + ]), + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sort_desc(1 - + ( + max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + / + max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) + ) != 0 + ) + ||| % $._config + ) + prometheus.withLegendFormat('{{device}}'), + ]), + ]), + ], panelWidth=24, panelHeight=7, startY=34), ), - 'node-cluster-rsrc-use.json': dashboard.new( '%sUSE Method / Cluster' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-cluster-rsrc-use.json') ) - .addTemplate(datasourceTemplate) - .addTemplate($._clusterTemplate) - .addRow( - row.new('CPU') - .addPanel( - CPUUtilisation - .addTarget(prometheus.target( - ||| - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - ) != 0 ) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ||| % $._config, legendFormat='{{ instance }}' - )) - ) - .addPanel( - CPUSaturation - .addTarget(prometheus.target( - ||| - ( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}}' - )) - ) - ) - .addRow( - row.new('Memory') - .addPanel( - memoryUtilisation - .addTarget(prometheus.target( - ||| - ( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}}', - )) - ) - .addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{instance}}'))) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) - .addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel( - diskIOUtilisation - .addTarget(prometheus.target( - ||| - ( - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}} {{device}}' - )) - ) - .addPanel( - diskIOSaturation - .addTarget(prometheus.target( - ||| - ( + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-cluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + $._clusterVariable, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), + ]) + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + ) != 0 ) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ) != 0 + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + memorySaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Receive'), + prometheus.new( + '$datasource', + 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config + ) + prometheus.withLegendFormat('{{ instance }} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}'), + ]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) - ) != 0 - ||| % $._config, legendFormat='{{instance}} {{device}}' - )) - ) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation - .addTarget(prometheus.target( - ||| - sum without (device) ( - max without (fstype, mountpoint) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - - - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) - ||| % $._config, legendFormat='{{instance}}' - )) - ) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]), + ]), + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum without (device) ( + max without (fstype, mountpoint) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + - + node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) + ||| % $._config + ) + prometheus.withLegendFormat('{{ instance }}'), + ]), + ]), + ], panelWidth=24, panelHeight=7, startY=34), ), } + if $._config.showMultiCluster then { 'node-multicluster-rsrc-use.json': dashboard.new( '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, - time_from='now-1h', - tags=($._config.dashboardTags), - timezone='utc', - refresh='30s', - graphTooltip='shared_crosshair', - uid=std.md5('node-multicluster-rsrc-use.json') ) - .addTemplate(datasourceTemplate) - .addRow( - row.new('CPU') - .addPanel( - CPUUtilisation - .addTarget(prometheus.target( - ||| - sum( - (( - instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} - * - instance:node_num_cpu:sum{%(nodeExporterSelector)s} - ) != 0) - / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) - ) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - .addPanel( - CPUSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - ) - .addRow( - row.new('Memory') - .addPanel( - memoryUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} - / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - .addPanel( - memorySaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) - ) - .addRow( - row.new('Network') - .addPanel( - networkUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config - )) - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config - )) - ) - .addPanel( - networkSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config - )) - .addTarget(prometheus.target( - ||| - sum(( - instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} - ) != 0) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config - )) - ) - ) - .addRow( - row.new('Disk IO') - .addPanel( - diskIOUtilisation - .addTarget(prometheus.target( - ||| - sum(( - instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config - )) - ) - .addPanel( - diskIOSaturation - .addTarget(prometheus.target( - ||| - sum(( - instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) - ) != 0) by (%(clusterLabel)s, device) - ||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config - )) - ) - ) - .addRow( - row.new('Disk Space') - .addPanel( - diskSpaceUtilisation - .addTarget(prometheus.target( - ||| - sum ( - sum without (device) ( - max without (fstype, mountpoint, instance, pod) (( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - ) != 0) - ) - / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) - ) by (%(clusterLabel)s) - ||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config - )) - ) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags($._config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withUid(std.md5('node-multicluster-rsrc-use.json')) + + dashboard.withVariables([ + datasource, + variable.query.withDatasourceFromVariable(datasource) + + variable.query.refresh.onTime() + + variable.query.withSort(asc=true), + ]) + + dashboard.withPanels( + grafana.util.grid.makeGrid([ + row.new('CPU') + + row.withPanels([ + CPUUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum( + (( + instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s} + * + instance:node_num_cpu:sum{%(nodeExporterSelector)s} + ) != 0) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + CPUSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + row.new('Memory') + + row.withPanels([ + memoryUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + memorySaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| + % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + row.new('Network') + + row.withPanels([ + networkUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + networkSaturation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'), + prometheus.new( + '$datasource', + ||| + sum(( + instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} + ) != 0) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'), + ]), + ]), + row.new('Disk IO') + + row.withPanels([ + diskIOUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}'), + ]), + diskIOSaturation + tsQueryOptions.withTargets([prometheus.new( + '$datasource', + ||| + sum(( + instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) + ) != 0) by (%(clusterLabel)s, device) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]), + ]), + + ], panelWidth=12, panelHeight=7) + + grafana.util.grid.makeGrid([ + row.new('Disk Space') + + row.withPanels([ + diskSpaceUtilisation + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + sum ( + sum without (device) ( + max without (fstype, mountpoint, instance, pod) (( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} + ) != 0) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) + ) by (%(clusterLabel)s) + ||| % $._config + ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'), + ]), + ]), + ], panelWidth=24, panelHeight=7, startY=34), ), } else {}, } diff --git a/docs/node-mixin/jsonnetfile.json b/docs/node-mixin/jsonnetfile.json index 721d4833..2d56d912 100644 --- a/docs/node-mixin/jsonnetfile.json +++ b/docs/node-mixin/jsonnetfile.json @@ -4,20 +4,11 @@ { "source": { "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet" + "remote": "https://github.com/grafana/grafonnet.git", + "subdir": "gen/grafonnet-latest" } }, - "version": "master" - }, - { - "source": { - "git": { - "remote": "https://github.com/grafana/grafonnet-lib.git", - "subdir": "grafonnet-7.0" - } - }, - "version": "master" + "version": "main" } ], "legacyImports": false diff --git a/docs/node-mixin/lib/prom-mixin.libsonnet b/docs/node-mixin/lib/prom-mixin.libsonnet index bd01cfd4..f18c273c 100644 --- a/docs/node-mixin/lib/prom-mixin.libsonnet +++ b/docs/node-mixin/lib/prom-mixin.libsonnet @@ -1,560 +1,535 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; local dashboard = grafana.dashboard; -local row = grafana.row; -local prometheus = grafana.prometheus; -local template = grafana.template; -local graphPanel = grafana.graphPanel; -local grafana70 = import 'github.com/grafana/grafonnet-lib/grafonnet-7.0/grafana.libsonnet'; -local gaugePanel = grafana70.panel.gauge; -local table = grafana70.panel.table; +local row = grafana.panel.row; +local prometheus = grafana.query.prometheus; +local variable = dashboard.variable; + +local timeSeriesPanel = grafana.panel.timeSeries; +local tsOptions = timeSeriesPanel.options; +local tsStandardOptions = timeSeriesPanel.standardOptions; +local tsQueryOptions = timeSeriesPanel.queryOptions; +local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom; + +local gaugePanel = grafana.panel.gauge; +local gaugeStep = gaugePanel.standardOptions.threshold.step; + +local table = grafana.panel.table; +local tableStep = table.standardOptions.threshold.step; +local tableOverride = table.standardOptions.override; +local tableTransformation = table.queryOptions.transformation; { new(config=null, platform=null, uid=null):: { - local prometheusDatasourceTemplate = { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: 'Data Source', - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: '', - type: 'datasource', - }, + local prometheusDatasourceVariable = variable.datasource.new( + 'datasource', 'prometheus' + ), - local clusterTemplatePrototype = - template.new( - 'cluster', - '$datasource', - '', - hide=if config.showMultiCluster then '' else '2', - refresh='time', - label='Cluster', - ), - local clusterTemplate = - if platform == 'Darwin' then - clusterTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s)' % config } - else - clusterTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s)' % config }, + local clusterVariablePrototype = + variable.query.new('cluster') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing()) + + variable.query.refresh.onTime() + + variable.query.generalOptions.withLabel('Cluster'), - local instanceTemplatePrototype = - template.new( - 'instance', - '$datasource', - '', - refresh='time', - label='Instance', - ), - local instanceTemplate = + local clusterVariable = if platform == 'Darwin' then - instanceTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}, instance)' % config } + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + ' %(clusterLabel)s' % config, + 'node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}' % config, + ) else - instanceTemplatePrototype - { query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}, instance)' % config }, + clusterVariablePrototype + + variable.query.queryTypes.withLabelValues( + '%(clusterLabel)s' % config, + 'node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}' % config, + ), + + local instanceVariablePrototype = + variable.query.new('instance') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + variable.query.refresh.onTime() + + variable.query.generalOptions.withLabel('Instance'), + + local instanceVariable = + if platform == 'Darwin' then + instanceVariablePrototype + + variable.query.queryTypes.withLabelValues( + 'instance', + 'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}' % config, + ) + else + instanceVariablePrototype + + variable.query.queryTypes.withLabelValues( + 'instance', + 'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}' % config, + ), local idleCPU = - graphPanel.new( - 'CPU Usage', - datasource='$datasource', - span=6, - format='percentunit', - max=1, - min=0, - stack=true, - ) - .addTarget(prometheus.target( - ||| - ( - (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval]))) - / ignoring(cpu) group_left - count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"}) - ) - ||| % config, - legendFormat='{{cpu}}', - intervalFactor=5, - )), - - local systemLoad = - graphPanel.new( - 'Load Average', - datasource='$datasource', - span=6, - format='short', - min=0, - fill=0, - ) - .addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average')) - .addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average')) - .addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average')) - .addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config, legendFormat='logical cores')), - - local memoryGraphPanelPrototype = - graphPanel.new( - 'Memory Usage', - datasource='$datasource', - span=9, - format='bytes', - min=0, - ), - local memoryGraph = - if platform == 'Linux' then - memoryGraphPanelPrototype { stack: true } - .addTarget(prometheus.target( + timeSeriesPanel.new('CPU Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('percentunit') + + tsCustom.stacking.withMode('normal') + + tsStandardOptions.withMax(1) + + tsStandardOptions.withMin(0) + + tsOptions.tooltip.withMode('multi') + + tsCustom.withFillOpacity(10) + + tsCustom.withShowPoints('never') + + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', ||| ( - node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - - node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval]))) + / ignoring(cpu) group_left + count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"}) ) ||| % config, - legendFormat='memory used' - )) - .addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers')) - .addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached')) - .addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free')) + ) + + prometheus.withLegendFormat('{{cpu}}') + + prometheus.withIntervalFactor(5), + ]), + + local systemLoad = + timeSeriesPanel.new('Load Average') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('short') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('1m load average'), + prometheus.new('$datasource', 'node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('5m load average'), + prometheus.new('$datasource', 'node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('15m load average'), + prometheus.new('$datasource', 'count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config) + prometheus.withLegendFormat('logical cores'), + ]), + + local memoryGraphPanelPrototype = + timeSeriesPanel.new('Memory Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bytes') + + tsStandardOptions.withMin(0) + + tsOptions.tooltip.withMode('multi') + + tsCustom.withFillOpacity(10) + + tsCustom.withShowPoints('never'), + + local memoryGraph = + if platform == 'Linux' then + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('normal') + + tsQueryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - + node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config, + ) + prometheus.withLegendFormat('memory used'), + prometheus.new('$datasource', 'node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory buffers'), + prometheus.new('$datasource', 'node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory cached'), + prometheus.new('$datasource', 'node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory free'), + ]) else if platform == 'Darwin' then // not useful to stack - memoryGraphPanelPrototype { stack: false } - .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) - .addTarget(prometheus.target( - ||| - ( - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + - node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='Memory Used' - )) - .addTarget(prometheus.target( - ||| - ( - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='App Memory' - )) - .addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory')) - .addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')) + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('none') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'), + prometheus.new( + '$datasource', + ||| + ( + node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + + node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + + node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat( + 'Memory Used' + ), + prometheus.new( + '$datasource', + ||| + ( + node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat( + 'App Memory' + ), + prometheus.new('$datasource', 'node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Wired Memory'), + prometheus.new('$datasource', 'node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Compressed'), + ]) + else if platform == 'AIX' then - memoryGraphPanelPrototype { stack: false } - .addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) - .addTarget(prometheus.target( - ||| - ( - node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - - node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - ) - ||| % config, legendFormat='Memory Used' - )), + memoryGraphPanelPrototype + + tsCustom.stacking.withMode('none') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'), + prometheus.new( + '$datasource', + ||| + ( + node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - + node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + ) + ||| % config + ) + prometheus.withLegendFormat('Memory Used'), + ]), // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. local memoryGaugePanelPrototype = - gaugePanel.new( - title='Memory Usage', - datasource='$datasource', - ) - .addThresholdStep('rgba(50, 172, 45, 0.97)') - .addThresholdStep('rgba(237, 129, 40, 0.89)', 80) - .addThresholdStep('rgba(245, 54, 54, 0.9)', 90) - .setFieldConfig(max=100, min=0, unit='percent') - + { - span: 3, - }, + gaugePanel.new('Memory Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + gaugePanel.standardOptions.thresholds.withSteps([ + gaugeStep.withColor('rgba(50, 172, 45, 0.97)'), + gaugeStep.withColor('rgba(237, 129, 40, 0.89)') + gaugeStep.withValue(80), + gaugeStep.withColor('rgba(245, 54, 54, 0.9)') + gaugeStep.withValue(90), + ]) + + gaugePanel.standardOptions.withMax(100) + + gaugePanel.standardOptions.withMin(0) + + gaugePanel.standardOptions.withUnit('percent'), local memoryGauge = if platform == 'Linux' then memoryGaugePanelPrototype - - .addTarget(prometheus.target( - ||| - 100 - - ( - avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / - avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - * 100 - ) - ||| % config, - )) + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + 100 - + ( + avg(node_memory_MemAvailable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / + avg(node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + * 100 + ) + ||| % config, + ), + ]) else if platform == 'Darwin' then memoryGaugePanelPrototype - .addTarget(prometheus.target( - ||| - ( - ( - avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - - avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + - avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + - avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - ) / - avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - ) - * - 100 - ||| % config - )) + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + ( + ( + avg(node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - + avg(node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + + avg(node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + + avg(node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + ) / + avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + ) + * + 100 + ||| % config + ), + ]) + else if platform == 'AIX' then memoryGaugePanelPrototype - .addTarget(prometheus.target( - ||| - 100 - - ( - avg(node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / - avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) - * 100 - ) - ||| % config - )), + + gaugePanel.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + 100 - + ( + avg(node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) / + avg(node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}) + * 100 + ) + ||| % config + ), + ]), local diskIO = - graphPanel.new( - 'Disk I/O', - datasource='$datasource', - span=6, - min=0, - fill=0, - ) - // TODO: Does it make sense to have those three in the same panel? - .addTarget(prometheus.target( - 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} read', - intervalFactor=1, - )) - .addTarget(prometheus.target( - 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} written', - intervalFactor=1, - )) - .addTarget(prometheus.target( - 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, - legendFormat='{{device}} io time', - intervalFactor=1, - )) + - { - seriesOverrides: [ - { - alias: '/ read| written/', - yaxis: 1, - }, - { - alias: '/ io time/', - yaxis: 2, - }, - ], - yaxes: [ - self.yaxe(format='Bps'), - self.yaxe(format='percentunit'), - ], - }, + timeSeriesPanel.new('Disk I/O') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + // TODO: Does it make sense to have those three in the same panel? + prometheus.new('$datasource', 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} read') + + prometheus.withIntervalFactor(1), + prometheus.new('$datasource', 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} written') + + prometheus.withIntervalFactor(1), + prometheus.new('$datasource', 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config) + + prometheus.withLegendFormat('{{device}} io time') + + prometheus.withIntervalFactor(1), + ]) + + tsStandardOptions.withOverrides( + [ + tsStandardOptions.override.byRegexp.new('/ read| written/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions( + tsStandardOptions.withUnit('Bps') + ), + tsStandardOptions.override.byRegexp.new('/ io time/') + + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(tsStandardOptions.withUnit('percentunit')), + ] + ), local diskSpaceUsage = - table.new( - title='Disk Space Usage', - datasource='$datasource', - ) - .setFieldConfig(unit='decbytes') - .addThresholdStep(color='green', value=null) - .addThresholdStep(color='yellow', value=0.8) - .addThresholdStep(color='red', value=0.9) - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addTarget(prometheus.target( - ||| - max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) - ||| % config, - legendFormat='', - instant=true, - format='table' - )) - .addOverride( - matcher={ - id: 'byName', - options: 'Mounted on', - }, - properties=[ - { - id: 'custom.width', - value: 260, - }, - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Size', - }, - properties=[ - - { - id: 'custom.width', - value: 93, - }, - - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Used', - }, - properties=[ - { - id: 'custom.width', - value: 72, - }, - ], - ) - .addOverride( - matcher={ - id: 'byName', - options: 'Available', - }, - properties=[ - { - id: 'custom.width', - value: 88, - }, - ], - ) - - .addOverride( - matcher={ - id: 'byName', - options: 'Used, %', - }, - properties=[ - { - id: 'unit', - value: 'percentunit', - }, - { - id: 'custom.displayMode', - value: 'gradient-gauge', - }, - { - id: 'max', - value: 1, - }, - { - id: 'min', - value: 0, - }, + table.new('Disk Space Usage') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + table.standardOptions.withUnit('decbytes') + + table.standardOptions.thresholds.withSteps( + [ + tableStep.withColor('green'), + tableStep.withColor('yellow') + gaugeStep.withValue(0.8), + tableStep.withColor('red') + gaugeStep.withValue(0.9), ] ) - + { span: 6 } - + { - transformations: [ + + table.queryOptions.withTargets([ + prometheus.new( + '$datasource', + ||| + max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config + ) + + prometheus.withLegendFormat('') + + prometheus.withInstant() + + prometheus.withFormat('table'), + prometheus.new( + '$datasource', + ||| + max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s}) + ||| % config + ) + + prometheus.withLegendFormat('') + + prometheus.withInstant() + + prometheus.withFormat('table'), + ]) + + table.standardOptions.withOverrides([ + tableOverride.byName.new('Mounted on') + + tableOverride.byName.withProperty('custom.width', 260), + tableOverride.byName.new('Size') + + tableOverride.byName.withProperty('custom.width', 93), + tableOverride.byName.new('Used') + + tableOverride.byName.withProperty('custom.width', 72), + tableOverride.byName.new('Available') + + tableOverride.byName.withProperty('custom.width', 88), + tableOverride.byName.new('Used, %') + + tableOverride.byName.withProperty('unit', 'percentunit') + + tableOverride.byName.withPropertiesFromOptions( + table.fieldConfig.defaults.custom.withCellOptions( + { type: 'gauge' }, + ) + ) + + tableOverride.byName.withProperty('max', 1) + + tableOverride.byName.withProperty('min', 0), + ]) + + table.queryOptions.withTransformations([ + tableTransformation.withId('groupBy') + + tableTransformation.withOptions( { - id: 'groupBy', - options: { - fields: { - 'Value #A': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - 'Value #B': { - aggregations: [ - 'lastNotNull', - ], - operation: 'aggregate', - }, - mountpoint: { - aggregations: [], - operation: 'groupby', - }, + fields: { + 'Value #A': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', + }, + 'Value #B': { + aggregations: [ + 'lastNotNull', + ], + operation: 'aggregate', + }, + mountpoint: { + aggregations: [], + operation: 'groupby', }, }, - }, + } + ), + tableTransformation.withId('merge'), + tableTransformation.withId('calculateField') + + tableTransformation.withOptions( { - id: 'merge', - options: {}, - }, - { - id: 'calculateField', - options: { - alias: 'Used', - binary: { - left: 'Value #A (lastNotNull)', - operator: '-', - reducer: 'sum', - right: 'Value #B (lastNotNull)', - }, - mode: 'binary', - reduce: { - reducer: 'sum', - }, + alias: 'Used', + binary: { + left: 'Value #A (lastNotNull)', + operator: '-', + reducer: 'sum', + right: 'Value #B (lastNotNull)', }, - }, - { - id: 'calculateField', - options: { - alias: 'Used, %', - binary: { - left: 'Used', - operator: '/', - reducer: 'sum', - right: 'Value #A (lastNotNull)', - }, - mode: 'binary', - reduce: { - reducer: 'sum', - }, + mode: 'binary', + reduce: { + reducer: 'sum', }, - }, + } + ), + tableTransformation.withId('calculateField') + + tableTransformation.withOptions( { - id: 'organize', - options: { - excludeByName: {}, - indexByName: {}, - renameByName: { - 'Value #A (lastNotNull)': 'Size', - 'Value #B (lastNotNull)': 'Available', - mountpoint: 'Mounted on', + alias: 'Used, %', + binary: { + left: 'Used', + operator: '/', + reducer: 'sum', + right: 'Value #A (lastNotNull)', + }, + mode: 'binary', + reduce: { + reducer: 'sum', + }, + } + ), + tableTransformation.withId('organize') + + tableTransformation.withOptions( + { + excludeByName: {}, + indexByName: {}, + renameByName: { + 'Value #A (lastNotNull)': 'Size', + 'Value #B (lastNotNull)': 'Available', + mountpoint: 'Mounted on', + }, + } + ), + tableTransformation.withId('sortBy') + + tableTransformation.withOptions( + { + fields: {}, + sort: [ + { + field: 'Mounted on', }, - }, - }, - { - id: 'sortBy', - options: { - fields: {}, - sort: [ - { - field: 'Mounted on', - }, - ], - }, - }, - ], - }, + ], + } + ), + ]), local networkReceived = - graphPanel.new( - 'Network Received', - description='Network received (bits/s)', - datasource='$datasource', - span=6, - format='bps', - min=0, - fill=0, - ) - .addTarget(prometheus.target( - 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, - legendFormat='{{device}}', - intervalFactor=1, - )), + timeSeriesPanel.new('Network Received') + + timeSeriesPanel.panelOptions.withDescription('Network received (bits/s)') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bps') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsCustom.withShowPoints('never') + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + + prometheus.withLegendFormat('{{device}}') + + prometheus.withIntervalFactor(1), + ]), local networkTransmitted = - graphPanel.new( - 'Network Transmitted', - description='Network transmitted (bits/s)', - datasource='$datasource', - span=6, - format='bps', - min=0, - fill=0, - ) - .addTarget(prometheus.target( - 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, - legendFormat='{{device}}', - intervalFactor=1, - )), + timeSeriesPanel.new('Network Transmitted') + + timeSeriesPanel.panelOptions.withDescription('Network transmitted (bits/s)') + + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable) + + tsStandardOptions.withUnit('bps') + + tsStandardOptions.withMin(0) + + tsCustom.withFillOpacity(0) + + tsOptions.tooltip.withMode('multi') + + tsQueryOptions.withTargets([ + prometheus.new('$datasource', 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config) + + prometheus.withLegendFormat('{{device}}') + + prometheus.withIntervalFactor(1), + ]), local cpuRow = row.new('CPU') - .addPanel(idleCPU) - .addPanel(systemLoad), + + row.withPanels([ + idleCPU, + systemLoad, + ]), - local memoryRow = - row.new('Memory') - .addPanel(memoryGraph) - .addPanel(memoryGauge), + local memoryRow = [ + row.new('Memory') + row.gridPos.withY(8), + memoryGraph + row.gridPos.withX(0) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(18), + memoryGauge + row.gridPos.withX(18) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(6), + ], local diskRow = row.new('Disk') - .addPanel(diskIO) - .addPanel(diskSpaceUsage), + + row.withPanels([ + diskIO, + diskSpaceUsage, + ]), local networkRow = row.new('Network') - .addPanel(networkReceived) - .addPanel(networkTransmitted), + + row.withPanels([ + networkReceived, + networkTransmitted, + ]), - local rows = - [ + local panels = + grafana.util.grid.makeGrid([ cpuRow, - memoryRow, + ], panelWidth=12, panelHeight=7) + + memoryRow + + grafana.util.grid.makeGrid([ diskRow, networkRow, - ], + ], panelWidth=12, panelHeight=7, startY=18), - local templates = + local variables = [ - prometheusDatasourceTemplate, - clusterTemplate, - instanceTemplate, + prometheusDatasourceVariable, + clusterVariable, + instanceVariable, ], - dashboard: if platform == 'Linux' then dashboard.new( '%sNodes' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels) else if platform == 'Darwin' then dashboard.new( '%sMacOS' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows) + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels) else if platform == 'AIX' then dashboard.new( '%sAIX' % config.dashboardNamePrefix, - time_from='now-1h', - tags=(config.dashboardTags), - timezone='utc', - refresh='30s', - uid=std.md5(uid), - graphTooltip='shared_crosshair' ) - .addTemplates(templates) - .addRows(rows), + + dashboard.time.withFrom('now-1h') + + dashboard.withTags(config.dashboardTags) + + dashboard.withTimezone('utc') + + dashboard.withRefresh('30s') + + dashboard.withUid(std.md5(uid)) + + dashboard.graphTooltip.withSharedCrosshair() + + dashboard.withVariables(variables) + + dashboard.withPanels(panels), }, } diff --git a/go.mod b/go.mod index e3162464..2fc10c0f 100644 --- a/go.mod +++ b/go.mod @@ -18,18 +18,18 @@ require ( github.com/mattn/go-xmlrpc v0.0.3 github.com/mdlayher/ethtool v0.2.0 github.com/mdlayher/netlink v1.7.2 - github.com/mdlayher/wifi v0.3.0 + github.com/mdlayher/wifi v0.3.1 github.com/opencontainers/selinux v1.11.1 github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 github.com/prometheus-community/go-runit v0.1.0 github.com/prometheus/client_golang v1.20.5 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.60.1 - github.com/prometheus/exporter-toolkit v0.13.1 - github.com/prometheus/procfs v0.15.1 - github.com/safchain/ethtool v0.4.1 + github.com/prometheus/common v0.61.0 + github.com/prometheus/exporter-toolkit v0.13.2 + github.com/prometheus/procfs v0.15.2-0.20240603130017-1754b780536b // == v0.15.1 + https://github.com/prometheus/procfs/commit/1754b780536bb81082baa913e04cc4fff4d2baea + github.com/safchain/ethtool v0.5.9 golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 - golang.org/x/sys v0.26.0 + golang.org/x/sys v0.28.0 howett.net/plist v1.0.1 ) @@ -51,11 +51,11 @@ require ( github.com/xhit/go-str2duration/v2 v2.1.0 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/net v0.29.0 // indirect - golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/text v0.19.0 // indirect - google.golang.org/protobuf v1.34.2 // indirect + golang.org/x/crypto v0.31.0 // indirect + golang.org/x/net v0.32.0 // indirect + golang.org/x/oauth2 v0.24.0 // indirect + golang.org/x/sync v0.10.0 // indirect + golang.org/x/text v0.21.0 // indirect + google.golang.org/protobuf v1.35.2 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/go.sum b/go.sum index 92307274..919dfa3e 100644 --- a/go.sum +++ b/go.sum @@ -61,8 +61,8 @@ github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= -github.com/mdlayher/wifi v0.3.0 h1:ZfS81w/7xTWBJfhM77K0k6m3sJckwoNOoZUwOW34omo= -github.com/mdlayher/wifi v0.3.0/go.mod h1:/bdkqKYl+lD4recmQM6bTHxMrEUW70reibTyr93CAd0= +github.com/mdlayher/wifi v0.3.1 h1:bZDuMI1f7z5BtUUO3NgHRdR/R88YtywIe6dsEFI0Txs= +github.com/mdlayher/wifi v0.3.1/go.mod h1:ODQaObvsglghTuNhezD9grkTB4shVNc28aJfTXmvSi8= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= @@ -79,48 +79,48 @@ github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+ github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= -github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= -github.com/prometheus/exporter-toolkit v0.13.1 h1:Evsh0gWQo2bdOHlnz9+0Nm7/OFfIwhE2Ws4A2jIlR04= -github.com/prometheus/exporter-toolkit v0.13.1/go.mod h1:ujdv2YIOxtdFxxqtloLpbqmxd5J0Le6IITUvIRSWjj0= -github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= -github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ= +github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s= +github.com/prometheus/exporter-toolkit v0.13.2 h1:Z02fYtbqTMy2i/f+xZ+UK5jy/bl1Ex3ndzh06T/Q9DQ= +github.com/prometheus/exporter-toolkit v0.13.2/go.mod h1:tCqnfx21q6qN1KA4U3Bfb8uWzXfijIrJz3/kTIqMV7g= +github.com/prometheus/procfs v0.15.2-0.20240603130017-1754b780536b h1:4EJkx3vycI+n5JY5ht+bnSUGamkmmXkpcNeO/OBT/0A= +github.com/prometheus/procfs v0.15.2-0.20240603130017-1754b780536b/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= -github.com/safchain/ethtool v0.4.1 h1:S6mEleTADqgynileXoiapt/nKnatyR6bmIHoF+h2ADo= -github.com/safchain/ethtool v0.4.1/go.mod h1:XLLnZmy4OCRTkksP/UiMjij96YmIsBfmBQcs7H6tA48= +github.com/safchain/ethtool v0.5.9 h1://6RvaOKFf3nQ0rl5+8zBbE4/72455VC9Jq61pfq67E= +github.com/safchain/ethtool v0.5.9/go.mod h1:w8oSsZeowyRaM7xJJBAbubzzrOkwO8TBgPSEqPP/5mg= github.com/siebenmann/go-kstat v0.0.0-20210513183136-173c9b0a9973 h1:GfSdC6wKfTGcgCS7BtzF5694Amne1pGCSTY252WhlEY= github.com/siebenmann/go-kstat v0.0.0-20210513183136-173c9b0a9973/go.mod h1:G81aIFAMS9ECrwBYR9YxhlPjWgrItd+Kje78O6+uqm8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/xhit/go-str2duration/v2 v2.1.0 h1:lxklc02Drh6ynqX+DdPyp5pCKLUQpRT8bp8Ydu2Bstc= github.com/xhit/go-str2duration/v2 v2.1.0/go.mod h1:ohY8p+0f07DiV6Em5LKB0s2YpLtXVyJfNt1+BlmyAsU= go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= -golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= -golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= -golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= -golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= +golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= +golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= +golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20211031064116-611d5d643895/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= +google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=