Fix seconds reported by schedstat (#1426)

Upstream bugfix: https://github.com/prometheus/procfs/pull/191

Signed-off-by: Phil Frost <phil@postmates.com>
This commit is contained in:
Phil Frost 2019-08-06 13:08:06 -04:00 committed by Ben Kochie
parent 75462bf4fe
commit 26d4fbdf07
10 changed files with 509 additions and 40 deletions

View file

@ -2440,16 +2440,16 @@ node_qdisc_requeues_total{device="eth0",kind="pfifo_fast"} 2
node_qdisc_requeues_total{device="wlan0",kind="fq"} 1
# HELP node_schedstat_running_seconds_total Number of seconds CPU spent running a process.
# TYPE node_schedstat_running_seconds_total counter
node_schedstat_running_seconds_total{cpu="0"} 2.045936778163039e+13
node_schedstat_running_seconds_total{cpu="1"} 1.904686152592476e+13
node_schedstat_running_seconds_total{cpu="0"} 2.045936778163039e+06
node_schedstat_running_seconds_total{cpu="1"} 1.904686152592476e+06
# HELP node_schedstat_timeslices_total Number of timeslices executed by CPU.
# TYPE node_schedstat_timeslices_total counter
node_schedstat_timeslices_total{cpu="0"} 4.767485306e+09
node_schedstat_timeslices_total{cpu="1"} 5.145567945e+09
# HELP node_schedstat_waiting_seconds_total Number of seconds spent by processing waiting for this CPU.
# TYPE node_schedstat_waiting_seconds_total counter
node_schedstat_waiting_seconds_total{cpu="0"} 3.43796328169361e+12
node_schedstat_waiting_seconds_total{cpu="1"} 3.64107263788241e+12
node_schedstat_waiting_seconds_total{cpu="0"} 343796.328169361
node_schedstat_waiting_seconds_total{cpu="1"} 364107.263788241
# HELP node_scrape_collector_duration_seconds node_exporter: Duration of a collector scrape.
# TYPE node_scrape_collector_duration_seconds gauge
# HELP node_scrape_collector_success node_exporter: Whether a collector succeeded.

View file

@ -2440,16 +2440,16 @@ node_qdisc_requeues_total{device="eth0",kind="pfifo_fast"} 2
node_qdisc_requeues_total{device="wlan0",kind="fq"} 1
# HELP node_schedstat_running_seconds_total Number of seconds CPU spent running a process.
# TYPE node_schedstat_running_seconds_total counter
node_schedstat_running_seconds_total{cpu="0"} 2.045936778163039e+13
node_schedstat_running_seconds_total{cpu="1"} 1.904686152592476e+13
node_schedstat_running_seconds_total{cpu="0"} 2.045936778163039e+06
node_schedstat_running_seconds_total{cpu="1"} 1.904686152592476e+06
# HELP node_schedstat_timeslices_total Number of timeslices executed by CPU.
# TYPE node_schedstat_timeslices_total counter
node_schedstat_timeslices_total{cpu="0"} 4.767485306e+09
node_schedstat_timeslices_total{cpu="1"} 5.145567945e+09
# HELP node_schedstat_waiting_seconds_total Number of seconds spent by processing waiting for this CPU.
# TYPE node_schedstat_waiting_seconds_total counter
node_schedstat_waiting_seconds_total{cpu="0"} 3.43796328169361e+12
node_schedstat_waiting_seconds_total{cpu="1"} 3.64107263788241e+12
node_schedstat_waiting_seconds_total{cpu="0"} 343796.328169361
node_schedstat_waiting_seconds_total{cpu="1"} 364107.263788241
# HELP node_scrape_collector_duration_seconds node_exporter: Duration of a collector scrape.
# TYPE node_scrape_collector_duration_seconds gauge
# HELP node_scrape_collector_success node_exporter: Whether a collector succeeded.

View file

@ -20,6 +20,8 @@ import (
"github.com/prometheus/procfs"
)
const nsPerSec = 1e9
var (
runningSecondsTotal = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "schedstat", "running_seconds_total"),
@ -71,14 +73,14 @@ func (c *schedstatCollector) Update(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(
runningSecondsTotal,
prometheus.CounterValue,
cpu.RunningSeconds(),
float64(cpu.RunningNanoseconds)/nsPerSec,
cpu.CPUNum,
)
ch <- prometheus.MustNewConstMetric(
waitingSecondsTotal,
prometheus.CounterValue,
cpu.WaitingSeconds(),
float64(cpu.WaitingNanoseconds)/nsPerSec,
cpu.CPUNum,
)

2
go.mod
View file

@ -15,7 +15,7 @@ require (
github.com/prometheus/client_golang v1.0.0
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90
github.com/prometheus/common v0.4.1
github.com/prometheus/procfs v0.0.4-0.20190702183519-8f55e607908e
github.com/prometheus/procfs v0.0.4-0.20190731153504-5da962fa40f1
github.com/siebenmann/go-kstat v0.0.0-20160321171754-d34789b79745
github.com/sirupsen/logrus v1.4.2 // indirect
github.com/soundcloud/go-runit v0.0.0-20150630195641-06ad41a06c4a

4
go.sum
View file

@ -68,8 +68,8 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.4-0.20190702183519-8f55e607908e h1:p57e/ejwNofSHhEh+d7KoCVpHSTN3efX5Aj3z0jGWIE=
github.com/prometheus/procfs v0.0.4-0.20190702183519-8f55e607908e/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
github.com/prometheus/procfs v0.0.4-0.20190731153504-5da962fa40f1 h1:uqK/YnaVFq1uofHlzj+IR4HhCYA/nbrvJ431l7cm7Vs=
github.com/prometheus/procfs v0.0.4-0.20190731153504-5da962fa40f1/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
github.com/siebenmann/go-kstat v0.0.0-20160321171754-d34789b79745 h1:IuH7WumZNax0D+rEqmy2TyhKCzrtMGqbZO0b8rO00JA=
github.com/siebenmann/go-kstat v0.0.0-20160321171754-d34789b79745/go.mod h1:G81aIFAMS9ECrwBYR9YxhlPjWgrItd+Kje78O6+uqm8=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=

View file

@ -909,6 +909,272 @@ Lines: 1
0
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: fixtures/proc/zoneinfo
Lines: 262
Node 0, zone DMA
per-node stats
nr_inactive_anon 230981
nr_active_anon 547580
nr_inactive_file 316904
nr_active_file 346282
nr_unevictable 115467
nr_slab_reclaimable 131220
nr_slab_unreclaimable 47320
nr_isolated_anon 0
nr_isolated_file 0
workingset_nodes 11627
workingset_refault 466886
workingset_activate 276925
workingset_restore 84055
workingset_nodereclaim 487
nr_anon_pages 795576
nr_mapped 215483
nr_file_pages 761874
nr_dirty 908
nr_writeback 0
nr_writeback_temp 0
nr_shmem 224925
nr_shmem_hugepages 0
nr_shmem_pmdmapped 0
nr_anon_transparent_hugepages 0
nr_unstable 0
nr_vmscan_write 12950
nr_vmscan_immediate_reclaim 3033
nr_dirtied 8007423
nr_written 7752121
nr_kernel_misc_reclaimable 0
pages free 3952
min 33
low 41
high 49
spanned 4095
present 3975
managed 3956
protection: (0, 2877, 7826, 7826, 7826)
nr_free_pages 3952
nr_zone_inactive_anon 0
nr_zone_active_anon 0
nr_zone_inactive_file 0
nr_zone_active_file 0
nr_zone_unevictable 0
nr_zone_write_pending 0
nr_mlock 0
nr_page_table_pages 0
nr_kernel_stack 0
nr_bounce 0
nr_zspages 0
nr_free_cma 0
numa_hit 1
numa_miss 0
numa_foreign 0
numa_interleave 0
numa_local 1
numa_other 0
pagesets
cpu: 0
count: 0
high: 0
batch: 1
vm stats threshold: 8
cpu: 1
count: 0
high: 0
batch: 1
vm stats threshold: 8
cpu: 2
count: 0
high: 0
batch: 1
vm stats threshold: 8
cpu: 3
count: 0
high: 0
batch: 1
vm stats threshold: 8
cpu: 4
count: 0
high: 0
batch: 1
vm stats threshold: 8
cpu: 5
count: 0
high: 0
batch: 1
vm stats threshold: 8
cpu: 6
count: 0
high: 0
batch: 1
vm stats threshold: 8
cpu: 7
count: 0
high: 0
batch: 1
vm stats threshold: 8
node_unreclaimable: 0
start_pfn: 1
Node 0, zone DMA32
pages free 204252
min 19510
low 21059
high 22608
spanned 1044480
present 759231
managed 742806
protection: (0, 0, 4949, 4949, 4949)
nr_free_pages 204252
nr_zone_inactive_anon 118558
nr_zone_active_anon 106598
nr_zone_inactive_file 75475
nr_zone_active_file 70293
nr_zone_unevictable 66195
nr_zone_write_pending 64
nr_mlock 4
nr_page_table_pages 1756
nr_kernel_stack 2208
nr_bounce 0
nr_zspages 0
nr_free_cma 0
numa_hit 113952967
numa_miss 0
numa_foreign 0
numa_interleave 0
numa_local 113952967
numa_other 0
pagesets
cpu: 0
count: 345
high: 378
batch: 63
vm stats threshold: 48
cpu: 1
count: 356
high: 378
batch: 63
vm stats threshold: 48
cpu: 2
count: 325
high: 378
batch: 63
vm stats threshold: 48
cpu: 3
count: 346
high: 378
batch: 63
vm stats threshold: 48
cpu: 4
count: 321
high: 378
batch: 63
vm stats threshold: 48
cpu: 5
count: 316
high: 378
batch: 63
vm stats threshold: 48
cpu: 6
count: 373
high: 378
batch: 63
vm stats threshold: 48
cpu: 7
count: 339
high: 378
batch: 63
vm stats threshold: 48
node_unreclaimable: 0
start_pfn: 4096
Node 0, zone Normal
pages free 18553
min 11176
low 13842
high 16508
spanned 1308160
present 1308160
managed 1268711
protection: (0, 0, 0, 0, 0)
nr_free_pages 18553
nr_zone_inactive_anon 112423
nr_zone_active_anon 440982
nr_zone_inactive_file 241429
nr_zone_active_file 275989
nr_zone_unevictable 49272
nr_zone_write_pending 844
nr_mlock 154
nr_page_table_pages 9750
nr_kernel_stack 15136
nr_bounce 0
nr_zspages 0
nr_free_cma 0
numa_hit 162718019
numa_miss 0
numa_foreign 0
numa_interleave 26812
numa_local 162718019
numa_other 0
pagesets
cpu: 0
count: 316
high: 378
batch: 63
vm stats threshold: 56
cpu: 1
count: 366
high: 378
batch: 63
vm stats threshold: 56
cpu: 2
count: 60
high: 378
batch: 63
vm stats threshold: 56
cpu: 3
count: 256
high: 378
batch: 63
vm stats threshold: 56
cpu: 4
count: 253
high: 378
batch: 63
vm stats threshold: 56
cpu: 5
count: 159
high: 378
batch: 63
vm stats threshold: 56
cpu: 6
count: 311
high: 378
batch: 63
vm stats threshold: 56
cpu: 7
count: 264
high: 378
batch: 63
vm stats threshold: 56
node_unreclaimable: 0
start_pfn: 1048576
Node 0, zone Movable
pages free 0
min 0
low 0
high 0
spanned 0
present 0
managed 0
protection: (0, 0, 0, 0, 0)
Node 0, zone Device
pages free 0
min 0
low 0
high 0
spanned 0
present 0
managed 0
protection: (0, 0, 0, 0, 0)
Mode: 444
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: fixtures/sys
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

View file

@ -49,6 +49,21 @@ func ParseUint64s(ss []string) ([]uint64, error) {
return us, nil
}
// ParsePInt64s converts every element of ss from its base-10 textual form to
// an int64 and returns pointers to the parsed values, in input order.
//
// Each returned pointer refers to its own int64. If any element fails to
// parse, the strconv error is returned unchanged and the result slice is nil.
func ParsePInt64s(ss []string) ([]*int64, error) {
	ptrs := make([]*int64, len(ss))
	for i := range ss {
		// n is declared per iteration so every slot gets a distinct address.
		n, err := strconv.ParseInt(ss[i], 10, 64)
		if err != nil {
			return nil, err
		}
		ptrs[i] = &n
	}
	return ptrs, nil
}
// ReadUintFromFile reads a file and attempts to parse a uint64 from it.
func ReadUintFromFile(path string) (uint64, error) {
data, err := ioutil.ReadFile(path)

View file

@ -31,6 +31,12 @@ var (
// See
// https://www.kernel.org/doc/Documentation/scheduler/sched-stats.txt
// for a detailed description of what these numbers mean.
//
// Note the current kernel documentation claims some of the time units are in
// jiffies when they are actually in nanoseconds since 2.6.23 with the
// introduction of CFS. A fix to the documentation is pending. See
// https://lore.kernel.org/patchwork/project/lkml/list/?series=403473
type Schedstat struct {
CPUs []*SchedstatCPU
}
@ -39,16 +45,16 @@ type Schedstat struct {
type SchedstatCPU struct {
CPUNum string
RunningJiffies uint64
WaitingJiffies uint64
RunTimeslices uint64
RunningNanoseconds uint64
WaitingNanoseconds uint64
RunTimeslices uint64
}
// ProcSchedstat contains the values from /proc/<pid>/schedstat
type ProcSchedstat struct {
RunningJiffies uint64
WaitingJiffies uint64
RunTimeslices uint64
RunningNanoseconds uint64
WaitingNanoseconds uint64
RunTimeslices uint64
}
func (fs FS) Schedstat() (*Schedstat, error) {
@ -67,12 +73,12 @@ func (fs FS) Schedstat() (*Schedstat, error) {
cpu := &SchedstatCPU{}
cpu.CPUNum = match[1]
cpu.RunningJiffies, err = strconv.ParseUint(match[8], 10, 64)
cpu.RunningNanoseconds, err = strconv.ParseUint(match[8], 10, 64)
if err != nil {
continue
}
cpu.WaitingJiffies, err = strconv.ParseUint(match[9], 10, 64)
cpu.WaitingNanoseconds, err = strconv.ParseUint(match[9], 10, 64)
if err != nil {
continue
}
@ -93,12 +99,12 @@ func parseProcSchedstat(contents string) (stats ProcSchedstat, err error) {
match := procLineRE.FindStringSubmatch(contents)
if match != nil {
stats.RunningJiffies, err = strconv.ParseUint(match[1], 10, 64)
stats.RunningNanoseconds, err = strconv.ParseUint(match[1], 10, 64)
if err != nil {
return
}
stats.WaitingJiffies, err = strconv.ParseUint(match[2], 10, 64)
stats.WaitingNanoseconds, err = strconv.ParseUint(match[2], 10, 64)
if err != nil {
return
}
@ -110,19 +116,3 @@ func parseProcSchedstat(contents string) (stats ProcSchedstat, err error) {
err = errors.New("could not parse schedstat")
return
}
func (stat *SchedstatCPU) RunningSeconds() float64 {
return float64(stat.RunningJiffies) / userHZ
}
func (stat *SchedstatCPU) WaitingSeconds() float64 {
return float64(stat.WaitingJiffies) / userHZ
}
func (stat *ProcSchedstat) RunningSeconds() float64 {
return float64(stat.RunningJiffies) / userHZ
}
func (stat *ProcSchedstat) WaitingSeconds() float64 {
return float64(stat.WaitingJiffies) / userHZ
}

196
vendor/github.com/prometheus/procfs/zoneinfo.go generated vendored Normal file
View file

@ -0,0 +1,196 @@
// Copyright 2019 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build !windows
package procfs
import (
"bytes"
"fmt"
"io/ioutil"
"regexp"
"strings"
"github.com/prometheus/procfs/internal/util"
)
// Zoneinfo holds info parsed from /proc/zoneinfo.
//
// parseZoneinfo produces one value per section of the file. Each *int64 field
// is assigned only when a line whose first whitespace-separated token equals
// the key quoted next to the field is seen in that section; otherwise the
// field keeps its zero value (nil).
type Zoneinfo struct {
Node string // digits captured from the "<node>, zone <name>" header line
Zone string // name captured from the header; reset to "" if the section has a "per-node stats" line
NrFreePages *int64 // "nr_free_pages"
Min *int64 // "min"
Low *int64 // "low"
High *int64 // "high"
Scanned *int64 // "scanned"
Spanned *int64 // "spanned"
Present *int64 // "present"
Managed *int64 // "managed"
NrActiveAnon *int64 // "nr_active_anon"
NrInactiveAnon *int64 // "nr_inactive_anon"
NrIsolatedAnon *int64 // "nr_isolated_anon"
NrAnonPages *int64 // "nr_anon_pages"
NrAnonTransparentHugepages *int64 // "nr_anon_transparent_hugepages"
NrActiveFile *int64 // "nr_active_file"
NrInactiveFile *int64 // "nr_inactive_file"
NrIsolatedFile *int64 // "nr_isolated_file"
NrFilePages *int64 // "nr_file_pages"
NrSlabReclaimable *int64 // "nr_slab_reclaimable"
NrSlabUnreclaimable *int64 // "nr_slab_unreclaimable"
NrMlockStack *int64 // "nr_mlock_stack"
NrKernelStack *int64 // "nr_kernel_stack"
NrMapped *int64 // "nr_mapped"
NrDirty *int64 // "nr_dirty"
NrWriteback *int64 // "nr_writeback"
NrUnevictable *int64 // "nr_unevictable"
NrShmem *int64 // "nr_shmem"
NrDirtied *int64 // "nr_dirtied"
NrWritten *int64 // "nr_written"
NumaHit *int64 // "numa_hit"
NumaMiss *int64 // "numa_miss"
NumaForeign *int64 // "numa_foreign"
NumaInterleave *int64 // "numa_interleave"
NumaLocal *int64 // "numa_local"
NumaOther *int64 // "numa_other"
Protection []*int64 // values listed on the "protection: (a, b, ...)" line, in order
}
// nodeZoneRE matches the "<digits>, zone <word>" part of a section header
// line. Submatch 1 is the run of digits before the comma (used as the node)
// and submatch 2 is the word following "zone" (used as the zone name).
var nodeZoneRE = regexp.MustCompile(`(\d+), zone\s+(\w+)`)
// Zoneinfo reads the file at fs.proc.Path("zoneinfo") (/proc/zoneinfo) and
// returns the slice of structs that parseZoneinfo builds from its contents.
// A read or parse failure is reported as an error that includes the file
// path. More information available here:
// https://www.kernel.org/doc/Documentation/sysctl/vm.txt
func (fs FS) Zoneinfo() ([]Zoneinfo, error) {
	path := fs.proc.Path("zoneinfo")

	raw, readErr := ioutil.ReadFile(path)
	if readErr != nil {
		return nil, fmt.Errorf("error reading zoneinfo %s: %s", path, readErr)
	}

	zones, parseErr := parseZoneinfo(raw)
	if parseErr != nil {
		return nil, fmt.Errorf("error parsing zoneinfo %s: %s", path, parseErr)
	}
	return zones, nil
}
// parseZoneinfo turns the raw contents of a zoneinfo file into one Zoneinfo
// per section.
//
// Sections are delimited by a newline followed by "Node". Inside a section a
// line matching nodeZoneRE sets Node and Zone, and a "per-node stats" line
// resets Zone to "". Every other line is split on whitespace: the first token
// selects the destination field and the second token is parsed through
// util.NewValueParser. Lines with fewer than two tokens and unrecognised keys
// are ignored. A "protection:" line is parsed as a parenthesised, ", "
// separated list; if that list fails to parse, Protection is left untouched.
//
// The returned error is always nil.
//
// NOTE(review): clearing Zone on "per-node stats" leaves the section that
// carries those stats with an empty zone name — confirm this is intended.
func parseZoneinfo(zoneinfoData []byte) ([]Zoneinfo, error) {
	result := []Zoneinfo{}

	for _, section := range bytes.Split(zoneinfoData, []byte("\nNode")) {
		var zi Zoneinfo

		for _, raw := range strings.Split(string(section), "\n") {
			if m := nodeZoneRE.FindStringSubmatch(raw); m != nil {
				zi.Node, zi.Zone = m[1], m[2]
				continue
			}

			trimmed := strings.TrimSpace(raw)
			if strings.HasPrefix(trimmed, "per-node stats") {
				zi.Zone = ""
				continue
			}

			tokens := strings.Fields(trimmed)
			if len(tokens) < 2 {
				continue
			}
			parser := util.NewValueParser(tokens[1])

			// dst is the field selected by the key; it stays nil for keys
			// that are not plain integer counters.
			var dst **int64
			switch tokens[0] {
			case "nr_free_pages":
				dst = &zi.NrFreePages
			case "min":
				dst = &zi.Min
			case "low":
				dst = &zi.Low
			case "high":
				dst = &zi.High
			case "scanned":
				dst = &zi.Scanned
			case "spanned":
				dst = &zi.Spanned
			case "present":
				dst = &zi.Present
			case "managed":
				dst = &zi.Managed
			case "nr_active_anon":
				dst = &zi.NrActiveAnon
			case "nr_inactive_anon":
				dst = &zi.NrInactiveAnon
			case "nr_isolated_anon":
				dst = &zi.NrIsolatedAnon
			case "nr_anon_pages":
				dst = &zi.NrAnonPages
			case "nr_anon_transparent_hugepages":
				dst = &zi.NrAnonTransparentHugepages
			case "nr_active_file":
				dst = &zi.NrActiveFile
			case "nr_inactive_file":
				dst = &zi.NrInactiveFile
			case "nr_isolated_file":
				dst = &zi.NrIsolatedFile
			case "nr_file_pages":
				dst = &zi.NrFilePages
			case "nr_slab_reclaimable":
				dst = &zi.NrSlabReclaimable
			case "nr_slab_unreclaimable":
				dst = &zi.NrSlabUnreclaimable
			case "nr_mlock_stack":
				dst = &zi.NrMlockStack
			case "nr_kernel_stack":
				dst = &zi.NrKernelStack
			case "nr_mapped":
				dst = &zi.NrMapped
			case "nr_dirty":
				dst = &zi.NrDirty
			case "nr_writeback":
				dst = &zi.NrWriteback
			case "nr_unevictable":
				dst = &zi.NrUnevictable
			case "nr_shmem":
				dst = &zi.NrShmem
			case "nr_dirtied":
				dst = &zi.NrDirtied
			case "nr_written":
				dst = &zi.NrWritten
			case "numa_hit":
				dst = &zi.NumaHit
			case "numa_miss":
				dst = &zi.NumaMiss
			case "numa_foreign":
				dst = &zi.NumaForeign
			case "numa_interleave":
				dst = &zi.NumaInterleave
			case "numa_local":
				dst = &zi.NumaLocal
			case "numa_other":
				dst = &zi.NumaOther
			case "protection:":
				// Take the text after the first colon on the untrimmed line,
				// drop one "(" and one ")", then split the remaining list.
				list := strings.Split(raw, ":")[1]
				list = strings.Replace(list, "(", "", 1)
				list = strings.Replace(list, ")", "", 1)
				list = strings.TrimSpace(list)
				if vals, err := util.ParsePInt64s(strings.Split(list, ", ")); err == nil {
					zi.Protection = vals
				}
			}

			if dst != nil {
				*dst = parser.PInt64()
			}
		}

		result = append(result, zi)
	}

	return result, nil
}

2
vendor/modules.txt vendored
View file

@ -45,7 +45,7 @@ github.com/prometheus/common/version
github.com/prometheus/common/expfmt
github.com/prometheus/common/model
github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg
# github.com/prometheus/procfs v0.0.4-0.20190702183519-8f55e607908e
# github.com/prometheus/procfs v0.0.4-0.20190731153504-5da962fa40f1
github.com/prometheus/procfs
github.com/prometheus/procfs/bcache
github.com/prometheus/procfs/nfs