Skip to content

Commit

Permalink
Merge pull request #5 from jimmybigcommerce/add-switch-to-disable-pro…
Browse files Browse the repository at this point in the history
…cess-level-metrics

feature: add aggregation option for processes
  • Loading branch information
jimmybigcommerce authored Sep 10, 2019
2 parents c685061 + fe830bb commit c96b57f
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 38 deletions.
28 changes: 14 additions & 14 deletions cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ import (

// Configuration variables
var (
listeningAddress string
metricsEndpoint string
scrapeURIs []string
fixProcessCount bool
includeProcessLevelMetrics bool
listeningAddress string
metricsEndpoint string
scrapeURIs []string
fixProcessCount bool
aggregateProcessLevelMetrics bool
)

// serverCmd represents the server command
Expand Down Expand Up @@ -61,9 +61,9 @@ to quickly create a Cobra application.`,
exporter.CountProcessState = true
}

if !includeProcessLevelMetrics {
log.Info("Disabling process level metrics.")
exporter.IncludeProcessLevelMetrics = false
if aggregateProcessLevelMetrics {
log.Info("Enabling process level metric aggregation.")
exporter.AggregateProcessLevelMetrics = true
}

prometheus.MustRegister(exporter)
Expand Down Expand Up @@ -126,19 +126,19 @@ func init() {
serverCmd.Flags().StringVar(&metricsEndpoint, "web.telemetry-path", "/metrics", "Path under which to expose metrics.")
serverCmd.Flags().StringSliceVar(&scrapeURIs, "phpfpm.scrape-uri", []string{"tcp://127.0.0.1:9000/status"}, "FastCGI address, e.g. unix:///tmp/php.sock;/status or tcp://127.0.0.1:9000/status")
serverCmd.Flags().BoolVar(&fixProcessCount, "phpfpm.fix-process-count", false, "Enable to calculate process numbers via php-fpm_exporter since PHP-FPM sporadically reports wrong active/idle/total process numbers.")
serverCmd.Flags().BoolVar(&includeProcessLevelMetrics, "phpfpm.include-process-metrics", true, "Enabled by default, this includes process level metrics for each process in each pool. Sometimes this is too noisy.")
serverCmd.Flags().BoolVar(&aggregateProcessLevelMetrics, "phpfpm.aggregate-process-metrics", false, "This causes the process level metrics to be aggregated rather than listing all processes with unique PID hashes, which is sometimes too noisy")

//viper.BindEnv("web.listen-address", "PHP_FPM_WEB_LISTEN_ADDRESS")
//viper.BindPFlag("web.listen-address", serverCmd.Flags().Lookup("web.listen-address"))

// Workaround since viper's BindEnv is currently not working as expected (see https://github.com/spf13/viper/issues/461)

envs := map[string]string{
"PHP_FPM_WEB_LISTEN_ADDRESS": "web.listen-address",
"PHP_FPM_WEB_TELEMETRY_PATH": "web.telemetry-path",
"PHP_FPM_SCRAPE_URI": "phpfpm.scrape-uri",
"PHP_FPM_FIX_PROCESS_COUNT": "phpfpm.fix-process-count",
"PHP_FPM_INCLUDE_PROCESS_METRICS": "phpfpm.include-process-metrics",
"PHP_FPM_WEB_LISTEN_ADDRESS": "web.listen-address",
"PHP_FPM_WEB_TELEMETRY_PATH": "web.telemetry-path",
"PHP_FPM_SCRAPE_URI": "phpfpm.scrape-uri",
"PHP_FPM_FIX_PROCESS_COUNT": "phpfpm.fix-process-count",
"PHP_FPM_AGGREGATE_PROCESS_METRICS": "phpfpm.aggregate-process-metrics",
}

mapEnvVars(envs, serverCmd)
Expand Down
149 changes: 125 additions & 24 deletions phpfpm/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,42 @@ type Exporter struct {
mutex sync.Mutex
PoolManager PoolManager

CountProcessState bool
IncludeProcessLevelMetrics bool

up *prometheus.Desc
scrapeFailues *prometheus.Desc
startSince *prometheus.Desc
acceptedConnections *prometheus.Desc
listenQueue *prometheus.Desc
maxListenQueue *prometheus.Desc
listenQueueLength *prometheus.Desc
idleProcesses *prometheus.Desc
activeProcesses *prometheus.Desc
totalProcesses *prometheus.Desc
maxActiveProcesses *prometheus.Desc
maxChildrenReached *prometheus.Desc
slowRequests *prometheus.Desc
processRequests *prometheus.Desc
processLastRequestMemory *prometheus.Desc
processLastRequestCPU *prometheus.Desc
processRequestDuration *prometheus.Desc
processState *prometheus.Desc
CountProcessState bool
AggregateProcessLevelMetrics bool

up *prometheus.Desc
scrapeFailues *prometheus.Desc
startSince *prometheus.Desc
acceptedConnections *prometheus.Desc
listenQueue *prometheus.Desc
maxListenQueue *prometheus.Desc
listenQueueLength *prometheus.Desc
idleProcesses *prometheus.Desc
activeProcesses *prometheus.Desc
totalProcesses *prometheus.Desc
maxActiveProcesses *prometheus.Desc
maxChildrenReached *prometheus.Desc
slowRequests *prometheus.Desc
processRequests *prometheus.Desc
processLastRequestMemory *prometheus.Desc
processLastRequestCPU *prometheus.Desc
processRequestDuration *prometheus.Desc
processState *prometheus.Desc
processTotalRequests *prometheus.Desc
processAggregateRequests *prometheus.Desc
processAggregateLastRequestMemory *prometheus.Desc
processAggregateLastRequestCPU *prometheus.Desc
processAggregateRequestDuration *prometheus.Desc
processAggregateState *prometheus.Desc
}

// NewExporter creates a new Exporter for a PoolManager and configures the necessary metrics.
func NewExporter(pm PoolManager) *Exporter {
return &Exporter{
PoolManager: pm,

CountProcessState: false,
IncludeProcessLevelMetrics: true,
CountProcessState: false,
AggregateProcessLevelMetrics: false,

up: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "up"),
Expand Down Expand Up @@ -168,6 +174,42 @@ func NewExporter(pm PoolManager) *Exporter {
"The state of the process (Idle, Running, ...).",
[]string{"pool", "pid_hash", "state"},
nil),

processTotalRequests: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "process_requests_total"),
"The number of requests the process has served.",
[]string{"pool"},
nil),

processAggregateRequests: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "process_requests_average"),
"The avg number of requests per process.",
[]string{"pool"},
nil),

processAggregateLastRequestMemory: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "process_request_memory_average"),
"The avg amount of memory the last request consumed.",
[]string{"pool"},
nil),

processAggregateLastRequestCPU: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "process_request_cpu_average"),
"The avg %cpu of last requests.",
[]string{"pool"},
nil),

processAggregateRequestDuration: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "process_request_duration_average"),
"The avg duration in microseconds of the requests.",
[]string{"pool"},
nil),

processAggregateState: prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "process_state_totals"),
"The total count for each request state in the pool (Idle, Running, ...).",
[]string{"pool", "state"},
nil),
}
}

Expand Down Expand Up @@ -211,7 +253,7 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(e.maxChildrenReached, prometheus.CounterValue, float64(pool.MaxChildrenReached), pool.Name)
ch <- prometheus.MustNewConstMetric(e.slowRequests, prometheus.CounterValue, float64(pool.SlowRequests), pool.Name)

if e.IncludeProcessLevelMetrics {
if !e.AggregateProcessLevelMetrics {
for _, process := range pool.Processes {
pidHash := calculateProcessHash(process)
ch <- prometheus.MustNewConstMetric(e.processState, prometheus.GaugeValue, 1, pool.Name, pidHash, process.State)
Expand All @@ -220,10 +262,69 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(e.processLastRequestCPU, prometheus.GaugeValue, process.LastRequestCPU, pool.Name, pidHash)
ch <- prometheus.MustNewConstMetric(e.processRequestDuration, prometheus.GaugeValue, float64(process.RequestDuration), pool.Name, pidHash)
}
} else {
aggregateProcessState(pool, ch, e)
processAggregateLastRequestCPU(pool, ch, e)
processAggregateLastRequestMemory(pool, ch, e)
processAggregateRequestDuration(pool, ch, e)
processAggregateRequests(pool, ch, e)
}
}
}

// aggregateProcessState sends one processAggregateState sample per distinct
// process state found in pool. Each sample is labelled with the pool name and
// the state, and its value is the number of processes currently in that state.
//
// The samples are reported as gauges: the number of processes in a given state
// can fall as well as rise between scrapes, which a counter must never do.
// A pool without processes produces no samples.
func aggregateProcessState(pool Pool, ch chan<- prometheus.Metric, e *Exporter) {
	counts := make(map[string]int)
	for _, process := range pool.Processes {
		counts[process.State]++
	}
	for state, n := range counts {
		ch <- prometheus.MustNewConstMetric(e.processAggregateState, prometheus.GaugeValue, float64(n), pool.Name, state)
	}
}

// processAggregateLastRequestCPU sends the mean LastRequestCPU over all
// processes of pool as a single processAggregateLastRequestCPU sample labelled
// with the pool name.
//
// The sample is a gauge, since an average may move in either direction.
// When the pool has no processes there is nothing to average, so no sample is
// sent (dividing by a zero count would otherwise report NaN).
func processAggregateLastRequestCPU(pool Pool, ch chan<- prometheus.Metric, e *Exporter) {
	n := len(pool.Processes)
	if n == 0 {
		return
	}

	var total float64
	for _, process := range pool.Processes {
		total += process.LastRequestCPU
	}
	ch <- prometheus.MustNewConstMetric(e.processAggregateLastRequestCPU, prometheus.GaugeValue, total/float64(n), pool.Name)
}

// processAggregateLastRequestMemory sends the mean LastRequestMemory over all
// processes of pool as a single processAggregateLastRequestMemory sample
// labelled with the pool name.
//
// The sum is accumulated as int64 but the division is done in floating point,
// so the reported average is not truncated to a whole number. The sample is a
// gauge, since an average may move in either direction.
//
// When the pool has no processes no sample is sent; an integer division by the
// zero process count would panic at run time.
func processAggregateLastRequestMemory(pool Pool, ch chan<- prometheus.Metric, e *Exporter) {
	n := len(pool.Processes)
	if n == 0 {
		return
	}

	var total int64
	for _, process := range pool.Processes {
		total += process.LastRequestMemory
	}
	ch <- prometheus.MustNewConstMetric(e.processAggregateLastRequestMemory, prometheus.GaugeValue, float64(total)/float64(n), pool.Name)
}

// processAggregateRequestDuration sends the mean RequestDuration over all
// processes of pool as a single processAggregateRequestDuration sample labelled
// with the pool name.
//
// The sample is a gauge, since an average may move in either direction.
// When the pool has no processes there is nothing to average, so no sample is
// sent (dividing by a zero count would otherwise report NaN).
func processAggregateRequestDuration(pool Pool, ch chan<- prometheus.Metric, e *Exporter) {
	n := len(pool.Processes)
	if n == 0 {
		return
	}

	var total float64
	for _, process := range pool.Processes {
		total += float64(process.RequestDuration)
	}
	ch <- prometheus.MustNewConstMetric(e.processAggregateRequestDuration, prometheus.GaugeValue, total/float64(n), pool.Name)
}

// processAggregateRequests sends two samples for pool, both labelled with the
// pool name:
//
//   - processAggregateRequests: the mean Requests value per process, reported
//     as a gauge because an average may move in either direction. It is omitted
//     when the pool has no processes, as there is nothing to average (dividing
//     by a zero count would otherwise report NaN).
//   - processTotalRequests: the sum of Requests over all processes, reported as
//     a counter. It is always sent and is 0 for a pool without processes.
func processAggregateRequests(pool Pool, ch chan<- prometheus.Metric, e *Exporter) {
	var total float64
	for _, process := range pool.Processes {
		total += float64(process.Requests)
	}

	if n := len(pool.Processes); n > 0 {
		ch <- prometheus.MustNewConstMetric(e.processAggregateRequests, prometheus.GaugeValue, total/float64(n), pool.Name)
	}
	ch <- prometheus.MustNewConstMetric(e.processTotalRequests, prometheus.CounterValue, total, pool.Name)
}

// Describe exposes the metric description to Prometheus
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
ch <- e.up
Expand Down

0 comments on commit c96b57f

Please sign in to comment.