diff --git a/pkg/config.go b/pkg/config.go
index 2da6b26..431d6b1 100644
--- a/pkg/config.go
+++ b/pkg/config.go
@@ -263,8 +263,9 @@ type OutboundProxyConfig struct {
 }
 
 type MetricsConfig struct {
-	Disabled bool   `mapstructure:"disabled" json:"disabled"`
-	Addr     string `mapstructure:"addr" json:"addr" default:":9000"`
+	Disabled                      bool   `mapstructure:"disabled" json:"disabled"`
+	Addr                          string `mapstructure:"addr" json:"addr" default:":9000"`
+	HealthcheckGracePeriodSeconds int    `mapstructure:"healthcheckGracePeriodSeconds" json:"healthcheckGracePeriodSeconds" validate:"gte=0" default:"10"`
 }
 
 type Config struct {
diff --git a/pkg/heartbeat.go b/pkg/heartbeat.go
--- a/pkg/heartbeat.go
+++ b/pkg/heartbeat.go
@@ -51,6 +51,7 @@ func (config *HeartbeatConfig) Start(tnet *netstack.Net, userAgent string) (func
 				failures = 0
 				heartbeatSuccessCounter.Inc()
 				heartbeatLastSuccessTimestamp.SetToCurrentTime()
+				recordHeartbeatSuccess()
 				return true
 			}
 		}
diff --git a/pkg/metrics.go b/pkg/metrics.go
--- a/pkg/metrics.go
+++ b/pkg/metrics.go
@@ -47,7 +47,11 @@ func StartMetrics(config *Config) error {
 	prometheus.MustRegister(logEventsCounter, heartbeatCounter, heartbeatLastSuccessTimestamp, proxyInFlightGauge, proxyCounter)
 	promHandler := promhttp.Handler()
 
-	httpServer := &http.Server{Addr: config.Metrics.Addr, Handler: promHandler}
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", promHandler)
+	registerProbes(mux, config)
+
+	httpServer := &http.Server{Addr: config.Metrics.Addr, Handler: mux}
 	listener, err := net.Listen("tcp", httpServer.Addr)
 	if err != nil {
 		return fmt.Errorf("failed to start external metrics server: %w", err)
diff --git a/pkg/probes.go b/pkg/probes.go
new file mode 100644
--- /dev/null
+++ b/pkg/probes.go
@@ -0,0 +1,70 @@
+package pkg
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"sync"
+	"time"
+)
+
+// heartbeatState records the most recent successful heartbeat. It is updated
+// from the heartbeat loop and read by the probe handlers, which net/http runs
+// on its own goroutines, so every access goes through mu.
+var heartbeatState struct {
+	mu          sync.RWMutex
+	lastSuccess time.Time // zero until the first heartbeat succeeds
+}
+
+// recordHeartbeatSuccess marks the current time as the latest successful
+// heartbeat.
+func recordHeartbeatSuccess() {
+	now := time.Now()
+	heartbeatState.mu.Lock()
+	heartbeatState.lastSuccess = now
+	heartbeatState.mu.Unlock()
+}
+
+// lastHeartbeatSuccess returns the time of the latest successful heartbeat.
+// ok is false when no heartbeat has succeeded yet.
+func lastHeartbeatSuccess() (last time.Time, ok bool) {
+	heartbeatState.mu.RLock()
+	defer heartbeatState.mu.RUnlock()
+	return heartbeatState.lastSuccess, !heartbeatState.lastSuccess.IsZero()
+}
+
+// writeProbeResponse sends a plain-text probe result with the given status.
+func writeProbeResponse(w http.ResponseWriter, status int, body string) {
+	w.WriteHeader(status)
+	// A write error here typically means the client disconnected; there is
+	// nothing useful left to do with it, so it is deliberately discarded.
+	_, _ = io.WriteString(w, body)
+}
+
+// registerProbes adds the startup and readiness probe endpoints to mux.
+//
+//   - /probes/startup returns 200 once any heartbeat has succeeded and 503
+//     before that.
+//   - /probes/readiness returns 200 while the latest successful heartbeat is
+//     younger than the heartbeat interval plus the configured grace period,
+//     and 503 otherwise.
+func registerProbes(mux *http.ServeMux, config *Config) {
+	mux.HandleFunc("/probes/startup", func(w http.ResponseWriter, r *http.Request) {
+		if _, ok := lastHeartbeatSuccess(); !ok {
+			writeProbeResponse(w, http.StatusServiceUnavailable, "Not Ready")
+			return
+		}
+		writeProbeResponse(w, http.StatusOK, "OK")
+	})
+
+	mux.HandleFunc("/probes/readiness", func(w http.ResponseWriter, r *http.Request) {
+		heartbeatCutoff := time.Second * time.Duration(config.Inbound.Heartbeat.IntervalSeconds+config.Metrics.HealthcheckGracePeriodSeconds)
+
+		last, ok := lastHeartbeatSuccess()
+		if !ok || time.Since(last) >= heartbeatCutoff {
+			writeProbeResponse(w, http.StatusServiceUnavailable, fmt.Sprintf("Not Ready: no successful heartbeat within %v", heartbeatCutoff))
+			return
+		}
+		writeProbeResponse(w, http.StatusOK, "OK")
+	})
+}