Add httptrace based prober (#215)
authorJohannes 'fish' Ziemke <github@freigeist.org>
Mon, 6 Nov 2017 15:33:56 +0000 (16:33 +0100)
committerBrian Brazil <brian.brazil@robustperception.io>
Mon, 6 Nov 2017 15:33:56 +0000 (15:33 +0000)
prober/dns.go
prober/http.go
prober/http_test.go
prober/icmp.go
prober/tcp.go
prober/utils.go

index 217aeb1b9dbd58bcd0c923e0199d6fd7ecfaad64..6f778cdc2933667018e8d55cd6c912045494cb40 100644 (file)
@@ -118,7 +118,7 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry
                        port = "53"
                        targetAddr = target
                }
-               ip, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry, logger)
+               ip, _, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry, logger)
                if err != nil {
                        level.Error(logger).Log("msg", "Error resolving address", "err", err)
                        return false
index afde9794cc0d3b90cb87dca70d2dd5ffc87dae99..31689445b464e8cc1005a82db5d5191da58c563e 100644 (file)
@@ -21,10 +21,12 @@ import (
        "io/ioutil"
        "net"
        "net/http"
+       "net/http/httptrace"
        "net/url"
        "regexp"
        "strconv"
        "strings"
+       "time"
 
        "github.com/go-kit/kit/log"
        "github.com/go-kit/kit/log/level"
@@ -65,9 +67,76 @@ func matchRegularExpressions(reader io.Reader, httpConfig config.HTTPProbe, logg
        return true
 }
 
+// roundTripTrace holds timings for a single HTTP roundtrip.
+type roundTripTrace struct {
+       tls           bool
+       start         time.Time
+       dnsDone       time.Time
+       connectDone   time.Time
+       gotConn       time.Time
+       responseStart time.Time
+       end           time.Time
+}
+
+// transport is a custom transport keeping traces for each HTTP roundtrip.
+type transport struct {
+       Transport http.RoundTripper
+       logger    log.Logger
+       traces    []*roundTripTrace
+       current   *roundTripTrace
+}
+
+func newTransport(rt http.RoundTripper, logger log.Logger) *transport {
+       return &transport{
+               Transport: rt,
+               logger:    logger,
+               traces:    []*roundTripTrace{},
+       }
+}
+
+// RoundTrip switches to a new trace, then runs embedded RoundTripper.
+func (t *transport) RoundTrip(req *http.Request) (*http.Response, error) {
+       trace := &roundTripTrace{}
+       if req.URL.Scheme == "https" {
+               trace.tls = true
+       }
+       t.current = trace
+       t.traces = append(t.traces, trace)
+       defer func() { trace.end = time.Now() }()
+       return t.Transport.RoundTrip(req)
+}
+
+func (t *transport) DNSStart(_ httptrace.DNSStartInfo) {
+       t.current.start = time.Now()
+}
+func (t *transport) DNSDone(_ httptrace.DNSDoneInfo) {
+       t.current.dnsDone = time.Now()
+}
+func (ts *transport) ConnectStart(_, _ string) {
+       t := ts.current
+       // No DNS resolution because we connected to IP directly.
+       if t.dnsDone.IsZero() {
+               t.start = time.Now()
+               t.dnsDone = t.start
+       }
+}
+func (t *transport) ConnectDone(net, addr string, err error) {
+       t.current.connectDone = time.Now()
+}
+func (t *transport) GotConn(_ httptrace.GotConnInfo) {
+       t.current.gotConn = time.Now()
+}
+func (t *transport) GotFirstResponseByte() {
+       t.current.responseStart = time.Now()
+}
+
 func ProbeHTTP(ctx context.Context, target string, module config.Module, registry *prometheus.Registry, logger log.Logger) (success bool) {
        var redirects int
        var (
+               durationGaugeVec = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+                       Name: "probe_http_duration_seconds",
+                       Help: "Duration of http request by phase, summed over all redirects",
+               }, []string{"phase"})
                contentLengthGauge = prometheus.NewGauge(prometheus.GaugeOpts{
                        Name: "content_length",
                        Help: "Length of http content response",
@@ -104,6 +173,11 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                })
        )
 
+       for _, lv := range []string{"resolve", "connect", "tls", "processing", "transfer"} {
+               durationGaugeVec.WithLabelValues(lv)
+       }
+
+       registry.MustRegister(durationGaugeVec)
        registry.MustRegister(contentLengthGauge)
        registry.MustRegister(redirectsGauge)
        registry.MustRegister(isSSLGauge)
@@ -128,11 +202,12 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                targetHost = targetURL.Host
        }
 
-       ip, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry, logger)
+       ip, lookupTime, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry, logger)
        if err != nil {
                level.Error(logger).Log("msg", "Error resolving address", "err", err)
                return false
        }
+       durationGaugeVec.WithLabelValues("resolve").Add(lookupTime)
 
        httpClientConfig := module.HTTP.HTTPClientConfig
        if len(httpClientConfig.TLSConfig.ServerName) == 0 {
@@ -146,6 +221,10 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                return false
        }
 
+       // Inject transport that tracks trace for each redirect.
+       tt := newTransport(client.Transport, logger)
+       client.Transport = tt
+
        client.CheckRedirect = func(r *http.Request, via []*http.Request) error {
                level.Info(logger).Log("msg", "Received redirect", "url", r.URL.String())
                redirects = len(via)
@@ -188,6 +267,17 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                request.Body = ioutil.NopCloser(strings.NewReader(httpConfig.Body))
        }
        level.Info(logger).Log("msg", "Making HTTP request", "url", request.URL.String(), "host", request.Host)
+
+       trace := &httptrace.ClientTrace{
+               DNSStart:             tt.DNSStart,
+               DNSDone:              tt.DNSDone,
+               ConnectStart:         tt.ConnectStart,
+               ConnectDone:          tt.ConnectDone,
+               GotConn:              tt.GotConn,
+               GotFirstResponseByte: tt.GotFirstResponseByte,
+       }
+       request = request.WithContext(httptrace.WithClientTrace(request.Context(), trace))
+
        resp, err := client.Do(request)
        // Err won't be nil if redirects were turned off. See https://github.com/golang/go/issues/3795
        if err != nil && resp == nil {
@@ -247,6 +337,40 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
        if resp == nil {
                resp = &http.Response{}
        }
+       for i, trace := range tt.traces {
+               level.Info(logger).Log(
+                       "msg", "Response timings for roundtrip",
+                       "roundtrip", i,
+                       "start", trace.start,
+                       "dnsDone", trace.dnsDone,
+                       "connectDone", trace.connectDone,
+                       "gotConn", trace.gotConn,
+                       "responseStart", trace.responseStart,
+                       "end", trace.end,
+               )
+               // We get the duration for the first request from chooseProtocol.
+               if i != 0 {
+                       durationGaugeVec.WithLabelValues("resolve").Add(trace.dnsDone.Sub(trace.start).Seconds())
+               }
+               // Continue here if we never got a connection because a request failed.
+               if trace.gotConn.IsZero() {
+                       continue
+               }
+               if trace.tls {
+                       // dnsDone must be set if gotConn was set.
+                       durationGaugeVec.WithLabelValues("connect").Add(trace.connectDone.Sub(trace.dnsDone).Seconds())
+                       durationGaugeVec.WithLabelValues("tls").Add(trace.gotConn.Sub(trace.dnsDone).Seconds())
+               } else {
+                       durationGaugeVec.WithLabelValues("connect").Add(trace.gotConn.Sub(trace.dnsDone).Seconds())
+               }
+
+               // Continue here if we never got a response from the server.
+               if trace.responseStart.IsZero() {
+                       continue
+               }
+               durationGaugeVec.WithLabelValues("processing").Add(trace.responseStart.Sub(trace.gotConn).Seconds())
+               durationGaugeVec.WithLabelValues("transfer").Add(trace.end.Sub(trace.responseStart).Seconds())
+       }
 
        if resp.TLS != nil {
                isSSLGauge.Set(float64(1))
index 3d9bd76ac31160fbeae3375813d08295e48f3ecb..7f4f4b2dd37cab9aa6577a6b55606c4551bc8f52 100644 (file)
@@ -543,3 +543,67 @@ func TestHTTPUsesTargetAsTLSServerName(t *testing.T) {
                t.Fatalf("TLS probe failed unexpectedly")
        }
 }
+
+func TestHTTPPhases(t *testing.T) {
+       ts := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+       }))
+       defer ts.Close()
+
+       // Follow redirect, should succeed with 200.
+       recorder := httptest.NewRecorder()
+       registry := prometheus.NewRegistry()
+       testCTX, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+       defer cancel()
+
+       result := ProbeHTTP(testCTX, ts.URL, config.Module{Timeout: time.Second, HTTP: config.HTTPProbe{
+               HTTPClientConfig: pconfig.HTTPClientConfig{
+                       TLSConfig: pconfig.TLSConfig{InsecureSkipVerify: true},
+               },
+       }}, registry, log.NewNopLogger())
+       body := recorder.Body.String()
+       if !result {
+               t.Fatalf("HTTP Phases test failed unexpectedly, got %s", body)
+       }
+
+       mfs, err := registry.Gather()
+       if err != nil {
+               t.Fatal(err)
+       }
+
+       found := false
+       foundLabel := map[string]bool{
+               "connect":    false,
+               "processing": false,
+               "resolve":    false,
+               "transfer":   false,
+               "tls":        false,
+       }
+       for _, mf := range mfs {
+               if mf.GetName() == "probe_http_duration_seconds" {
+                       found = true
+                       for _, metric := range mf.GetMetric() {
+                               for _, lp := range metric.Label {
+                                       if lp.GetName() == "phase" {
+                                               f, ok := foundLabel[lp.GetValue()]
+                                               if !ok {
+                                                       t.Fatalf("Unexpected label phase=%s", lp.GetValue())
+                                               }
+                                               if f {
+                                                       t.Fatalf("Label phase=%s duplicated", lp.GetValue())
+                                               }
+                                               foundLabel[lp.GetValue()] = true
+                                       }
+                               }
+                       }
+
+               }
+       }
+       if !found {
+               t.Fatal("probe_http_duration_seconds not found")
+       }
+       for lv, found := range foundLabel {
+               if !found {
+                       t.Fatalf("Label phase=%s not found", lv)
+               }
+       }
+}
index dcb9096135ad311eded3086a0736b009843c4bf6..c222f725c51799d2fb467f647861d17c0a9fe67d 100644 (file)
@@ -52,7 +52,7 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr
        timeoutDeadline, _ := ctx.Deadline()
        deadline := time.Now().Add(timeoutDeadline.Sub(time.Now()))
 
-       ip, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger)
+       ip, _, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger)
        if err != nil {
                level.Warn(logger).Log("msg", "Error resolving address", "err", err)
                return false
index e9724bad183ee2c58a166b85e427669399015ec7..65174b6db9c8b4eddd49ecafc336bb1981306319 100644 (file)
@@ -38,7 +38,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry
                return nil, err
        }
 
-       ip, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry, logger)
+       ip, _, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry, logger)
        if err != nil {
                level.Error(logger).Log("msg", "Error resolving address", "err", err)
                return nil, err
index 524341d9f3a338ec94815b2a69cf23313f01e658..a2228a976a3fac529a181a203a0767aabde2a8dc 100644 (file)
@@ -10,10 +10,9 @@ import (
        "github.com/prometheus/client_golang/prometheus"
 )
 
-// Returns the preferedIPProtocol, the dialProtocol, and sets the probeIPProtocolGauge.
-func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry, logger log.Logger) (*net.IPAddr, error) {
+// Returns the IP for the preferedIPProtocol and lookup time.
+func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry, logger log.Logger) (ip *net.IPAddr, lookupTime float64, err error) {
        var fallbackProtocol string
-
        probeDNSLookupTimeSeconds := prometheus.NewGauge(prometheus.GaugeOpts{
                Name: "probe_dns_lookup_time_seconds",
                Help: "Returns the time taken for probe dns lookup in seconds",
@@ -44,15 +43,16 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh
        resolveStart := time.Now()
 
        defer func() {
-               probeDNSLookupTimeSeconds.Add(time.Since(resolveStart).Seconds())
+               lookupTime = time.Since(resolveStart).Seconds()
+               probeDNSLookupTimeSeconds.Add(lookupTime)
        }()
 
-       ip, err := net.ResolveIPAddr(preferredIPProtocol, target)
+       ip, err = net.ResolveIPAddr(preferredIPProtocol, target)
        if err != nil {
                level.Warn(logger).Log("msg", "Resolution with preferred IP protocol failed, attempting fallback protocol", "fallback_protocol", fallbackProtocol, "err", err)
                ip, err = net.ResolveIPAddr(fallbackProtocol, target)
                if err != nil {
-                       return ip, err
+                       return ip, 0.0, err
                }
        }
 
@@ -63,5 +63,5 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh
        }
 
        level.Info(logger).Log("msg", "Resolved target address", "ip", ip)
-       return ip, nil
+       return ip, lookupTime, nil
 }