From: Johannes 'fish' Ziemke Date: Mon, 6 Nov 2017 15:33:56 +0000 (+0100) Subject: Add httptrace based prober (#215) X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=3717de8215198f9b6ba4073380399ecbf48a94f2;p=blackbox_exporter.git Add httptrace based prober (#215) --- diff --git a/prober/dns.go b/prober/dns.go index 217aeb1..6f778cd 100644 --- a/prober/dns.go +++ b/prober/dns.go @@ -118,7 +118,7 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry port = "53" targetAddr = target } - ip, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry, logger) + ip, _, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry, logger) if err != nil { level.Error(logger).Log("msg", "Error resolving address", "err", err) return false diff --git a/prober/http.go b/prober/http.go index afde979..3168944 100644 --- a/prober/http.go +++ b/prober/http.go @@ -21,10 +21,12 @@ import ( "io/ioutil" "net" "net/http" + "net/http/httptrace" "net/url" "regexp" "strconv" "strings" + "time" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" @@ -65,9 +67,76 @@ func matchRegularExpressions(reader io.Reader, httpConfig config.HTTPProbe, logg return true } +// roundTripTrace holds timings for a single HTTP roundtrip. +type roundTripTrace struct { + tls bool + start time.Time + dnsDone time.Time + connectDone time.Time + gotConn time.Time + responseStart time.Time + end time.Time +} + +// transport is a custom transport keeping traces for each HTTP roundtrip. +type transport struct { + Transport http.RoundTripper + logger log.Logger + traces []*roundTripTrace + current *roundTripTrace +} + +func newTransport(rt http.RoundTripper, logger log.Logger) *transport { + return &transport{ + Transport: rt, + logger: logger, + traces: []*roundTripTrace{}, + } +} + +// RoundTrip switches to a new trace, then runs embedded RoundTripper. +func (t *transport) RoundTrip(req *http.Request) (*http.Response, error) { + trace := &roundTripTrace{} + if req.URL.Scheme == "https" { + trace.tls = true + } + t.current = trace + t.traces = append(t.traces, trace) + defer func() { trace.end = time.Now() }() + return t.Transport.RoundTrip(req) +} + +func (t *transport) DNSStart(_ httptrace.DNSStartInfo) { + t.current.start = time.Now() +} +func (t *transport) DNSDone(_ httptrace.DNSDoneInfo) { + t.current.dnsDone = time.Now() +} +func (ts *transport) ConnectStart(_, _ string) { + t := ts.current + // No DNS resolution because we connected to IP directly. + if t.dnsDone.IsZero() { + t.start = time.Now() + t.dnsDone = t.start + } +} +func (t *transport) ConnectDone(net, addr string, err error) { + t.current.connectDone = time.Now() +} +func (t *transport) GotConn(_ httptrace.GotConnInfo) { + t.current.gotConn = time.Now() +} +func (t *transport) GotFirstResponseByte() { + t.current.responseStart = time.Now() +} + func ProbeHTTP(ctx context.Context, target string, module config.Module, registry *prometheus.Registry, logger log.Logger) (success bool) { var redirects int var ( + durationGaugeVec = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "probe_http_duration_seconds", + Help: "Duration of http request by phase, summed over all redirects", + }, []string{"phase"}) contentLengthGauge = prometheus.NewGauge(prometheus.GaugeOpts{ Name: "content_length", Help: "Length of http content response", @@ -104,6 +173,11 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr }) ) + for _, lv := range []string{"resolve", "connect", "tls", "processing", "transfer"} { + durationGaugeVec.WithLabelValues(lv) + } + + registry.MustRegister(durationGaugeVec) registry.MustRegister(contentLengthGauge) registry.MustRegister(redirectsGauge) registry.MustRegister(isSSLGauge) @@ -128,11 +202,12 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr targetHost = targetURL.Host } - ip, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry, logger) + ip, lookupTime, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry, logger) if err != nil { level.Error(logger).Log("msg", "Error resolving address", "err", err) return false } + durationGaugeVec.WithLabelValues("resolve").Add(lookupTime) httpClientConfig := module.HTTP.HTTPClientConfig if len(httpClientConfig.TLSConfig.ServerName) == 0 { @@ -146,6 +221,10 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr return false } + // Inject transport that tracks trace for each redirect. + tt := newTransport(client.Transport, logger) + client.Transport = tt + client.CheckRedirect = func(r *http.Request, via []*http.Request) error { level.Info(logger).Log("msg", "Received redirect", "url", r.URL.String()) redirects = len(via) @@ -188,6 +267,17 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr request.Body = ioutil.NopCloser(strings.NewReader(httpConfig.Body)) } level.Info(logger).Log("msg", "Making HTTP request", "url", request.URL.String(), "host", request.Host) + + trace := &httptrace.ClientTrace{ + DNSStart: tt.DNSStart, + DNSDone: tt.DNSDone, + ConnectStart: tt.ConnectStart, + ConnectDone: tt.ConnectDone, + GotConn: tt.GotConn, + GotFirstResponseByte: tt.GotFirstResponseByte, + } + request = request.WithContext(httptrace.WithClientTrace(request.Context(), trace)) + resp, err := client.Do(request) // Err won't be nil if redirects were turned off. See https://github.com/golang/go/issues/3795 if err != nil && resp == nil { @@ -247,6 +337,40 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr if resp == nil { resp = &http.Response{} } + for i, trace := range tt.traces { + level.Info(logger).Log( + "msg", "Response timings for roundtrip", + "roundtrip", i, + "start", trace.start, + "dnsDone", trace.dnsDone, + "connectDone", trace.connectDone, + "gotConn", trace.gotConn, + "responseStart", trace.responseStart, + "end", trace.end, + ) + // We get the duration for the first request from chooseProtocol. + if i != 0 { + durationGaugeVec.WithLabelValues("resolve").Add(trace.dnsDone.Sub(trace.start).Seconds()) + } + // Continue here if we never got a connection because a request failed. + if trace.gotConn.IsZero() { + continue + } + if trace.tls { + // dnsDone must be set if gotConn was set. + durationGaugeVec.WithLabelValues("connect").Add(trace.connectDone.Sub(trace.dnsDone).Seconds()) + durationGaugeVec.WithLabelValues("tls").Add(trace.gotConn.Sub(trace.dnsDone).Seconds()) + } else { + durationGaugeVec.WithLabelValues("connect").Add(trace.gotConn.Sub(trace.dnsDone).Seconds()) + } + + // Continue here if we never got a response from the server. + if trace.responseStart.IsZero() { + continue + } + durationGaugeVec.WithLabelValues("processing").Add(trace.responseStart.Sub(trace.gotConn).Seconds()) + durationGaugeVec.WithLabelValues("transfer").Add(trace.end.Sub(trace.responseStart).Seconds()) + } if resp.TLS != nil { isSSLGauge.Set(float64(1)) diff --git a/prober/http_test.go b/prober/http_test.go index 3d9bd76..7f4f4b2 100644 --- a/prober/http_test.go +++ b/prober/http_test.go @@ -543,3 +543,67 @@ func TestHTTPUsesTargetAsTLSServerName(t *testing.T) { t.Fatalf("TLS probe failed unexpectedly") } } + +func TestHTTPPhases(t *testing.T) { + ts := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + })) + defer ts.Close() + + // Follow redirect, should succeed with 200. + recorder := httptest.NewRecorder() + registry := prometheus.NewRegistry() + testCTX, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + result := ProbeHTTP(testCTX, ts.URL, config.Module{Timeout: time.Second, HTTP: config.HTTPProbe{ + HTTPClientConfig: pconfig.HTTPClientConfig{ + TLSConfig: pconfig.TLSConfig{InsecureSkipVerify: true}, + }, + }}, registry, log.NewNopLogger()) + body := recorder.Body.String() + if !result { + t.Fatalf("HTTP Phases test failed unexpectedly, got %s", body) + } + + mfs, err := registry.Gather() + if err != nil { + t.Fatal(err) + } + + found := false + foundLabel := map[string]bool{ + "connect": false, + "processing": false, + "resolve": false, + "transfer": false, + "tls": false, + } + for _, mf := range mfs { + if mf.GetName() == "probe_http_duration_seconds" { + found = true + for _, metric := range mf.GetMetric() { + for _, lp := range metric.Label { + if lp.GetName() == "phase" { + f, ok := foundLabel[lp.GetValue()] + if !ok { + t.Fatalf("Unexpected label phase=%s", lp.GetValue()) + } + if f { + t.Fatalf("Label phase=%s duplicated", lp.GetValue()) + } + foundLabel[lp.GetValue()] = true + } + } + } + + } + } + if !found { + t.Fatal("probe_http_duration_seconds not found") + } + for lv, found := range foundLabel { + if !found { + t.Fatalf("Label phase=%s not found", lv) + } + } +} diff --git a/prober/icmp.go b/prober/icmp.go index dcb9096..c222f72 100644 --- a/prober/icmp.go +++ b/prober/icmp.go @@ -52,7 +52,7 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr timeoutDeadline, _ := ctx.Deadline() deadline := time.Now().Add(timeoutDeadline.Sub(time.Now())) - ip, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger) + ip, _, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger) if err != nil { level.Warn(logger).Log("msg", "Error resolving address", "err", err) return false diff --git a/prober/tcp.go b/prober/tcp.go index e9724ba..65174b6 100644 --- a/prober/tcp.go +++ b/prober/tcp.go @@ -38,7 +38,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry return nil, err } - ip, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry, logger) + ip, _, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry, logger) if err != nil { level.Error(logger).Log("msg", "Error resolving address", "err", err) return nil, err diff --git a/prober/utils.go b/prober/utils.go index 524341d..a2228a9 100644 --- a/prober/utils.go +++ b/prober/utils.go @@ -10,10 +10,9 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -// Returns the preferedIPProtocol, the dialProtocol, and sets the probeIPProtocolGauge. -func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry, logger log.Logger) (*net.IPAddr, error) { +// Returns the IP for the preferedIPProtocol and lookup time. +func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry, logger log.Logger) (ip *net.IPAddr, lookupTime float64, err error) { var fallbackProtocol string - probeDNSLookupTimeSeconds := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "probe_dns_lookup_time_seconds", Help: "Returns the time taken for probe dns lookup in seconds", @@ -44,15 +43,16 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh resolveStart := time.Now() defer func() { - probeDNSLookupTimeSeconds.Add(time.Since(resolveStart).Seconds()) + lookupTime = time.Since(resolveStart).Seconds() + probeDNSLookupTimeSeconds.Add(lookupTime) }() - ip, err := net.ResolveIPAddr(preferredIPProtocol, target) + ip, err = net.ResolveIPAddr(preferredIPProtocol, target) if err != nil { level.Warn(logger).Log("msg", "Resolution with preferred IP protocol failed, attempting fallback protocol", "fallback_protocol", fallbackProtocol, "err", err) ip, err = net.ResolveIPAddr(fallbackProtocol, target) if err != nil { - return ip, err + return ip, 0.0, err } } @@ -63,5 +63,5 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh } level.Info(logger).Log("msg", "Resolved target address", "ip", ip) - return ip, nil + return ip, lookupTime, nil }