Expand logging for probes.
authorBrian Brazil <brian.brazil@robustperception.io>
Mon, 4 Sep 2017 16:51:41 +0000 (17:51 +0100)
committerBrian Brazil <brian.brazil@robustperception.io>
Tue, 12 Sep 2017 11:01:48 +0000 (12:01 +0100)
Now that we have somewhere to put logs other
than the general application logs, we can provide
much more information.

main.go
prober/dns.go
prober/http.go
prober/icmp.go
prober/tcp.go
prober/utils.go

diff --git a/main.go b/main.go
index 2495d98e53a8bb427f2a0e07203f5ba27abeafa7..9adf6ac4bf4272b6c75f03ca3bd1ba38a4495161 100644 (file)
--- a/main.go
+++ b/main.go
@@ -85,7 +85,8 @@ func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logg
        if module.Timeout.Seconds() < timeoutSeconds && module.Timeout.Seconds() > 0 {
                timeoutSeconds = module.Timeout.Seconds()
        }
-       ctx, cancel := context.WithTimeout(context.Background(), time.Duration((timeoutSeconds-*timeoutOffset)*1e9))
+       timeoutSeconds -= *timeoutOffset
+       ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSeconds*1e9))
        defer cancel()
        r = r.WithContext(ctx)
 
@@ -111,19 +112,20 @@ func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logg
        }
 
        sl := newScrapeLogger(logger, moduleName, target)
-       level.Info(sl).Log("msg", "Beginning probe", "probe", module.Prober)
+       level.Info(sl).Log("msg", "Beginning probe", "probe", module.Prober, "timeout_seconds", timeoutSeconds)
 
        start := time.Now()
        registry := prometheus.NewRegistry()
        registry.MustRegister(probeSuccessGauge)
        registry.MustRegister(probeDurationGauge)
        success := prober(ctx, target, module, registry, sl)
-       probeDurationGauge.Set(time.Since(start).Seconds())
+       duration := time.Since(start).Seconds()
+       probeDurationGauge.Set(duration)
        if success {
                probeSuccessGauge.Set(1)
-               level.Info(sl).Log("msg", "Probe succeeded")
+               level.Info(sl).Log("msg", "Probe succeeded", "duration_seconds", duration)
        } else {
-               level.Error(sl).Log("msg", "Probe failed")
+               level.Error(sl).Log("msg", "Probe failed", "duration_seconds", duration)
        }
 
        debug := false
@@ -139,11 +141,16 @@ func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logg
                mfs, err := registry.Gather()
                if err != nil {
                        fmt.Fprintf(w, "Error gathering metrics: %s\n", err)
-                       return
                }
                for _, mf := range mfs {
                        expfmt.MetricFamilyToText(w, mf)
                }
+               fmt.Fprintf(w, "\n\n\nModule configuration:\n")
+               c, err := yaml.Marshal(module)
+               if err != nil {
+                       fmt.Fprintf(w, "Error marshalling config: %s\n", err)
+               }
+               w.Write(c)
                return
        }
 
index 2418a6a1c72b9d5c8bc3aafdcf58bc9ba5ce3305..217aeb1b9dbd58bcd0c923e0199d6fd7ecfaad64 100644 (file)
@@ -32,27 +32,30 @@ func validRRs(rrs *[]dns.RR, v *config.DNSRRValidator, logger log.Logger) bool {
        // Fail the probe if there are no RRs of a given type, but a regexp match is required
        // (i.e. FailIfNotMatchesRegexp is set).
        if len(*rrs) == 0 && len(v.FailIfNotMatchesRegexp) > 0 {
+               level.Error(logger).Log("msg", "fail_if_not_matches_regexp specified but no RRs returned")
                return false
        }
        for _, rr := range *rrs {
-               level.Debug(logger).Log("msg", "Validating RR: %q", rr)
+               level.Info(logger).Log("msg", "Validating RR", "rr", rr)
                for _, re := range v.FailIfMatchesRegexp {
                        match, err := regexp.MatchString(re, rr.String())
                        if err != nil {
-                               level.Error(logger).Log("msg", "Error matching regexp %q: %s", re, err)
+                               level.Error(logger).Log("msg", "Error matching regexp", "regexp", re, "err", err)
                                return false
                        }
                        if match {
+                               level.Error(logger).Log("msg", "RR matched regexp", "regexp", re, "rr", rr)
                                return false
                        }
                }
                for _, re := range v.FailIfNotMatchesRegexp {
                        match, err := regexp.MatchString(re, rr.String())
                        if err != nil {
-                               level.Error(logger).Log("msg", "Error matching regexp %q: %s", re, err)
+                               level.Error(logger).Log("msg", "Error matching regexp", "regexp", re, "err", err)
                                return false
                        }
                        if !match {
+                               level.Error(logger).Log("msg", "RR did not match regexp", "regexp", re, "rr", rr)
                                return false
                        }
                }
@@ -78,15 +81,15 @@ func validRcode(rcode int, valid []string, logger log.Logger) bool {
        }
        for _, rc := range validRcodes {
                if rcode == rc {
+                       level.Info(logger).Log("msg", "Rcode is valid", "rcode", rcode, "string_rcode", dns.RcodeToString[rcode])
                        return true
                }
        }
-       level.Error(logger).Log("msg", "Rcode is not one of the valid rcodes", "string_rcode", dns.RcodeToString[rcode], "rcode", rcode, "valid_rcodes", validRcodes)
+       level.Error(logger).Log("msg", "Rcode is not one of the valid rcodes", "rcode", rcode, "string_rcode", dns.RcodeToString[rcode], "valid_rcodes", validRcodes)
        return false
 }
 
 func ProbeDNS(ctx context.Context, target string, module config.Module, registry *prometheus.Registry, logger log.Logger) bool {
-       var numAnswer, numAuthority, numAdditional int
        var dialProtocol string
        probeDNSAnswerRRSGauge := prometheus.NewGauge(prometheus.GaugeOpts{
                Name: "probe_dns_answer_rrs",
@@ -104,14 +107,6 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry
        registry.MustRegister(probeDNSAuthorityRRSGauge)
        registry.MustRegister(probeDNSAdditionalRRSGauge)
 
-       defer func() {
-               // These metrics can be used to build additional alerting based on the number of replies.
-               // They should be returned even in case of errors.
-               probeDNSAnswerRRSGauge.Set(float64(numAnswer))
-               probeDNSAuthorityRRSGauge.Set(float64(numAuthority))
-               probeDNSAdditionalRRSGauge.Set(float64(numAdditional))
-       }()
-
        var ip *net.IPAddr
        if module.DNS.TransportProtocol == "" {
                module.DNS.TransportProtocol = "udp"
@@ -123,7 +118,7 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry
                        port = "53"
                        targetAddr = target
                }
-               ip, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry)
+               ip, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry, logger)
                if err != nil {
                        level.Error(logger).Log("msg", "Error resolving address", "err", err)
                        return false
@@ -154,30 +149,36 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry
        msg := new(dns.Msg)
        msg.SetQuestion(dns.Fqdn(module.DNS.QueryName), qt)
 
+       level.Info(logger).Log("msg", "Making DNS query", "target", target, "dial_protocol", dialProtocol, "query", module.DNS.QueryName, "type", qt)
        timeoutDeadline, _ := ctx.Deadline()
        client.Timeout = timeoutDeadline.Sub(time.Now())
        response, _, err := client.Exchange(msg, target)
        if err != nil {
-               level.Warn(logger).Log("msg", "Error while sending a DNS query", "err", err)
+               level.Error(logger).Log("msg", "Error while sending a DNS query", "err", err)
                return false
        }
-       level.Debug(logger).Log("msg", "Got response: %#v", response)
+       level.Info(logger).Log("msg", "Got response", "response", response)
 
-       numAnswer, numAuthority, numAdditional = len(response.Answer), len(response.Ns), len(response.Extra)
+       probeDNSAnswerRRSGauge.Set(float64(len(response.Answer)))
+       probeDNSAuthorityRRSGauge.Set(float64(len(response.Ns)))
+       probeDNSAdditionalRRSGauge.Set(float64(len(response.Extra)))
 
        if !validRcode(response.Rcode, module.DNS.ValidRcodes, logger) {
                return false
        }
+       level.Info(logger).Log("msg", "Validating Answer RRs")
        if !validRRs(&response.Answer, &module.DNS.ValidateAnswer, logger) {
-               level.Debug(logger).Log("msg", "Answer RRs validation failed")
+               level.Error(logger).Log("msg", "Answer RRs validation failed")
                return false
        }
+       level.Info(logger).Log("msg", "Validating Authority RRs")
        if !validRRs(&response.Ns, &module.DNS.ValidateAuthority, logger) {
-               level.Debug(logger).Log("msg", "Authority RRs validation failed")
+               level.Error(logger).Log("msg", "Authority RRs validation failed")
                return false
        }
+       level.Info(logger).Log("msg", "Validating Additional RRs")
        if !validRRs(&response.Extra, &module.DNS.ValidateAdditional, logger) {
-               level.Debug(logger).Log("msg", "Additional RRs validation failed")
+               level.Error(logger).Log("msg", "Additional RRs validation failed")
                return false
        }
        return true
index 7bc284b0858c5e1e3dfd1389b98ac15a6ac8c270..d03566061ec96c5254ab396e26356f875493961f 100644 (file)
@@ -16,6 +16,7 @@ package prober
 import (
        "context"
        "errors"
+       "fmt"
        "io"
        "io/ioutil"
        "net"
@@ -46,6 +47,7 @@ func matchRegularExpressions(reader io.Reader, httpConfig config.HTTPProbe, logg
                        return false
                }
                if re.Match(body) {
+                       level.Error(logger).Log("msg", "Body matched regular expression", "regexp", expression)
                        return false
                }
        }
@@ -56,6 +58,7 @@ func matchRegularExpressions(reader io.Reader, httpConfig config.HTTPProbe, logg
                        return false
                }
                if !re.Match(body) {
+                       level.Error(logger).Log("msg", "Body did not match regular expression", "regexp", expression)
                        return false
                }
        }
@@ -116,6 +119,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 
        targetURL, err := url.Parse(target)
        if err != nil {
+               level.Error(logger).Log("msg", "Could not parse target URL", "err", err)
                return false
        }
        targetHost, targetPort, err := net.SplitHostPort(targetURL.Host)
@@ -124,8 +128,9 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                targetHost = targetURL.Host
        }
 
-       ip, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry)
+       ip, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry, logger)
        if err != nil {
+               level.Error(logger).Log("msg", "Error resolving address", "err", err)
                return false
        }
 
@@ -137,9 +142,11 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                return false
        }
 
-       client.CheckRedirect = func(_ *http.Request, via []*http.Request) error {
+       client.CheckRedirect = func(r *http.Request, via []*http.Request) error {
+               level.Info(logger).Log("msg", "Received redirect", "url", r.URL.String())
                redirects = len(via)
                if redirects > 10 || httpConfig.NoFollowRedirects {
+                       level.Info(logger).Log("msg", "Not following redirect")
                        return errors.New("Don't follow redirects")
                }
                return nil
@@ -175,13 +182,14 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
        if httpConfig.Body != "" {
                request.Body = ioutil.NopCloser(strings.NewReader(httpConfig.Body))
        }
+       level.Info(logger).Log("msg", "Making HTTP request", "url", request.URL.String())
        resp, err := client.Do(request)
        // Err won't be nil if redirects were turned off. See https://github.com/golang/go/issues/3795
        if err != nil && resp == nil {
                level.Error(logger).Log("msg", "Error for HTTP request", "err", err)
-               success = false
        } else {
                defer resp.Body.Close()
+               level.Info(logger).Log("msg", "Received HTTP response", "status_code", resp.StatusCode)
                if len(httpConfig.ValidStatusCodes) != 0 {
                        for _, code := range httpConfig.ValidStatusCodes {
                                if resp.StatusCode == code {
@@ -189,8 +197,14 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                                        break
                                }
                        }
+                       if !success {
+                               level.Info(logger).Log("msg", "Invalid HTTP response status code", "status_code", resp.StatusCode,
+                                       "valid_status_codes", fmt.Sprintf("%v", httpConfig.ValidStatusCodes))
+                       }
                } else if 200 <= resp.StatusCode && resp.StatusCode < 300 {
                        success = true
+               } else {
+                       level.Info(logger).Log("msg", "Invalid HTTP response status code, wanted 2xx", "status_code", resp.StatusCode)
                }
 
                if success && (len(httpConfig.FailIfMatchesRegexp) > 0 || len(httpConfig.FailIfNotMatchesRegexp) > 0) {
@@ -218,6 +232,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                                }
                        }
                        if !found {
+                               level.Error(logger).Log("msg", "Invalid HTTP version number", "version", httpVersionNumber)
                                success = false
                        }
                }
@@ -233,9 +248,11 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
                registry.MustRegister(probeSSLEarliestCertExpiryGauge)
                probeSSLEarliestCertExpiryGauge.Set(float64(getEarliestCertExpiry(resp.TLS).UnixNano() / 1e9))
                if httpConfig.FailIfSSL {
+                       level.Error(logger).Log("msg", "Final request was over SSL")
                        success = false
                }
        } else if httpConfig.FailIfNotSSL {
+               level.Error(logger).Log("msg", "Final request was not over SSL")
                success = false
        }
 
index d62c20edf79e713a13d507ae9ef5fb6a7493766c..052cae54e64f7567a574a7778084525b9996d9d2 100644 (file)
@@ -52,12 +52,13 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr
        timeoutDeadline, _ := ctx.Deadline()
        deadline := time.Now().Add(timeoutDeadline.Sub(time.Now()))
 
-       ip, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry)
+       ip, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger)
        if err != nil {
                level.Warn(logger).Log("msg", "Error resolving address", "err", err)
                return false
        }
 
+       level.Info(logger).Log("msg", "Creating socket")
        if ip.IP.To4() == nil {
                requestType = ipv6.ICMPTypeEchoRequest
                replyType = ipv6.ICMPTypeEchoReply
@@ -74,14 +75,16 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr
        }
        defer socket.Close()
 
+       body := &icmp.Echo{
+               ID:   os.Getpid() & 0xffff,
+               Seq:  int(getICMPSequence()),
+               Data: []byte("Prometheus Blackbox Exporter"),
+       }
+       level.Info(logger).Log("msg", "Creating ICMP packet", "seq", body.Seq, "id", body.ID)
        wm := icmp.Message{
                Type: requestType,
                Code: 0,
-               Body: &icmp.Echo{
-                       ID:   os.Getpid() & 0xffff,
-                       Seq:  int(getICMPSequence()),
-                       Data: []byte("Prometheus Blackbox Exporter"),
-               },
+               Body: body,
        }
 
        wb, err := wm.Marshal(nil)
@@ -89,6 +92,7 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr
                level.Error(logger).Log("msg", "Error marshalling packet", "err", err)
                return
        }
+       level.Info(logger).Log("msg", "Writing out packet")
        if _, err = socket.WriteTo(wb, ip); err != nil {
                level.Warn(logger).Log("msg", "Error writing to socket", "err", err)
                return
@@ -107,11 +111,12 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr
                level.Error(logger).Log("msg", "Error setting socket deadline", "err", err)
                return
        }
+       level.Info(logger).Log("msg", "Waiting for reply packets")
        for {
                n, peer, err := socket.ReadFrom(rb)
                if err != nil {
                        if nerr, ok := err.(net.Error); ok && nerr.Timeout() {
-                               level.Warn(logger).Log("msg", "Timeout reading from socket for", "err", err)
+                               level.Warn(logger).Log("msg", "Timeout reading from socket", "err", err)
                                return
                        }
                        level.Error(logger).Log("msg", "Error reading from socket", "err", err)
@@ -126,6 +131,7 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr
                        rb[3] = 0
                }
                if bytes.Compare(rb[:n], wb) == 0 {
+                       level.Info(logger).Log("msg", "Found matching reply packet")
                        return true
                }
        }
index e5bf716621f39334300135306b51ddb86b0bf196..3e64410caef32205d2232ce7439fcc77eb85fcc8 100644 (file)
@@ -38,7 +38,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry
                return nil, err
        }
 
-       ip, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry)
+       ip, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry, logger)
        if err != nil {
                level.Error(logger).Log("msg", "Error resolving address", "err", err)
                return nil, err
@@ -52,6 +52,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry
        dialTarget = net.JoinHostPort(ip.String(), port)
 
        if !module.TCP.TLS {
+               level.Info(logger).Log("msg", "Dialing TCP without TLS")
                return dialer.DialContext(ctx, dialProtocol, dialTarget)
        }
        tlsConfig, err := pconfig.NewTLSConfig(&module.TCP.TLSConfig)
@@ -62,6 +63,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry
        timeoutDeadline, _ := ctx.Deadline()
        dialer.Deadline = timeoutDeadline
 
+       level.Info(logger).Log("msg", "Dialing TCP with TLS")
        return tls.DialWithDialer(dialer, dialProtocol, dialTarget, tlsConfig)
 }
 
@@ -76,12 +78,14 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry
        })
        registry.MustRegister(probeFailedDueToRegex)
        deadline, _ := ctx.Deadline()
+
        conn, err := dialTCP(ctx, target, module, registry, logger)
        if err != nil {
                level.Error(logger).Log("msg", "Error dialing TCP", "err", err)
                return false
        }
        defer conn.Close()
+       level.Info(logger).Log("msg", "Successfully dialed")
 
        // Set a deadline to prevent the following code from blocking forever.
        // If a deadline cannot be set, better fail the probe by returning an error
@@ -97,7 +101,7 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry
        }
        scanner := bufio.NewScanner(conn)
        for i, qr := range module.TCP.QueryResponse {
-               level.Debug(logger).Log("msg", "Processing query response entry", "entry_number", i)
+               level.Info(logger).Log("msg", "Processing query response entry", "entry_number", i)
                send := qr.Send
                if qr.Expect != "" {
                        re, err := regexp.Compile(qr.Expect)
@@ -111,7 +115,7 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry
                                level.Debug(logger).Log("msg", "Read line", "line", scanner.Text())
                                match = re.FindSubmatchIndex(scanner.Bytes())
                                if match != nil {
-                                       level.Debug(logger).Log("msg", "Regexp matched", "regexp", re, "line", scanner.Text())
+                                       level.Info(logger).Log("msg", "Regexp matched", "regexp", re, "line", scanner.Text())
                                        break
                                }
                        }
@@ -121,14 +125,16 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry
                        }
                        if match == nil {
                                probeFailedDueToRegex.Set(1)
+                               level.Error(logger).Log("msg", "Regexp did not match", "regexp", re, "line", scanner.Text())
                                return false
                        }
                        probeFailedDueToRegex.Set(0)
                        send = string(re.Expand(nil, []byte(send), scanner.Bytes(), match))
                }
                if send != "" {
-                       level.Debug(logger).Log("msg", "Sending %q", send)
+                       level.Debug(logger).Log("msg", "Sending line", "line", send)
                        if _, err := fmt.Fprintf(conn, "%s\n", send); err != nil {
+                               level.Error(logger).Log("msg", "Failed to send", "err", err)
                                return false
                        }
                }
index fb3890c2917677a846b0c9b462cc8d068770ac04..42a93ba4842f73d15c0d9e1faa3ca5ab9ec788bf 100644 (file)
@@ -4,11 +4,14 @@ import (
        "net"
        "time"
 
+       "github.com/go-kit/kit/log"
+       "github.com/go-kit/kit/log/level"
+
        "github.com/prometheus/client_golang/prometheus"
 )
 
 // Returns the preferedIPProtocol, the dialProtocol, and sets the probeIPProtocolGauge.
-func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry) (*net.IPAddr, error) {
+func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry, logger log.Logger) (*net.IPAddr, error) {
        var fallbackProtocol string
 
        probeDNSLookupTimeSeconds := prometheus.NewGauge(prometheus.GaugeOpts{
@@ -37,12 +40,14 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh
                fallbackProtocol = "ip6"
        }
 
+       level.Info(logger).Log("msg", "Resolving target address", "preferred_ip_protocol", preferredIPProtocol)
        resolveStart := time.Now()
 
        defer probeDNSLookupTimeSeconds.Add(time.Since(resolveStart).Seconds())
 
        ip, err := net.ResolveIPAddr(preferredIPProtocol, target)
        if err != nil {
+               level.Warn(logger).Log("msg", "Resolution with preferred IP protocol failed, attempting fallback protocol", "fallback_protocol", fallbackProtocol, "err", err)
                ip, err = net.ResolveIPAddr(fallbackProtocol, target)
                if err != nil {
                        return ip, err
@@ -55,6 +60,6 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh
                probeIPProtocolGauge.Set(4)
        }
 
+       level.Info(logger).Log("msg", "Resolved target address", "ip", ip)
        return ip, nil
-
 }