From f8290c43c54e449899070c019aa723641c82da19 Mon Sep 17 00:00:00 2001 From: Brian Brazil Date: Mon, 4 Sep 2017 17:51:41 +0100 Subject: [PATCH] Expand logging for probes. Now that we have somewhere to put logs other than the general application logs, we can provide much more information. --- main.go | 19 +++++++++++++------ prober/dns.go | 41 +++++++++++++++++++++-------------------- prober/http.go | 23 ++++++++++++++++++++--- prober/icmp.go | 20 +++++++++++++------- prober/tcp.go | 14 ++++++++++---- prober/utils.go | 9 +++++++-- 6 files changed, 84 insertions(+), 42 deletions(-) diff --git a/main.go b/main.go index 2495d98..9adf6ac 100644 --- a/main.go +++ b/main.go @@ -85,7 +85,8 @@ func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logg if module.Timeout.Seconds() < timeoutSeconds && module.Timeout.Seconds() > 0 { timeoutSeconds = module.Timeout.Seconds() } - ctx, cancel := context.WithTimeout(context.Background(), time.Duration((timeoutSeconds-*timeoutOffset)*1e9)) + timeoutSeconds -= *timeoutOffset + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeoutSeconds*1e9)) defer cancel() r = r.WithContext(ctx) @@ -111,19 +112,20 @@ func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logg } sl := newScrapeLogger(logger, moduleName, target) - level.Info(sl).Log("msg", "Beginning probe", "probe", module.Prober) + level.Info(sl).Log("msg", "Beginning probe", "probe", module.Prober, "timeout_seconds", timeoutSeconds) start := time.Now() registry := prometheus.NewRegistry() registry.MustRegister(probeSuccessGauge) registry.MustRegister(probeDurationGauge) success := prober(ctx, target, module, registry, sl) - probeDurationGauge.Set(time.Since(start).Seconds()) + duration := time.Since(start).Seconds() + probeDurationGauge.Set(duration) if success { probeSuccessGauge.Set(1) - level.Info(sl).Log("msg", "Probe succeeded") + level.Info(sl).Log("msg", "Probe succeeded", "duration_seconds", duration) } else { - level.Error(sl).Log("msg", "Probe failed") + level.Error(sl).Log("msg", "Probe failed", "duration_seconds", duration) } debug := false @@ -139,11 +141,16 @@ func probeHandler(w http.ResponseWriter, r *http.Request, c *config.Config, logg mfs, err := registry.Gather() if err != nil { fmt.Fprintf(w, "Error gathering metrics: %s\n", err) - return } for _, mf := range mfs { expfmt.MetricFamilyToText(w, mf) } + fmt.Fprintf(w, "\n\n\nModule configuration:\n") + c, err := yaml.Marshal(module) + if err != nil { + fmt.Fprintf(w, "Error marshalling config: %s\n", err) + } + w.Write(c) return } diff --git a/prober/dns.go b/prober/dns.go index 2418a6a..217aeb1 100644 --- a/prober/dns.go +++ b/prober/dns.go @@ -32,27 +32,30 @@ func validRRs(rrs *[]dns.RR, v *config.DNSRRValidator, logger log.Logger) bool { // Fail the probe if there are no RRs of a given type, but a regexp match is required // (i.e. FailIfNotMatchesRegexp is set). if len(*rrs) == 0 && len(v.FailIfNotMatchesRegexp) > 0 { + level.Error(logger).Log("msg", "fail_if_not_matches_regexp specified but no RRs returned") return false } for _, rr := range *rrs { - level.Debug(logger).Log("msg", "Validating RR: %q", rr) + level.Info(logger).Log("msg", "Validating RR", "rr", rr) for _, re := range v.FailIfMatchesRegexp { match, err := regexp.MatchString(re, rr.String()) if err != nil { - level.Error(logger).Log("msg", "Error matching regexp %q: %s", re, err) + level.Error(logger).Log("msg", "Error matching regexp", "regexp", re, "err", err) return false } if match { + level.Error(logger).Log("msg", "RR matched regexp", "regexp", re, "rr", rr) return false } } for _, re := range v.FailIfNotMatchesRegexp { match, err := regexp.MatchString(re, rr.String()) if err != nil { - level.Error(logger).Log("msg", "Error matching regexp %q: %s", re, err) + level.Error(logger).Log("msg", "Error matching regexp", "regexp", re, "err", err) return false } if !match { + level.Error(logger).Log("msg", "RR did not match regexp", "regexp", re, "rr", rr) return false } } @@ -78,15 +81,15 @@ func validRcode(rcode int, valid []string, logger log.Logger) bool { } for _, rc := range validRcodes { if rcode == rc { + level.Info(logger).Log("msg", "Rcode is valid", "rcode", rcode, "string_rcode", dns.RcodeToString[rcode]) return true } } - level.Error(logger).Log("msg", "Rcode is not one of the valid rcodes", "string_rcode", dns.RcodeToString[rcode], "rcode", rcode, "valid_rcodes", validRcodes) + level.Error(logger).Log("msg", "Rcode is not one of the valid rcodes", "rcode", rcode, "string_rcode", dns.RcodeToString[rcode], "valid_rcodes", validRcodes) return false } func ProbeDNS(ctx context.Context, target string, module config.Module, registry *prometheus.Registry, logger log.Logger) bool { - var numAnswer, numAuthority, numAdditional int var dialProtocol string probeDNSAnswerRRSGauge := prometheus.NewGauge(prometheus.GaugeOpts{ Name: "probe_dns_answer_rrs", @@ -104,14 +107,6 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry registry.MustRegister(probeDNSAuthorityRRSGauge) registry.MustRegister(probeDNSAdditionalRRSGauge) - defer func() { - // These metrics can be used to build additional alerting based on the number of replies. - // They should be returned even in case of errors. - probeDNSAnswerRRSGauge.Set(float64(numAnswer)) - probeDNSAuthorityRRSGauge.Set(float64(numAuthority)) - probeDNSAdditionalRRSGauge.Set(float64(numAdditional)) - }() - var ip *net.IPAddr if module.DNS.TransportProtocol == "" { module.DNS.TransportProtocol = "udp" @@ -123,7 +118,7 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry port = "53" targetAddr = target } - ip, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry) + ip, err = chooseProtocol(module.DNS.PreferredIPProtocol, targetAddr, registry, logger) if err != nil { level.Error(logger).Log("msg", "Error resolving address", "err", err) return false @@ -154,30 +149,36 @@ func ProbeDNS(ctx context.Context, target string, module config.Module, registry msg := new(dns.Msg) msg.SetQuestion(dns.Fqdn(module.DNS.QueryName), qt) + level.Info(logger).Log("msg", "Making DNS query", "target", target, "dial_protocol", dialProtocol, "query", module.DNS.QueryName, "type", qt) timeoutDeadline, _ := ctx.Deadline() client.Timeout = timeoutDeadline.Sub(time.Now()) response, _, err := client.Exchange(msg, target) if err != nil { - level.Warn(logger).Log("msg", "Error while sending a DNS query", "err", err) + level.Error(logger).Log("msg", "Error while sending a DNS query", "err", err) return false } - level.Debug(logger).Log("msg", "Got response: %#v", response) + level.Info(logger).Log("msg", "Got response", "response", response) - numAnswer, numAuthority, numAdditional = len(response.Answer), len(response.Ns), len(response.Extra) + probeDNSAnswerRRSGauge.Set(float64(len(response.Answer))) + probeDNSAuthorityRRSGauge.Set(float64(len(response.Ns))) + probeDNSAdditionalRRSGauge.Set(float64(len(response.Extra))) if !validRcode(response.Rcode, module.DNS.ValidRcodes, logger) { return false } + level.Info(logger).Log("msg", "Validating Answer RRs") if !validRRs(&response.Answer, &module.DNS.ValidateAnswer, logger) { - level.Debug(logger).Log("msg", "Answer RRs validation failed") + level.Error(logger).Log("msg", "Answer RRs validation failed") return false } + level.Info(logger).Log("msg", "Validating Authority RRs") if !validRRs(&response.Ns, &module.DNS.ValidateAuthority, logger) { - level.Debug(logger).Log("msg", "Authority RRs validation failed") + level.Error(logger).Log("msg", "Authority RRs validation failed") return false } + level.Info(logger).Log("msg", "Validating Additional RRs") if !validRRs(&response.Extra, &module.DNS.ValidateAdditional, logger) { - level.Debug(logger).Log("msg", "Additional RRs validation failed") + level.Error(logger).Log("msg", "Additional RRs validation failed") return false } return true diff --git a/prober/http.go b/prober/http.go index 7bc284b..d035660 100644 --- a/prober/http.go +++ b/prober/http.go @@ -16,6 +16,7 @@ package prober import ( "context" "errors" + "fmt" "io" "io/ioutil" "net" @@ -46,6 +47,7 @@ func matchRegularExpressions(reader io.Reader, httpConfig config.HTTPProbe, logg return false } if re.Match(body) { + level.Error(logger).Log("msg", "Body matched regular expression", "regexp", expression) return false } } @@ -56,6 +58,7 @@ func matchRegularExpressions(reader io.Reader, httpConfig config.HTTPProbe, logg return false } if !re.Match(body) { + level.Error(logger).Log("msg", "Body did not match regular expression", "regexp", expression) return false } } @@ -116,6 +119,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr targetURL, err := url.Parse(target) if err != nil { + level.Error(logger).Log("msg", "Could not parse target URL", "err", err) return false } targetHost, targetPort, err := net.SplitHostPort(targetURL.Host) @@ -124,8 +128,9 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr targetHost = targetURL.Host } - ip, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry) + ip, err := chooseProtocol(module.HTTP.PreferredIPProtocol, targetHost, registry, logger) if err != nil { + level.Error(logger).Log("msg", "Error resolving address", "err", err) return false } @@ -137,9 +142,11 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr return false } - client.CheckRedirect = func(_ *http.Request, via []*http.Request) error { + client.CheckRedirect = func(r *http.Request, via []*http.Request) error { + level.Info(logger).Log("msg", "Received redirect", "url", r.URL.String()) redirects = len(via) if redirects > 10 || httpConfig.NoFollowRedirects { + level.Info(logger).Log("msg", "Not following redirect") return errors.New("Don't follow redirects") } return nil @@ -175,13 +182,14 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr if httpConfig.Body != "" { request.Body = ioutil.NopCloser(strings.NewReader(httpConfig.Body)) } + level.Info(logger).Log("msg", "Making HTTP request", "url", request.URL.String()) resp, err := client.Do(request) // Err won't be nil if redirects were turned off. See https://github.com/golang/go/issues/3795 if err != nil && resp == nil { level.Error(logger).Log("msg", "Error for HTTP request", "err", err) - success = false } else { defer resp.Body.Close() + level.Info(logger).Log("msg", "Received HTTP response", "status_code", resp.StatusCode) if len(httpConfig.ValidStatusCodes) != 0 { for _, code := range httpConfig.ValidStatusCodes { if resp.StatusCode == code { @@ -189,8 +197,14 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr break } } + if !success { + level.Info(logger).Log("msg", "Invalid HTTP response status code", "status_code", resp.StatusCode, + "valid_status_codes", fmt.Sprintf("%v", httpConfig.ValidStatusCodes)) + } } else if 200 <= resp.StatusCode && resp.StatusCode < 300 { success = true + } else { + level.Info(logger).Log("msg", "Invalid HTTP response status code, wanted 2xx", "status_code", resp.StatusCode) } if success && (len(httpConfig.FailIfMatchesRegexp) > 0 || len(httpConfig.FailIfNotMatchesRegexp) > 0) { @@ -218,6 +232,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr } } if !found { + level.Error(logger).Log("msg", "Invalid HTTP version number", "version", httpVersionNumber) success = false } } @@ -233,9 +248,11 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr registry.MustRegister(probeSSLEarliestCertExpiryGauge) probeSSLEarliestCertExpiryGauge.Set(float64(getEarliestCertExpiry(resp.TLS).UnixNano() / 1e9)) if httpConfig.FailIfSSL { + level.Error(logger).Log("msg", "Final request was over SSL") success = false } } else if httpConfig.FailIfNotSSL { + level.Error(logger).Log("msg", "Final request was not over SSL") success = false } diff --git a/prober/icmp.go b/prober/icmp.go index d62c20e..052cae5 100644 --- a/prober/icmp.go +++ b/prober/icmp.go @@ -52,12 +52,13 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr timeoutDeadline, _ := ctx.Deadline() deadline := time.Now().Add(timeoutDeadline.Sub(time.Now())) - ip, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry) + ip, err := chooseProtocol(module.ICMP.PreferredIPProtocol, target, registry, logger) if err != nil { level.Warn(logger).Log("msg", "Error resolving address", "err", err) return false } + level.Info(logger).Log("msg", "Creating socket") if ip.IP.To4() == nil { requestType = ipv6.ICMPTypeEchoRequest replyType = ipv6.ICMPTypeEchoReply @@ -74,14 +75,16 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr } defer socket.Close() + body := &icmp.Echo{ + ID: os.Getpid() & 0xffff, + Seq: int(getICMPSequence()), + Data: []byte("Prometheus Blackbox Exporter"), + } + level.Info(logger).Log("msg", "Creating ICMP packet", "seq", body.Seq, "id", body.ID) wm := icmp.Message{ Type: requestType, Code: 0, - Body: &icmp.Echo{ - ID: os.Getpid() & 0xffff, - Seq: int(getICMPSequence()), - Data: []byte("Prometheus Blackbox Exporter"), - }, + Body: body, } wb, err := wm.Marshal(nil) @@ -89,6 +92,7 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr level.Error(logger).Log("msg", "Error marshalling packet", "err", err) return } + level.Info(logger).Log("msg", "Writing out packet") if _, err = socket.WriteTo(wb, ip); err != nil { level.Warn(logger).Log("msg", "Error writing to socket", "err", err) return @@ -107,11 +111,12 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr level.Error(logger).Log("msg", "Error setting socket deadline", "err", err) return } + level.Info(logger).Log("msg", "Waiting for reply packets") for { n, peer, err := socket.ReadFrom(rb) if err != nil { if nerr, ok := err.(net.Error); ok && nerr.Timeout() { - level.Warn(logger).Log("msg", "Timeout reading from socket for", "err", err) + level.Warn(logger).Log("msg", "Timeout reading from socket", "err", err) return } level.Error(logger).Log("msg", "Error reading from socket", "err", err) @@ -126,6 +131,7 @@ func ProbeICMP(ctx context.Context, target string, module config.Module, registr rb[3] = 0 } if bytes.Compare(rb[:n], wb) == 0 { + level.Info(logger).Log("msg", "Found matching reply packet") return true } } diff --git a/prober/tcp.go b/prober/tcp.go index e5bf716..3e64410 100644 --- a/prober/tcp.go +++ b/prober/tcp.go @@ -38,7 +38,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry return nil, err } - ip, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry) + ip, err := chooseProtocol(module.TCP.PreferredIPProtocol, targetAddress, registry, logger) if err != nil { level.Error(logger).Log("msg", "Error resolving address", "err", err) return nil, err @@ -52,6 +52,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry dialTarget = net.JoinHostPort(ip.String(), port) if !module.TCP.TLS { + level.Info(logger).Log("msg", "Dialing TCP without TLS") return dialer.DialContext(ctx, dialProtocol, dialTarget) } tlsConfig, err := pconfig.NewTLSConfig(&module.TCP.TLSConfig) @@ -62,6 +63,7 @@ func dialTCP(ctx context.Context, target string, module config.Module, registry timeoutDeadline, _ := ctx.Deadline() dialer.Deadline = timeoutDeadline + level.Info(logger).Log("msg", "Dialing TCP with TLS") return tls.DialWithDialer(dialer, dialProtocol, dialTarget, tlsConfig) } @@ -76,12 +78,14 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry }) registry.MustRegister(probeFailedDueToRegex) deadline, _ := ctx.Deadline() + conn, err := dialTCP(ctx, target, module, registry, logger) if err != nil { level.Error(logger).Log("msg", "Error dialing TCP", "err", err) return false } defer conn.Close() + level.Info(logger).Log("msg", "Successfully dialed") // Set a deadline to prevent the following code from blocking forever. // If a deadline cannot be set, better fail the probe by returning an error @@ -97,7 +101,7 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry } scanner := bufio.NewScanner(conn) for i, qr := range module.TCP.QueryResponse { - level.Debug(logger).Log("msg", "Processing query response entry", "entry_number", i) + level.Info(logger).Log("msg", "Processing query response entry", "entry_number", i) send := qr.Send if qr.Expect != "" { re, err := regexp.Compile(qr.Expect) @@ -111,7 +115,7 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry level.Debug(logger).Log("msg", "Read line", "line", scanner.Text()) match = re.FindSubmatchIndex(scanner.Bytes()) if match != nil { - level.Debug(logger).Log("msg", "Regexp matched", "regexp", re, "line", scanner.Text()) + level.Info(logger).Log("msg", "Regexp matched", "regexp", re, "line", scanner.Text()) break } } @@ -121,14 +125,16 @@ func ProbeTCP(ctx context.Context, target string, module config.Module, registry } if match == nil { probeFailedDueToRegex.Set(1) + level.Error(logger).Log("msg", "Regexp did not match", "regexp", re, "line", scanner.Text()) return false } probeFailedDueToRegex.Set(0) send = string(re.Expand(nil, []byte(send), scanner.Bytes(), match)) } if send != "" { - level.Debug(logger).Log("msg", "Sending %q", send) + level.Debug(logger).Log("msg", "Sending line", "line", send) if _, err := fmt.Fprintf(conn, "%s\n", send); err != nil { + level.Error(logger).Log("msg", "Failed to send", "err", err) return false } } diff --git a/prober/utils.go b/prober/utils.go index fb3890c..42a93ba 100644 --- a/prober/utils.go +++ b/prober/utils.go @@ -4,11 +4,14 @@ import ( "net" "time" + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/prometheus/client_golang/prometheus" ) // Returns the preferedIPProtocol, the dialProtocol, and sets the probeIPProtocolGauge. -func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry) (*net.IPAddr, error) { +func chooseProtocol(preferredIPProtocol string, target string, registry *prometheus.Registry, logger log.Logger) (*net.IPAddr, error) { var fallbackProtocol string probeDNSLookupTimeSeconds := prometheus.NewGauge(prometheus.GaugeOpts{ @@ -37,12 +40,14 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh fallbackProtocol = "ip6" } + level.Info(logger).Log("msg", "Resolving target address", "preferred_ip_protocol", preferredIPProtocol) resolveStart := time.Now() defer probeDNSLookupTimeSeconds.Add(time.Since(resolveStart).Seconds()) ip, err := net.ResolveIPAddr(preferredIPProtocol, target) if err != nil { + level.Warn(logger).Log("msg", "Resolution with preferred IP protocol failed, attempting fallback protocol", "fallback_protocol", fallbackProtocol, "err", err) ip, err = net.ResolveIPAddr(fallbackProtocol, target) if err != nil { return ip, err @@ -55,6 +60,6 @@ func chooseProtocol(preferredIPProtocol string, target string, registry *prometh probeIPProtocolGauge.Set(4) } + level.Info(logger).Log("msg", "Resolved target address", "ip", ip) return ip, nil - } -- 2.25.1