From 7bd393321504f66c551e1e49cc65ee09d95e4136 Mon Sep 17 00:00:00 2001
From: "Codex, agent ChatGPT"
Date: Thu, 12 Mar 2026 02:00:41 +0100
Subject: [PATCH] Add lazy IP enrichment and bot hints

---
Review notes (this section is ignored when the patch is applied):

- extractBotHintName now skips operator contact tokens ("+name@host",
  "+https://...") so that a contact listed before the product token can no
  longer be reported as the bot name; a regression test covers this.
- The blob ids on the "index" lines and the commit id on the first line are
  those of the original commit and were not recomputed for the revised content.
- NOTE(review): the rows.push string literals in the internal/web/handler.go
  hunk are reproduced as found; any HTML markup they originally carried is
  presumably missing - TODO confirm against the repository before applying.

 internal/investigation/service.go      | 64 ++++++++++++++++++++++++++++++++++++++++++
 internal/investigation/service_test.go | 53 +++++++++++++++++++++++++++++++++++
 internal/web/handler.go                |  2 +-
 3 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/internal/investigation/service.go b/internal/investigation/service.go
index ad79215..73dd237 100644
--- a/internal/investigation/service.go
+++ b/internal/investigation/service.go
@@ -139,6 +139,9 @@ func (s *Service) Investigate(ctx context.Context, ip string, userAgents []strin
 		investigation.ReverseDNS = reverseDNSInfo
 		return investigation, nil
 	}
+	if hint := detectBotHint(userAgents); hint != nil {
+		investigation.Bot = hint
+	}
 
 	warnings := make([]string, 0, 2)
 	if reverseDNSInfo == nil {
@@ -599,6 +602,67 @@ func normalizeUserAgents(userAgents []string) []string {
 	return uniqueStrings(items)
 }
 
+// detectBotHint returns an unverified bot match for the first user agent in
+// which extractBotHintName finds a crawler-like product token, or nil when no
+// user agent yields one. The hint comes from the User-Agent text alone, so
+// Verified is always false.
+func detectBotHint(userAgents []string) *model.BotMatch {
+	for _, userAgent := range userAgents {
+		name := extractBotHintName(userAgent)
+		if name == "" {
+			continue
+		}
+		return &model.BotMatch{
+			ProviderID: strings.ToLower(name),
+			Name:       name,
+			Icon:       "🤖",
+			Method:     "user_agent_hint",
+			Verified:   false,
+		}
+	}
+	return nil
+}
+
+// botHintMarkers lists the lowercase substrings that mark a product token as a
+// likely crawler.
+var botHintMarkers = []string{
+	"bot", "crawler", "spider", "slurp", "fetcher", "indexer", "preview", "externalhit",
+}
+
+// extractBotHintName returns the first product token of userAgent, without
+// its "/version" suffix, whose lowercased name contains one of botHintMarkers.
+// It returns "" when no token qualifies.
+func extractBotHintName(userAgent string) string {
+	tokens := strings.FieldsFunc(userAgent, func(r rune) bool {
+		switch r {
+		case ' ', ';', '(', ')', ',', '\t':
+			return true
+		default:
+			return false
+		}
+	})
+	for _, token := range tokens {
+		// Operator contact details ("+claudebot@anthropic.com",
+		// "+https://example.com/bot") are not product names; skipping them
+		// keeps a contact that precedes the product token from becoming the hint.
+		if strings.HasPrefix(token, "+") || strings.Contains(token, "@") || strings.Contains(token, "://") {
+			continue
+		}
+		name, _, _ := strings.Cut(token, "/")
+		name = strings.TrimSpace(name)
+		if name == "" {
+			continue
+		}
+		lowered := strings.ToLower(name)
+		for _, marker := range botHintMarkers {
+			if strings.Contains(lowered, marker) {
+				return name
+			}
+		}
+	}
+	return ""
+}
+
 func extractPrefix(payload map[string]any) string {
 	items, ok := payload["cidr0_cidrs"].([]any)
 	if !ok {
diff --git a/internal/investigation/service_test.go b/internal/investigation/service_test.go
index 95469a6..841f585 100644
--- a/internal/investigation/service_test.go
+++ b/internal/investigation/service_test.go
@@ -138,6 +138,59 @@ func TestInvestigateLoadsRegistrationAndSpamhausForNonBot(t *testing.T) {
 	}
 }
 
+func TestInvestigateAddsBotHintFromUserAgent(t *testing.T) {
+	t.Parallel()
+
+	serverURL := ""
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		switch r.URL.Path {
+		case "/bootstrap-v4.json":
+			_, _ = w.Write([]byte(`{"services":[[["216.73.216.0/22"],["` + serverURL + `/rdap/"]]]}`))
+		case "/rdap/ip/216.73.216.112":
+			_, _ = w.Write([]byte(`{"handle":"NET-216-73-216-0-1","name":"AWS-ANTHROPIC","entities":[{"roles":["abuse"],"vcardArray":["vcard",[["fn",{},"text","Anthropic, PBC"]]]}]}`))
+		default:
+			http.NotFound(w, r)
+		}
+	}))
+	serverURL = server.URL
+	defer server.Close()
+
+	svc := newService(
+		config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
+		server.Client(),
+		&fakeResolver{},
+		log.New(testWriter{t}, "", 0),
+		nil,
+		map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
+	)
+
+	investigation, err := svc.Investigate(context.Background(), "216.73.216.112", []string{"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)"})
+	if err != nil {
+		t.Fatalf("investigate ip: %v", err)
+	}
+	if investigation.Bot == nil || investigation.Bot.Name != "ClaudeBot" || investigation.Bot.Verified {
+		t.Fatalf("expected unverified ClaudeBot hint, got %+v", investigation.Bot)
+	}
+	if investigation.Registration == nil || investigation.Registration.Organization != "Anthropic, PBC" {
+		t.Fatalf("expected registration enrichment to continue after bot hint, got %+v", investigation.Registration)
+	}
+}
+
+func TestExtractBotHintNameSkipsContactTokens(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct{ userAgent, want string }{
+		{"Mozilla/5.0 (compatible; +bots@example.com; ExampleBot/2.0)", "ExampleBot"},
+		{"Mozilla/5.0 (compatible; +crawler@example.com)", ""},
+		{"facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "facebookexternalhit"},
+	}
+	for _, tc := range cases {
+		if got := extractBotHintName(tc.userAgent); got != tc.want {
+			t.Errorf("extractBotHintName(%q) = %q, want %q", tc.userAgent, got, tc.want)
+		}
+	}
+}
+
 func TestPublishedNetworksAreCached(t *testing.T) {
 	t.Parallel()
 
diff --git a/internal/web/handler.go b/internal/web/handler.go
index 3cbc23f..3ae9427 100644
--- a/internal/web/handler.go
+++ b/internal/web/handler.go
@@ -579,7 +579,7 @@ const ipDetailsHTML = `
       }
       const rows = [];
       if (investigation.bot) {
-        rows.push('Bot: ' + escapeHtml(investigation.bot.icon || '🤖') + ' ' + escapeHtml(investigation.bot.name) + ' via ' + escapeHtml(investigation.bot.method) + '');
+        rows.push('' + (investigation.bot.verified ? 'Bot' : 'Possible bot') + ': ' + escapeHtml(investigation.bot.icon || '🤖') + ' ' + escapeHtml(investigation.bot.name) + ' via ' + escapeHtml(investigation.bot.method) + (investigation.bot.verified ? '' : ' (not verified)') + '');
       } else {
         rows.push('Bot: no verified bot match');
       }