2

Add lazy IP enrichment and bot hints

This commit is contained in:
2026-03-12 02:00:41 +01:00
parent c5e1c4ff36
commit 7bd3933215
3 changed files with 81 additions and 1 deletions

View File

@@ -139,6 +139,9 @@ func (s *Service) Investigate(ctx context.Context, ip string, userAgents []strin
investigation.ReverseDNS = reverseDNSInfo
return investigation, nil
}
if hint := detectBotHint(userAgents); hint != nil {
investigation.Bot = hint
}
warnings := make([]string, 0, 2)
if reverseDNSInfo == nil {
@@ -599,6 +602,45 @@ func normalizeUserAgents(userAgents []string) []string {
return uniqueStrings(items)
}
// detectBotHint returns an unverified bot match derived from the first entry
// of userAgents for which extractBotHintName yields a non-empty name, or nil
// when no entry yields one.
//
// The match is only a hint taken from the User-Agent text: Verified is always
// false and Method is "user_agent_hint". ProviderID is the lower-cased name.
func detectBotHint(userAgents []string) *model.BotMatch {
	for i := range userAgents {
		if label := extractBotHintName(userAgents[i]); label != "" {
			return &model.BotMatch{
				ProviderID: strings.ToLower(label),
				Name:       label,
				Icon:       "🤖",
				Method:     "user_agent_hint",
				Verified:   false,
			}
		}
	}
	return nil
}
// extractBotHintName returns the first token of userAgent whose product name
// contains a bot-like keyword, keeping the token's original casing. It returns
// "" when no token qualifies.
//
// The User-Agent is tokenized on spaces, tabs, semicolons, commas and
// parentheses. For each token only the text before the first "/" (the product
// name, without its version) is examined, and keyword matching is
// case-insensitive.
func extractBotHintName(userAgent string) string {
	keywords := [...]string{
		"bot", "crawler", "spider", "slurp",
		"fetcher", "indexer", "preview", "externalhit",
	}
	isDelimiter := func(r rune) bool {
		return r == ' ' || r == ';' || r == '(' || r == ')' || r == ',' || r == '\t'
	}

	for _, token := range strings.FieldsFunc(userAgent, isDelimiter) {
		// Drop the "/version" suffix, if any.
		product, _, _ := strings.Cut(token, "/")
		product = strings.TrimSpace(product)
		if product == "" {
			continue
		}
		lowered := strings.ToLower(product)
		for _, keyword := range keywords {
			if strings.Contains(lowered, keyword) {
				return product
			}
		}
	}
	return ""
}
func extractPrefix(payload map[string]any) string {
items, ok := payload["cidr0_cidrs"].([]any)
if !ok {

View File

@@ -138,6 +138,44 @@ func TestInvestigateLoadsRegistrationAndSpamhausForNonBot(t *testing.T) {
}
}
func TestInvestigateAddsBotHintFromUserAgent(t *testing.T) {
t.Parallel()
serverURL := ""
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/bootstrap-v4.json":
_, _ = w.Write([]byte(`{"services":[[["216.73.216.0/22"],["` + serverURL + `/rdap/"]]]}`))
case "/rdap/ip/216.73.216.112":
_, _ = w.Write([]byte(`{"handle":"NET-216-73-216-0-1","name":"AWS-ANTHROPIC","entities":[{"roles":["abuse"],"vcardArray":["vcard",[["fn",{},"text","Anthropic, PBC"]]]}]}`))
default:
http.NotFound(w, r)
}
}))
serverURL = server.URL
defer server.Close()
svc := newService(
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
server.Client(),
&fakeResolver{},
log.New(testWriter{t}, "", 0),
nil,
map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
)
investigation, err := svc.Investigate(context.Background(), "216.73.216.112", []string{"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)"})
if err != nil {
t.Fatalf("investigate ip: %v", err)
}
if investigation.Bot == nil || investigation.Bot.Name != "ClaudeBot" || investigation.Bot.Verified {
t.Fatalf("expected unverified ClaudeBot hint, got %+v", investigation.Bot)
}
if investigation.Registration == nil || investigation.Registration.Organization != "Anthropic, PBC" {
t.Fatalf("expected registration enrichment to continue after bot hint, got %+v", investigation.Registration)
}
}
func TestPublishedNetworksAreCached(t *testing.T) {
t.Parallel()