2

Harden verified bot detection

This commit is contained in:
2026-03-12 16:45:11 +01:00
parent 0bc2d2b689
commit b7943e69db
4 changed files with 265 additions and 17 deletions

View File

@@ -176,6 +176,147 @@ func TestInvestigateAddsBotHintFromUserAgent(t *testing.T) {
}
}
// TestParsePublishedNetworksSupportsCommentedGeofeed feeds parsePublishedNetworks
// a "geofeed_csv" payload that starts with a "#" comment line and ends with an
// empty line, and checks that exactly the three data rows come back as
// prefixes, in input order.
func TestParsePublishedNetworksSupportsCommentedGeofeed(t *testing.T) {
	t.Parallel()

	// One comment line, two IPv4 rows, one IPv6 row, and a trailing empty
	// entry so the joined payload ends with a newline. Every data row carries
	// extra comma-separated columns after the prefix.
	rows := []string{
		"# Publication date: Thu Mar 12 2026",
		"31.13.78.0/24,NZ,NZ-AUK,Auckland,",
		"2a03:2880:f061::/48,NZ,NZ-AUK,Auckland,",
		"31.13.72.0/24,SE,,Bromma,",
		"",
	}
	payload := []byte(strings.Join(rows, "\n"))

	networks, err := parsePublishedNetworks(payload, "geofeed_csv", "https://example.test/geofeed")
	if err != nil {
		t.Fatalf("parse geofeed: %v", err)
	}
	// Guard the length first so a wrong count is reported as such instead of
	// being masked by the content comparison below.
	if count := len(networks); count != 3 {
		t.Fatalf("expected 3 prefixes, got %d", count)
	}

	want := []string{"31.13.78.0/24", "2a03:2880:f061::/48", "31.13.72.0/24"}
	got := make([]string, 0, len(networks))
	for i := range networks {
		got = append(got, networks[i].String())
	}
	if strings.Join(got, ",") != strings.Join(want, ",") {
		t.Fatalf("unexpected geofeed prefixes: got %v want %v", got, want)
	}
}
func TestInvestigateRecognizesOpenAIBotViaEmbeddedUserAgentToken(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/gptbot.json" {
http.NotFound(w, r)
return
}
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`))
}))
defer server.Close()
svc := newService(
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
server.Client(),
&fakeResolver{},
log.New(testWriter{t}, "", 0),
[]botProvider{{
ID: "openai_gptbot_official",
Name: "GPTBot",
Icon: "🤖",
SourceFormat: "json_prefixes",
CacheTTL: time.Hour,
IPRangeURLs: []string{server.URL + "/gptbot.json"},
UserAgentPrefixes: []string{
"gptbot",
},
}},
map[string]string{},
)
investigation, err := svc.Investigate(context.Background(), "203.0.113.10", []string{"Mozilla/5.0 (compatible; GPTBot/1.0; +https://openai.com/gptbot)"})
if err != nil {
t.Fatalf("investigate ip: %v", err)
}
if investigation.Bot == nil || investigation.Bot.Name != "GPTBot" || !investigation.Bot.Verified {
t.Fatalf("expected verified GPTBot match, got %+v", investigation.Bot)
}
if investigation.Bot.Method != "user_agent+published_ranges" {
t.Fatalf("expected combined method, got %+v", investigation.Bot)
}
}
// TestInvestigateRecognizesPerplexityBotViaEmbeddedUserAgentToken checks that
// an address inside the provider's published range, presented with a user
// agent that carries the "PerplexityBot" token in the middle of a
// Mozilla-style string, is reported as a verified PerplexityBot.
func TestInvestigateRecognizesPerplexityBotViaEmbeddedUserAgentToken(t *testing.T) {
	t.Parallel()

	// Local stand-in for the published range endpoint; every path other than
	// /perplexitybot.json answers 404.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/perplexitybot.json" {
			http.NotFound(w, r)
			return
		}
		_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"198.51.100.0/24"}]}`))
	}))
	defer server.Close()

	svc := newService(
		config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
		server.Client(),
		&fakeResolver{},
		log.New(testWriter{t}, "", 0),
		[]botProvider{{
			ID:           "perplexitybot_official",
			Name:         "PerplexityBot",
			Icon:         "🤖",
			SourceFormat: "json_prefixes",
			CacheTTL:     time.Hour,
			IPRangeURLs:  []string{server.URL + "/perplexitybot.json"},
			UserAgentPrefixes: []string{
				"perplexitybot",
			},
		}},
		map[string]string{},
	)

	// The bot token sits after "Mozilla/5.0 (compatible; ", not at the start
	// of the user agent string.
	investigation, err := svc.Investigate(context.Background(), "198.51.100.42", []string{"Mozilla/5.0 (compatible; PerplexityBot/1.0; +https://www.perplexity.ai/perplexitybot)"})
	if err != nil {
		t.Fatalf("investigate ip: %v", err)
	}
	if investigation.Bot == nil || investigation.Bot.Name != "PerplexityBot" || !investigation.Bot.Verified {
		t.Fatalf("expected verified PerplexityBot match, got %+v", investigation.Bot)
	}
	// Pin the verification method as well: without this the test would still
	// pass if the match were verified without the embedded user agent token
	// being recognized, which is the behavior the test name promises. The
	// expected value is the one the GPTBot test asserts for the same provider
	// shape (json_prefixes ranges plus a user agent prefix).
	if investigation.Bot.Method != "user_agent+published_ranges" {
		t.Fatalf("expected combined method, got %+v", investigation.Bot)
	}
}
// TestInvestigateRecognizesYandexViaReverseDNS checks that a provider
// configured only with reverse DNS suffixes (no range URLs, no user agent
// prefixes) yields a verified YandexBot with the "reverse_dns+fcrdns" method
// when the fake resolver maps the IP to a yandex.ru host and that host back
// to the same IP.
func TestInvestigateRecognizesYandexViaReverseDNS(t *testing.T) {
	t.Parallel()

	const (
		crawlerIP   = "203.0.113.55"
		crawlerHost = "spider-55.search.yandex.ru"
	)
	// The reverse answer carries a trailing dot; the forward table is keyed by
	// the same name without it.
	resolver := &fakeResolver{
		reverse: map[string][]string{crawlerIP: {crawlerHost + "."}},
		forward: map[string][]net.IPAddr{crawlerHost: {{IP: net.ParseIP(crawlerIP)}}},
	}

	cfg := config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true}
	yandex := botProvider{
		ID:                 "yandex_official",
		Name:               "YandexBot",
		Icon:               "🤖",
		CacheTTL:           time.Hour,
		ReverseDNSSuffixes: []string{".yandex.ru", ".yandex.net", ".yandex.com"},
	}
	svc := newService(
		cfg,
		http.DefaultClient,
		resolver,
		log.New(testWriter{t}, "", 0),
		[]botProvider{yandex},
		map[string]string{},
	)

	userAgents := []string{"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"}
	investigation, err := svc.Investigate(context.Background(), crawlerIP, userAgents)
	if err != nil {
		t.Fatalf("investigate ip: %v", err)
	}

	bot := investigation.Bot
	if bot == nil || !bot.Verified || bot.Name != "YandexBot" {
		t.Fatalf("expected verified YandexBot match, got %+v", bot)
	}
	if method := bot.Method; method != "reverse_dns+fcrdns" {
		t.Fatalf("expected reverse DNS verification, got %+v", bot)
	}
}
func TestPublishedNetworksAreCached(t *testing.T) {
t.Parallel()