2
Files

409 lines
13 KiB
Go

package investigation
import (
"context"
"log"
"net"
"net/http"
"net/http/httptest"
"net/netip"
"strings"
"sync"
"testing"
"time"
"git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/config"
)
func TestInvestigateRecognizesBotViaPublishedRanges(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/ranges.json" {
http.NotFound(w, r)
return
}
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`))
}))
defer server.Close()
svc := newService(
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
server.Client(),
&fakeResolver{},
log.New(testWriter{t}, "", 0),
[]botProvider{{
ID: "google_official",
Name: "Googlebot",
Icon: "🤖",
SourceFormat: "json_prefixes",
CacheTTL: time.Hour,
IPRangeURLs: []string{server.URL + "/ranges.json"},
}},
map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
)
investigation, err := svc.Investigate(context.Background(), "203.0.113.10", []string{"Mozilla/5.0"})
if err != nil {
t.Fatalf("investigate ip: %v", err)
}
if investigation.Bot == nil || investigation.Bot.Name != "Googlebot" {
t.Fatalf("expected Googlebot match, got %+v", investigation)
}
if investigation.Registration != nil || investigation.Reputation != nil {
t.Fatalf("expected bot investigation to stop before deep lookups, got %+v", investigation)
}
}
// TestInvestigateRecognizesBotViaReverseDNS checks that an address whose PTR
// name ends in a provider's reverse-DNS suffix, and whose forward lookup maps
// back to the same address, is reported as that provider's bot with a
// forward-confirmed reverse DNS record.
func TestInvestigateRecognizesBotViaReverseDNS(t *testing.T) {
	t.Parallel()

	const ip = "198.51.100.20"

	// The PTR answer carries a trailing dot; the forward map is keyed without it.
	resolver := &fakeResolver{
		reverse: map[string][]string{ip: {"crawl.search.example.test."}},
		forward: map[string][]net.IPAddr{"crawl.search.example.test": {{IP: net.ParseIP(ip)}}},
	}
	cfg := config.InvestigationConfig{
		Enabled:         true,
		Timeout:         config.Duration{Duration: time.Second},
		UserAgent:       "test-agent",
		SpamhausEnabled: true,
	}
	providers := []botProvider{{
		ID:                 "bing_official",
		Name:               "Bingbot",
		Icon:               "🤖",
		CacheTTL:           time.Hour,
		ReverseDNSSuffixes: []string{".search.example.test"},
	}}
	svc := newService(cfg, http.DefaultClient, resolver, log.New(testWriter{t}, "", 0), providers, map[string]string{})

	result, err := svc.Investigate(context.Background(), ip, nil)
	if err != nil {
		t.Fatalf("investigate ip: %v", err)
	}

	botOK := result.Bot != nil && result.Bot.Name == "Bingbot"
	dnsOK := result.ReverseDNS != nil && result.ReverseDNS.ForwardConfirmed
	if !botOK || !dnsOK {
		t.Fatalf("expected reverse DNS bot match, got %+v", result)
	}
}
// TestInvestigateLoadsRegistrationAndSpamhausForNonBot checks that, with no
// bot providers configured, an investigation returns no bot match, fills in
// registration data from the stubbed bootstrap/RDAP endpoints, and reports a
// Spamhaus listing when the resolver answers the Spamhaus query name.
func TestInvestigateLoadsRegistrationAndSpamhausForNonBot(t *testing.T) {
	t.Parallel()

	const rdapBody = `{
"handle":"NET-198-51-100-0-1",
"name":"Example Network",
"country":"FR",
"cidr0_cidrs":[{"v4prefix":"198.51.100.0","length":24}],
"entities":[{"roles":["abuse"],"vcardArray":["vcard",[["email",{},"text","abuse@example.test"],["fn",{},"text","Example ISP"]]]}]
}`

	// The bootstrap document must point back at this very server, so the
	// handler reads server.URL, which is only known once the server is up.
	var server *httptest.Server
	server = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		var body string
		switch r.URL.Path {
		case "/bootstrap-v4.json":
			body = `{"services":[[["198.51.100.0/24"],["` + server.URL + `/rdap/"]]]}`
		case "/rdap/ip/198.51.100.30":
			body = rdapBody
		default:
			http.NotFound(w, r)
			return
		}
		// The write error is irrelevant to what this test asserts.
		_, _ = w.Write([]byte(body))
	}))
	defer server.Close()

	// Answer only the Spamhaus query name built for the investigated address.
	resolver := &fakeResolver{
		hosts: map[string][]string{spamhausQuery(t, "198.51.100.30"): {"127.0.0.2"}},
	}
	cfg := config.InvestigationConfig{
		Enabled:         true,
		Timeout:         config.Duration{Duration: 2 * time.Second},
		UserAgent:       "test-agent",
		SpamhausEnabled: true,
	}
	bootstrap := map[string]string{
		"ipv4": server.URL + "/bootstrap-v4.json",
		"ipv6": server.URL + "/bootstrap-v6.json",
	}
	svc := newService(cfg, server.Client(), resolver, log.New(testWriter{t}, "", 0), nil, bootstrap)

	result, err := svc.Investigate(context.Background(), "198.51.100.30", []string{"curl/8.0"})
	if err != nil {
		t.Fatalf("investigate ip: %v", err)
	}
	if result.Bot != nil {
		t.Fatalf("expected no bot match, got %+v", result.Bot)
	}

	reg := result.Registration
	regOK := reg != nil && reg.Organization == "Example ISP" && reg.Prefix == "198.51.100.0/24"
	if !regOK {
		t.Fatalf("unexpected registration info: %+v", reg)
	}

	rep := result.Reputation
	if rep == nil || !rep.SpamhausListed {
		t.Fatalf("expected spamhaus listing, got %+v", rep)
	}
}
func TestInvestigateAddsBotHintFromUserAgent(t *testing.T) {
t.Parallel()
serverURL := ""
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch r.URL.Path {
case "/bootstrap-v4.json":
_, _ = w.Write([]byte(`{"services":[[["216.73.216.0/22"],["` + serverURL + `/rdap/"]]]}`))
case "/rdap/ip/216.73.216.112":
_, _ = w.Write([]byte(`{"handle":"NET-216-73-216-0-1","name":"AWS-ANTHROPIC","entities":[{"roles":["abuse"],"vcardArray":["vcard",[["fn",{},"text","Anthropic, PBC"]]]}]}`))
default:
http.NotFound(w, r)
}
}))
serverURL = server.URL
defer server.Close()
svc := newService(
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
server.Client(),
&fakeResolver{},
log.New(testWriter{t}, "", 0),
nil,
map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
)
investigation, err := svc.Investigate(context.Background(), "216.73.216.112", []string{"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)"})
if err != nil {
t.Fatalf("investigate ip: %v", err)
}
if investigation.Bot == nil || investigation.Bot.Name != "ClaudeBot" || investigation.Bot.Verified {
t.Fatalf("expected unverified ClaudeBot hint, got %+v", investigation.Bot)
}
if investigation.Registration == nil || investigation.Registration.Organization != "Anthropic, PBC" {
t.Fatalf("expected registration enrichment to continue after bot hint, got %+v", investigation.Registration)
}
}
// TestParsePublishedNetworksSupportsCommentedGeofeed feeds parsePublishedNetworks
// a "geofeed_csv" document containing a leading "#" comment line and a trailing
// empty line, and expects exactly the three prefixes in input order.
func TestParsePublishedNetworksSupportsCommentedGeofeed(t *testing.T) {
	t.Parallel()

	lines := []string{
		"# Publication date: Thu Mar 12 2026",
		"31.13.78.0/24,NZ,NZ-AUK,Auckland,",
		"2a03:2880:f061::/48,NZ,NZ-AUK,Auckland,",
		"31.13.72.0/24,SE,,Bromma,",
		"",
	}
	feed := []byte(strings.Join(lines, "\n"))

	prefixes, err := parsePublishedNetworks(feed, "geofeed_csv", "https://example.test/geofeed")
	if err != nil {
		t.Fatalf("parse geofeed: %v", err)
	}
	if len(prefixes) != 3 {
		t.Fatalf("expected 3 prefixes, got %d", len(prefixes))
	}

	want := []string{"31.13.78.0/24", "2a03:2880:f061::/48", "31.13.72.0/24"}
	got := make([]string, 0, len(prefixes))
	for i := range prefixes {
		got = append(got, prefixes[i].String())
	}
	if strings.Join(got, ",") != strings.Join(want, ",") {
		t.Fatalf("unexpected geofeed prefixes: got %v want %v", got, want)
	}
}
func TestInvestigateRecognizesOpenAIBotViaEmbeddedUserAgentToken(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/gptbot.json" {
http.NotFound(w, r)
return
}
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`))
}))
defer server.Close()
svc := newService(
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
server.Client(),
&fakeResolver{},
log.New(testWriter{t}, "", 0),
[]botProvider{{
ID: "openai_gptbot_official",
Name: "GPTBot",
Icon: "🤖",
SourceFormat: "json_prefixes",
CacheTTL: time.Hour,
IPRangeURLs: []string{server.URL + "/gptbot.json"},
UserAgentPrefixes: []string{
"gptbot",
},
}},
map[string]string{},
)
investigation, err := svc.Investigate(context.Background(), "203.0.113.10", []string{"Mozilla/5.0 (compatible; GPTBot/1.0; +https://openai.com/gptbot)"})
if err != nil {
t.Fatalf("investigate ip: %v", err)
}
if investigation.Bot == nil || investigation.Bot.Name != "GPTBot" || !investigation.Bot.Verified {
t.Fatalf("expected verified GPTBot match, got %+v", investigation.Bot)
}
if investigation.Bot.Method != "user_agent+published_ranges" {
t.Fatalf("expected combined method, got %+v", investigation.Bot)
}
}
func TestInvestigateRecognizesPerplexityBotViaEmbeddedUserAgentToken(t *testing.T) {
t.Parallel()
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path != "/perplexitybot.json" {
http.NotFound(w, r)
return
}
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"198.51.100.0/24"}]}`))
}))
defer server.Close()
svc := newService(
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
server.Client(),
&fakeResolver{},
log.New(testWriter{t}, "", 0),
[]botProvider{{
ID: "perplexitybot_official",
Name: "PerplexityBot",
Icon: "🤖",
SourceFormat: "json_prefixes",
CacheTTL: time.Hour,
IPRangeURLs: []string{server.URL + "/perplexitybot.json"},
UserAgentPrefixes: []string{
"perplexitybot",
},
}},
map[string]string{},
)
investigation, err := svc.Investigate(context.Background(), "198.51.100.42", []string{"Mozilla/5.0 (compatible; PerplexityBot/1.0; +https://www.perplexity.ai/perplexitybot)"})
if err != nil {
t.Fatalf("investigate ip: %v", err)
}
if investigation.Bot == nil || investigation.Bot.Name != "PerplexityBot" || !investigation.Bot.Verified {
t.Fatalf("expected verified PerplexityBot match, got %+v", investigation.Bot)
}
}
// TestInvestigateRecognizesYandexViaReverseDNS checks that an address whose PTR
// name falls under one of several configured reverse-DNS suffixes, and whose
// forward lookup maps back to the same address, yields a verified YandexBot
// match with method "reverse_dns+fcrdns".
func TestInvestigateRecognizesYandexViaReverseDNS(t *testing.T) {
	t.Parallel()

	const ip = "203.0.113.55"

	// The PTR answer carries a trailing dot; the forward map is keyed without it.
	resolver := &fakeResolver{
		reverse: map[string][]string{ip: {"spider-55.search.yandex.ru."}},
		forward: map[string][]net.IPAddr{"spider-55.search.yandex.ru": {{IP: net.ParseIP(ip)}}},
	}
	cfg := config.InvestigationConfig{
		Enabled:         true,
		Timeout:         config.Duration{Duration: time.Second},
		UserAgent:       "test-agent",
		SpamhausEnabled: true,
	}
	providers := []botProvider{{
		ID:                 "yandex_official",
		Name:               "YandexBot",
		Icon:               "🤖",
		CacheTTL:           time.Hour,
		ReverseDNSSuffixes: []string{".yandex.ru", ".yandex.net", ".yandex.com"},
	}}
	svc := newService(cfg, http.DefaultClient, resolver, log.New(testWriter{t}, "", 0), providers, map[string]string{})

	userAgents := []string{"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"}
	result, err := svc.Investigate(context.Background(), ip, userAgents)
	if err != nil {
		t.Fatalf("investigate ip: %v", err)
	}

	bot := result.Bot
	verified := bot != nil && bot.Name == "YandexBot" && bot.Verified
	if !verified {
		t.Fatalf("expected verified YandexBot match, got %+v", bot)
	}
	if bot.Method != "reverse_dns+fcrdns" {
		t.Fatalf("expected reverse DNS verification, got %+v", bot)
	}
}
func TestPublishedNetworksAreCached(t *testing.T) {
t.Parallel()
var mu sync.Mutex
requestCount := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
mu.Lock()
requestCount++
mu.Unlock()
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`))
}))
defer server.Close()
svc := newService(
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
server.Client(),
&fakeResolver{},
log.New(testWriter{t}, "", 0),
[]botProvider{{
ID: "provider",
Name: "Provider bot",
Icon: "🤖",
SourceFormat: "json_prefixes",
CacheTTL: time.Hour,
IPRangeURLs: []string{server.URL},
}},
map[string]string{},
)
for index := 0; index < 2; index++ {
if _, err := svc.Investigate(context.Background(), "203.0.113.10", nil); err != nil {
t.Fatalf("investigate ip #%d: %v", index, err)
}
}
mu.Lock()
defer mu.Unlock()
if requestCount != 1 {
t.Fatalf("expected exactly one published-range request, got %d", requestCount)
}
}
// fakeResolver is an in-memory DNS stub for tests. Each lookup method consults
// one map and reports a not-found DNS error for any key that is absent. A zero
// value (all maps nil) therefore answers every lookup with not-found.
type fakeResolver struct {
	// reverse maps an IP address string to the PTR names LookupAddr returns.
	reverse map[string][]string
	// forward maps a host name to the addresses LookupIPAddr returns.
	forward map[string][]net.IPAddr
	// hosts maps a host name to the address strings LookupHost returns.
	hosts map[string][]string
}

// fakeNotFound builds the error returned for a missing entry. Name and Err are
// filled in, as the real resolver does for a missing host, so a logged error
// reads "lookup <name>: no such host" instead of the bare "lookup : ".
func fakeNotFound(name string) *net.DNSError {
	return &net.DNSError{Err: "no such host", Name: name, IsNotFound: true}
}

// LookupAddr returns the configured PTR names for addr, or a not-found
// *net.DNSError when addr has no entry. The context is ignored.
func (r *fakeResolver) LookupAddr(_ context.Context, addr string) ([]string, error) {
	names, ok := r.reverse[addr]
	if !ok {
		return nil, fakeNotFound(addr)
	}
	return names, nil
}

// LookupIPAddr returns the configured addresses for host, or a not-found
// *net.DNSError when host has no entry. The context is ignored.
func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAddr, error) {
	addrs, ok := r.forward[host]
	if !ok {
		return nil, fakeNotFound(host)
	}
	return addrs, nil
}

// LookupHost returns the configured address strings for host, or a not-found
// *net.DNSError when host has no entry. The context is ignored.
func (r *fakeResolver) LookupHost(_ context.Context, host string) ([]string, error) {
	addrs, ok := r.hosts[host]
	if !ok {
		return nil, fakeNotFound(host)
	}
	return addrs, nil
}
// testWriter forwards everything written to it to the wrapped test's log.
type testWriter struct {
	t *testing.T
}

// Write logs payload, with surrounding whitespace trimmed, through t.Log.
// It always reports the full payload length and a nil error.
func (w testWriter) Write(payload []byte) (int, error) {
	w.t.Helper()
	line := strings.TrimSpace(string(payload))
	w.t.Log(line)
	return len(payload), nil
}
// spamhausQuery parses ip and returns the name that spamhausLookupName builds
// for it. Any parse or build error fails the calling test immediately.
func spamhausQuery(t *testing.T, ip string) string {
	t.Helper()

	parsed, parseErr := netip.ParseAddr(ip)
	if parseErr != nil {
		t.Fatalf("parse ip: %v", parseErr)
	}

	name, buildErr := spamhausLookupName(parsed)
	if buildErr != nil {
		t.Fatalf("build spamhaus query: %v", buildErr)
	}
	return name
}