You've already forked caddy-opnsense-blocker
409 lines
13 KiB
Go
409 lines
13 KiB
Go
package investigation
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"net"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"net/netip"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/config"
|
|
)
|
|
|
|
func TestInvestigateRecognizesBotViaPublishedRanges(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/ranges.json" {
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`))
|
|
}))
|
|
defer server.Close()
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
server.Client(),
|
|
&fakeResolver{},
|
|
log.New(testWriter{t}, "", 0),
|
|
[]botProvider{{
|
|
ID: "google_official",
|
|
Name: "Googlebot",
|
|
Icon: "🤖",
|
|
SourceFormat: "json_prefixes",
|
|
CacheTTL: time.Hour,
|
|
IPRangeURLs: []string{server.URL + "/ranges.json"},
|
|
}},
|
|
map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
|
|
)
|
|
|
|
investigation, err := svc.Investigate(context.Background(), "203.0.113.10", []string{"Mozilla/5.0"})
|
|
if err != nil {
|
|
t.Fatalf("investigate ip: %v", err)
|
|
}
|
|
if investigation.Bot == nil || investigation.Bot.Name != "Googlebot" {
|
|
t.Fatalf("expected Googlebot match, got %+v", investigation)
|
|
}
|
|
if investigation.Registration != nil || investigation.Reputation != nil {
|
|
t.Fatalf("expected bot investigation to stop before deep lookups, got %+v", investigation)
|
|
}
|
|
}
|
|
|
|
func TestInvestigateRecognizesBotViaReverseDNS(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
resolver := &fakeResolver{
|
|
reverse: map[string][]string{"198.51.100.20": {"crawl.search.example.test."}},
|
|
forward: map[string][]net.IPAddr{"crawl.search.example.test": {{IP: net.ParseIP("198.51.100.20")}}},
|
|
}
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
http.DefaultClient,
|
|
resolver,
|
|
log.New(testWriter{t}, "", 0),
|
|
[]botProvider{{
|
|
ID: "bing_official",
|
|
Name: "Bingbot",
|
|
Icon: "🤖",
|
|
CacheTTL: time.Hour,
|
|
ReverseDNSSuffixes: []string{".search.example.test"},
|
|
}},
|
|
map[string]string{},
|
|
)
|
|
|
|
investigation, err := svc.Investigate(context.Background(), "198.51.100.20", nil)
|
|
if err != nil {
|
|
t.Fatalf("investigate ip: %v", err)
|
|
}
|
|
if investigation.Bot == nil || investigation.Bot.Name != "Bingbot" || investigation.ReverseDNS == nil || !investigation.ReverseDNS.ForwardConfirmed {
|
|
t.Fatalf("expected reverse DNS bot match, got %+v", investigation)
|
|
}
|
|
}
|
|
|
|
func TestInvestigateLoadsRegistrationAndSpamhausForNonBot(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
serverURL := ""
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
switch r.URL.Path {
|
|
case "/bootstrap-v4.json":
|
|
_, _ = w.Write([]byte(`{"services":[[["198.51.100.0/24"],["` + serverURL + `/rdap/"]]]}`))
|
|
case "/rdap/ip/198.51.100.30":
|
|
_, _ = w.Write([]byte(`{
|
|
"handle":"NET-198-51-100-0-1",
|
|
"name":"Example Network",
|
|
"country":"FR",
|
|
"cidr0_cidrs":[{"v4prefix":"198.51.100.0","length":24}],
|
|
"entities":[{"roles":["abuse"],"vcardArray":["vcard",[["email",{},"text","abuse@example.test"],["fn",{},"text","Example ISP"]]]}]
|
|
}`))
|
|
default:
|
|
http.NotFound(w, r)
|
|
}
|
|
}))
|
|
serverURL = server.URL
|
|
defer server.Close()
|
|
|
|
resolver := &fakeResolver{
|
|
hosts: map[string][]string{spamhausQuery(t, "198.51.100.30"): {"127.0.0.2"}},
|
|
}
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: 2 * time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
server.Client(),
|
|
resolver,
|
|
log.New(testWriter{t}, "", 0),
|
|
nil,
|
|
map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
|
|
)
|
|
|
|
investigation, err := svc.Investigate(context.Background(), "198.51.100.30", []string{"curl/8.0"})
|
|
if err != nil {
|
|
t.Fatalf("investigate ip: %v", err)
|
|
}
|
|
if investigation.Bot != nil {
|
|
t.Fatalf("expected no bot match, got %+v", investigation.Bot)
|
|
}
|
|
if investigation.Registration == nil || investigation.Registration.Organization != "Example ISP" || investigation.Registration.Prefix != "198.51.100.0/24" {
|
|
t.Fatalf("unexpected registration info: %+v", investigation.Registration)
|
|
}
|
|
if investigation.Reputation == nil || !investigation.Reputation.SpamhausListed {
|
|
t.Fatalf("expected spamhaus listing, got %+v", investigation.Reputation)
|
|
}
|
|
}
|
|
|
|
func TestInvestigateAddsBotHintFromUserAgent(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
serverURL := ""
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
switch r.URL.Path {
|
|
case "/bootstrap-v4.json":
|
|
_, _ = w.Write([]byte(`{"services":[[["216.73.216.0/22"],["` + serverURL + `/rdap/"]]]}`))
|
|
case "/rdap/ip/216.73.216.112":
|
|
_, _ = w.Write([]byte(`{"handle":"NET-216-73-216-0-1","name":"AWS-ANTHROPIC","entities":[{"roles":["abuse"],"vcardArray":["vcard",[["fn",{},"text","Anthropic, PBC"]]]}]}`))
|
|
default:
|
|
http.NotFound(w, r)
|
|
}
|
|
}))
|
|
serverURL = server.URL
|
|
defer server.Close()
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
server.Client(),
|
|
&fakeResolver{},
|
|
log.New(testWriter{t}, "", 0),
|
|
nil,
|
|
map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
|
|
)
|
|
|
|
investigation, err := svc.Investigate(context.Background(), "216.73.216.112", []string{"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)"})
|
|
if err != nil {
|
|
t.Fatalf("investigate ip: %v", err)
|
|
}
|
|
if investigation.Bot == nil || investigation.Bot.Name != "ClaudeBot" || investigation.Bot.Verified {
|
|
t.Fatalf("expected unverified ClaudeBot hint, got %+v", investigation.Bot)
|
|
}
|
|
if investigation.Registration == nil || investigation.Registration.Organization != "Anthropic, PBC" {
|
|
t.Fatalf("expected registration enrichment to continue after bot hint, got %+v", investigation.Registration)
|
|
}
|
|
}
|
|
|
|
func TestParsePublishedNetworksSupportsCommentedGeofeed(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
prefixes, err := parsePublishedNetworks([]byte(strings.Join([]string{
|
|
"# Publication date: Thu Mar 12 2026",
|
|
"31.13.78.0/24,NZ,NZ-AUK,Auckland,",
|
|
"2a03:2880:f061::/48,NZ,NZ-AUK,Auckland,",
|
|
"31.13.72.0/24,SE,,Bromma,",
|
|
"",
|
|
}, "\n")), "geofeed_csv", "https://example.test/geofeed")
|
|
if err != nil {
|
|
t.Fatalf("parse geofeed: %v", err)
|
|
}
|
|
if len(prefixes) != 3 {
|
|
t.Fatalf("expected 3 prefixes, got %d", len(prefixes))
|
|
}
|
|
got := []string{prefixes[0].String(), prefixes[1].String(), prefixes[2].String()}
|
|
want := []string{"31.13.78.0/24", "2a03:2880:f061::/48", "31.13.72.0/24"}
|
|
if strings.Join(got, ",") != strings.Join(want, ",") {
|
|
t.Fatalf("unexpected geofeed prefixes: got %v want %v", got, want)
|
|
}
|
|
}
|
|
|
|
func TestInvestigateRecognizesOpenAIBotViaEmbeddedUserAgentToken(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/gptbot.json" {
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`))
|
|
}))
|
|
defer server.Close()
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
server.Client(),
|
|
&fakeResolver{},
|
|
log.New(testWriter{t}, "", 0),
|
|
[]botProvider{{
|
|
ID: "openai_gptbot_official",
|
|
Name: "GPTBot",
|
|
Icon: "🤖",
|
|
SourceFormat: "json_prefixes",
|
|
CacheTTL: time.Hour,
|
|
IPRangeURLs: []string{server.URL + "/gptbot.json"},
|
|
UserAgentPrefixes: []string{
|
|
"gptbot",
|
|
},
|
|
}},
|
|
map[string]string{},
|
|
)
|
|
|
|
investigation, err := svc.Investigate(context.Background(), "203.0.113.10", []string{"Mozilla/5.0 (compatible; GPTBot/1.0; +https://openai.com/gptbot)"})
|
|
if err != nil {
|
|
t.Fatalf("investigate ip: %v", err)
|
|
}
|
|
if investigation.Bot == nil || investigation.Bot.Name != "GPTBot" || !investigation.Bot.Verified {
|
|
t.Fatalf("expected verified GPTBot match, got %+v", investigation.Bot)
|
|
}
|
|
if investigation.Bot.Method != "user_agent+published_ranges" {
|
|
t.Fatalf("expected combined method, got %+v", investigation.Bot)
|
|
}
|
|
}
|
|
|
|
func TestInvestigateRecognizesPerplexityBotViaEmbeddedUserAgentToken(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
if r.URL.Path != "/perplexitybot.json" {
|
|
http.NotFound(w, r)
|
|
return
|
|
}
|
|
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"198.51.100.0/24"}]}`))
|
|
}))
|
|
defer server.Close()
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
server.Client(),
|
|
&fakeResolver{},
|
|
log.New(testWriter{t}, "", 0),
|
|
[]botProvider{{
|
|
ID: "perplexitybot_official",
|
|
Name: "PerplexityBot",
|
|
Icon: "🤖",
|
|
SourceFormat: "json_prefixes",
|
|
CacheTTL: time.Hour,
|
|
IPRangeURLs: []string{server.URL + "/perplexitybot.json"},
|
|
UserAgentPrefixes: []string{
|
|
"perplexitybot",
|
|
},
|
|
}},
|
|
map[string]string{},
|
|
)
|
|
|
|
investigation, err := svc.Investigate(context.Background(), "198.51.100.42", []string{"Mozilla/5.0 (compatible; PerplexityBot/1.0; +https://www.perplexity.ai/perplexitybot)"})
|
|
if err != nil {
|
|
t.Fatalf("investigate ip: %v", err)
|
|
}
|
|
if investigation.Bot == nil || investigation.Bot.Name != "PerplexityBot" || !investigation.Bot.Verified {
|
|
t.Fatalf("expected verified PerplexityBot match, got %+v", investigation.Bot)
|
|
}
|
|
}
|
|
|
|
func TestInvestigateRecognizesYandexViaReverseDNS(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
resolver := &fakeResolver{
|
|
reverse: map[string][]string{"203.0.113.55": {"spider-55.search.yandex.ru."}},
|
|
forward: map[string][]net.IPAddr{"spider-55.search.yandex.ru": {{IP: net.ParseIP("203.0.113.55")}}},
|
|
}
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
http.DefaultClient,
|
|
resolver,
|
|
log.New(testWriter{t}, "", 0),
|
|
[]botProvider{{
|
|
ID: "yandex_official",
|
|
Name: "YandexBot",
|
|
Icon: "🤖",
|
|
CacheTTL: time.Hour,
|
|
ReverseDNSSuffixes: []string{".yandex.ru", ".yandex.net", ".yandex.com"},
|
|
}},
|
|
map[string]string{},
|
|
)
|
|
|
|
investigation, err := svc.Investigate(context.Background(), "203.0.113.55", []string{"Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"})
|
|
if err != nil {
|
|
t.Fatalf("investigate ip: %v", err)
|
|
}
|
|
if investigation.Bot == nil || investigation.Bot.Name != "YandexBot" || !investigation.Bot.Verified {
|
|
t.Fatalf("expected verified YandexBot match, got %+v", investigation.Bot)
|
|
}
|
|
if investigation.Bot.Method != "reverse_dns+fcrdns" {
|
|
t.Fatalf("expected reverse DNS verification, got %+v", investigation.Bot)
|
|
}
|
|
}
|
|
|
|
func TestPublishedNetworksAreCached(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
var mu sync.Mutex
|
|
requestCount := 0
|
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
mu.Lock()
|
|
requestCount++
|
|
mu.Unlock()
|
|
_, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`))
|
|
}))
|
|
defer server.Close()
|
|
|
|
svc := newService(
|
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
|
server.Client(),
|
|
&fakeResolver{},
|
|
log.New(testWriter{t}, "", 0),
|
|
[]botProvider{{
|
|
ID: "provider",
|
|
Name: "Provider bot",
|
|
Icon: "🤖",
|
|
SourceFormat: "json_prefixes",
|
|
CacheTTL: time.Hour,
|
|
IPRangeURLs: []string{server.URL},
|
|
}},
|
|
map[string]string{},
|
|
)
|
|
|
|
for index := 0; index < 2; index++ {
|
|
if _, err := svc.Investigate(context.Background(), "203.0.113.10", nil); err != nil {
|
|
t.Fatalf("investigate ip #%d: %v", index, err)
|
|
}
|
|
}
|
|
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
if requestCount != 1 {
|
|
t.Fatalf("expected exactly one published-range request, got %d", requestCount)
|
|
}
|
|
}
|
|
|
|
// fakeResolver is an in-memory DNS stand-in for tests. Each map backs one
// lookup method; a key that is absent (including when the map is nil) produces
// a not-found *net.DNSError.
type fakeResolver struct {
	reverse map[string][]string     // LookupAddr answers, keyed by the queried address
	forward map[string][]net.IPAddr // LookupIPAddr answers, keyed by host name
	hosts   map[string][]string     // LookupHost answers, keyed by host name
}

// notFound builds the error returned for names missing from the fixture maps.
// Err and Name are filled in so the error text identifies the failed lookup
// instead of rendering as "lookup : ".
func (r *fakeResolver) notFound(name string) error {
	return &net.DNSError{Err: "no such host", Name: name, IsNotFound: true}
}

// LookupAddr returns the names registered for addr in reverse, or a not-found
// DNS error when addr has no entry.
func (r *fakeResolver) LookupAddr(_ context.Context, addr string) ([]string, error) {
	if values, ok := r.reverse[addr]; ok {
		return values, nil
	}
	return nil, r.notFound(addr)
}

// LookupIPAddr returns the addresses registered for host in forward, or a
// not-found DNS error when host has no entry.
func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAddr, error) {
	if values, ok := r.forward[host]; ok {
		return values, nil
	}
	return nil, r.notFound(host)
}

// LookupHost returns the strings registered for host in hosts, or a not-found
// DNS error when host has no entry.
func (r *fakeResolver) LookupHost(_ context.Context, host string) ([]string, error) {
	if values, ok := r.hosts[host]; ok {
		return values, nil
	}
	return nil, r.notFound(host)
}
|
|
|
|
// testWriter forwards everything written to it to the wrapped test's log.
type testWriter struct{ t *testing.T }

// Write logs p through t.Log with surrounding whitespace trimmed. It always
// reports the full, untrimmed length as written and never returns an error.
func (w testWriter) Write(p []byte) (int, error) {
	w.t.Helper()
	message := strings.TrimSpace(string(p))
	w.t.Log(message)
	written := len(p)
	return written, nil
}
|
|
|
|
func spamhausQuery(t *testing.T, ip string) string {
|
|
t.Helper()
|
|
addr, err := netip.ParseAddr(ip)
|
|
if err != nil {
|
|
t.Fatalf("parse ip: %v", err)
|
|
}
|
|
query, err := spamhausLookupName(addr)
|
|
if err != nil {
|
|
t.Fatalf("build spamhaus query: %v", err)
|
|
}
|
|
return query
|
|
}
|