You've already forked caddy-opnsense-blocker
Add lazy IP enrichment and bot hints
This commit is contained in:
@@ -139,6 +139,9 @@ func (s *Service) Investigate(ctx context.Context, ip string, userAgents []strin
|
|||||||
investigation.ReverseDNS = reverseDNSInfo
|
investigation.ReverseDNS = reverseDNSInfo
|
||||||
return investigation, nil
|
return investigation, nil
|
||||||
}
|
}
|
||||||
|
if hint := detectBotHint(userAgents); hint != nil {
|
||||||
|
investigation.Bot = hint
|
||||||
|
}
|
||||||
|
|
||||||
warnings := make([]string, 0, 2)
|
warnings := make([]string, 0, 2)
|
||||||
if reverseDNSInfo == nil {
|
if reverseDNSInfo == nil {
|
||||||
@@ -599,6 +602,45 @@ func normalizeUserAgents(userAgents []string) []string {
|
|||||||
return uniqueStrings(items)
|
return uniqueStrings(items)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func detectBotHint(userAgents []string) *model.BotMatch {
|
||||||
|
for _, userAgent := range userAgents {
|
||||||
|
name := extractBotHintName(userAgent)
|
||||||
|
if name == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return &model.BotMatch{
|
||||||
|
ProviderID: strings.ToLower(name),
|
||||||
|
Name: name,
|
||||||
|
Icon: "🤖",
|
||||||
|
Method: "user_agent_hint",
|
||||||
|
Verified: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// botHintMarkers lists the lowercase substrings that mark a User-Agent
// product token as a likely self-identified crawler.
var botHintMarkers = []string{
	"bot",
	"crawler",
	"spider",
	"slurp",
	"fetcher",
	"indexer",
	"preview",
	"externalhit",
}

// extractBotHintName returns the first product token of userAgent whose name
// contains one of botHintMarkers (compared case-insensitively), or "" when no
// token matches.
//
// The header is split on spaces, tabs, ';', ',', '(' and ')'. Everything from
// the first '/' of a token onwards (typically the version) is dropped, and the
// original casing of the name is preserved in the result.
//
// Contact tokens — those starting with '+' or containing '@', such as
// "+claudebot@anthropic.com" — are skipped so that an e-mail address or a bare
// host name is never reported as the bot name.
func extractBotHintName(userAgent string) string {
	tokens := strings.FieldsFunc(userAgent, func(r rune) bool {
		switch r {
		case ' ', ';', '(', ')', ',', '\t':
			return true
		}
		return false
	})
	for _, token := range tokens {
		// Contact information is not a product name even if it happens to
		// contain a marker such as "bot" or "crawler".
		if strings.HasPrefix(token, "+") || strings.Contains(token, "@") {
			continue
		}
		name, _, _ := strings.Cut(token, "/")
		name = strings.TrimSpace(name)
		if name == "" {
			continue
		}
		lowered := strings.ToLower(name)
		for _, marker := range botHintMarkers {
			if strings.Contains(lowered, marker) {
				return name
			}
		}
	}
	return ""
}
|
||||||
|
|
||||||
func extractPrefix(payload map[string]any) string {
|
func extractPrefix(payload map[string]any) string {
|
||||||
items, ok := payload["cidr0_cidrs"].([]any)
|
items, ok := payload["cidr0_cidrs"].([]any)
|
||||||
if !ok {
|
if !ok {
|
||||||
|
|||||||
@@ -138,6 +138,44 @@ func TestInvestigateLoadsRegistrationAndSpamhausForNonBot(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestInvestigateAddsBotHintFromUserAgent(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
serverURL := ""
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/bootstrap-v4.json":
|
||||||
|
_, _ = w.Write([]byte(`{"services":[[["216.73.216.0/22"],["` + serverURL + `/rdap/"]]]}`))
|
||||||
|
case "/rdap/ip/216.73.216.112":
|
||||||
|
_, _ = w.Write([]byte(`{"handle":"NET-216-73-216-0-1","name":"AWS-ANTHROPIC","entities":[{"roles":["abuse"],"vcardArray":["vcard",[["fn",{},"text","Anthropic, PBC"]]]}]}`))
|
||||||
|
default:
|
||||||
|
http.NotFound(w, r)
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
serverURL = server.URL
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
svc := newService(
|
||||||
|
config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true},
|
||||||
|
server.Client(),
|
||||||
|
&fakeResolver{},
|
||||||
|
log.New(testWriter{t}, "", 0),
|
||||||
|
nil,
|
||||||
|
map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"},
|
||||||
|
)
|
||||||
|
|
||||||
|
investigation, err := svc.Investigate(context.Background(), "216.73.216.112", []string{"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("investigate ip: %v", err)
|
||||||
|
}
|
||||||
|
if investigation.Bot == nil || investigation.Bot.Name != "ClaudeBot" || investigation.Bot.Verified {
|
||||||
|
t.Fatalf("expected unverified ClaudeBot hint, got %+v", investigation.Bot)
|
||||||
|
}
|
||||||
|
if investigation.Registration == nil || investigation.Registration.Organization != "Anthropic, PBC" {
|
||||||
|
t.Fatalf("expected registration enrichment to continue after bot hint, got %+v", investigation.Registration)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestPublishedNetworksAreCached(t *testing.T) {
|
func TestPublishedNetworksAreCached(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
@@ -579,7 +579,7 @@ const ipDetailsHTML = `<!doctype html>
|
|||||||
}
|
}
|
||||||
const rows = [];
|
const rows = [];
|
||||||
if (investigation.bot) {
|
if (investigation.bot) {
|
||||||
rows.push('<div><strong>Bot</strong>: <span class="badge">' + escapeHtml(investigation.bot.icon || '🤖') + ' ' + escapeHtml(investigation.bot.name) + '</span> via ' + escapeHtml(investigation.bot.method) + '</div>');
|
rows.push('<div><strong>' + (investigation.bot.verified ? 'Bot' : 'Possible bot') + '</strong>: <span class="badge">' + escapeHtml(investigation.bot.icon || '🤖') + ' ' + escapeHtml(investigation.bot.name) + '</span> via ' + escapeHtml(investigation.bot.method) + (investigation.bot.verified ? '' : ' (not verified)') + '</div>');
|
||||||
} else {
|
} else {
|
||||||
rows.push('<div><strong>Bot</strong>: no verified bot match</div>');
|
rows.push('<div><strong>Bot</strong>: no verified bot match</div>');
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user