You've already forked caddy-opnsense-blocker
Harden verified bot detection
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
package investigation
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/csv"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@@ -293,17 +294,14 @@ func parsePublishedNetworks(payload []byte, sourceFormat string, sourceURL strin
|
||||
}
|
||||
return networks, nil
|
||||
case "geofeed_csv":
|
||||
reader := csv.NewReader(strings.NewReader(string(payload)))
|
||||
rows, err := reader.ReadAll()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("decode geofeed payload from %s: %w", sourceURL, err)
|
||||
}
|
||||
networks := make([]netip.Prefix, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
if len(row) == 0 {
|
||||
scanner := bufio.NewScanner(bytes.NewReader(payload))
|
||||
networks := make([]netip.Prefix, 0, 64)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
candidate := strings.TrimSpace(row[0])
|
||||
candidate := strings.TrimSpace(strings.SplitN(line, ",", 2)[0])
|
||||
if candidate == "" || strings.HasPrefix(candidate, "#") {
|
||||
continue
|
||||
}
|
||||
@@ -313,6 +311,9 @@ func parsePublishedNetworks(payload []byte, sourceFormat string, sourceURL strin
|
||||
}
|
||||
networks = append(networks, prefix.Masked())
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("scan geofeed payload from %s: %w", sourceURL, err)
|
||||
}
|
||||
return networks, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported source format %q for %s", sourceFormat, sourceURL)
|
||||
@@ -533,6 +534,7 @@ func defaultBotProviders() []botProvider {
|
||||
Icon: "🤖",
|
||||
SourceFormat: "json_prefixes",
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://www.bing.com/toolbox/bingbot.json"},
|
||||
ReverseDNSSuffixes: []string{".search.msn.com"},
|
||||
},
|
||||
{
|
||||
@@ -552,11 +554,11 @@ func defaultBotProviders() []botProvider {
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://www.facebook.com/peering/geofeed"},
|
||||
UserAgentPrefixes: []string{
|
||||
"facebookexternalhit/",
|
||||
"meta-webindexer/",
|
||||
"meta-externalads/",
|
||||
"meta-externalagent/",
|
||||
"meta-externalfetcher/",
|
||||
"facebookexternalhit",
|
||||
"meta-webindexer",
|
||||
"meta-externalads",
|
||||
"meta-externalagent",
|
||||
"meta-externalfetcher",
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -567,6 +569,68 @@ func defaultBotProviders() []botProvider {
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://duckduckgo.com/duckduckbot.json"},
|
||||
},
|
||||
{
|
||||
ID: "openai_gptbot_official",
|
||||
Name: "GPTBot",
|
||||
Icon: "🤖",
|
||||
SourceFormat: "json_prefixes",
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://openai.com/gptbot.json"},
|
||||
UserAgentPrefixes: []string{
|
||||
"gptbot",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "openai_chatgpt_user_official",
|
||||
Name: "ChatGPT-User",
|
||||
Icon: "🤖",
|
||||
SourceFormat: "json_prefixes",
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://openai.com/chatgpt-user.json"},
|
||||
UserAgentPrefixes: []string{
|
||||
"chatgpt-user",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "openai_oai_searchbot_official",
|
||||
Name: "OAI-SearchBot",
|
||||
Icon: "🤖",
|
||||
SourceFormat: "json_prefixes",
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://openai.com/searchbot.json"},
|
||||
UserAgentPrefixes: []string{
|
||||
"oai-searchbot",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "perplexitybot_official",
|
||||
Name: "PerplexityBot",
|
||||
Icon: "🤖",
|
||||
SourceFormat: "json_prefixes",
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://www.perplexity.com/perplexitybot.json"},
|
||||
UserAgentPrefixes: []string{
|
||||
"perplexitybot",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "perplexity_user_official",
|
||||
Name: "Perplexity-User",
|
||||
Icon: "🤖",
|
||||
SourceFormat: "json_prefixes",
|
||||
CacheTTL: 24 * time.Hour,
|
||||
IPRangeURLs: []string{"https://www.perplexity.com/perplexity-user.json"},
|
||||
UserAgentPrefixes: []string{
|
||||
"perplexity-user",
|
||||
},
|
||||
},
|
||||
{
|
||||
ID: "yandex_official",
|
||||
Name: "YandexBot",
|
||||
Icon: "🤖",
|
||||
CacheTTL: 24 * time.Hour,
|
||||
ReverseDNSSuffixes: []string{".yandex.ru", ".yandex.net", ".yandex.com"},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -582,14 +646,49 @@ func ipMatchesPrefixes(ip netip.Addr, prefixes []netip.Prefix) bool {
|
||||
func userAgentMatchesPrefixes(userAgents []string, prefixes []string) bool {
|
||||
for _, agent := range userAgents {
|
||||
for _, prefix := range prefixes {
|
||||
if strings.HasPrefix(agent, prefix) {
|
||||
candidate := strings.ToLower(strings.TrimSpace(strings.TrimSuffix(prefix, "/")))
|
||||
if candidate == "" {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(agent, candidate) {
|
||||
return true
|
||||
}
|
||||
for _, token := range splitUserAgentTokens(agent) {
|
||||
if token == candidate {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// splitUserAgentTokens breaks a User-Agent string into its distinct,
// lowercased product names.
//
// The string is split on spaces, tabs, and the characters ';', '(', ')' and
// ','. For every resulting piece, only the text before the first "/" is kept
// (so "GPTBot/1.2" yields "gptbot"). Pieces that are empty after trimming are
// dropped, and duplicates are removed while preserving first-seen order.
// The returned slice is never nil.
func splitUserAgentTokens(userAgent string) []string {
	isSeparator := func(value rune) bool {
		switch value {
		case ' ', ';', '(', ')', ',', '\t':
			return true
		default:
			return false
		}
	}

	parts := strings.FieldsFunc(userAgent, isSeparator)
	items := make([]string, 0, len(parts))
	seen := make(map[string]struct{}, len(parts))
	for _, part := range parts {
		// Keep only the product name; anything after the first "/" is a
		// version or path and is discarded.
		name, _, _ := strings.Cut(part, "/")
		name = strings.TrimSpace(name)
		if name == "" {
			continue
		}
		normalized := strings.ToLower(name)
		if _, ok := seen[normalized]; ok {
			continue
		}
		seen[normalized] = struct{}{}
		items = append(items, normalized)
	}
	return items
}
|
||||
|
||||
func normalizeUserAgents(userAgents []string) []string {
|
||||
items := make([]string, 0, len(userAgents))
|
||||
for _, userAgent := range userAgents {
|
||||
@@ -634,6 +733,9 @@ func extractBotHintName(userAgent string) string {
|
||||
continue
|
||||
}
|
||||
normalized := strings.ToLower(base)
|
||||
if strings.HasPrefix(normalized, "+") || strings.Contains(normalized, "@") {
|
||||
continue
|
||||
}
|
||||
if strings.Contains(normalized, "bot") || strings.Contains(normalized, "crawler") || strings.Contains(normalized, "spider") || strings.Contains(normalized, "slurp") || strings.Contains(normalized, "fetcher") || strings.Contains(normalized, "indexer") || strings.Contains(normalized, "preview") || strings.Contains(normalized, "externalhit") {
|
||||
return base
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user