You've already forked caddy-opnsense-blocker
865 lines
23 KiB
Go
865 lines
23 KiB
Go
package investigation
|
|
|
|
import (
|
|
"context"
|
|
"encoding/csv"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"net"
|
|
"net/http"
|
|
"net/netip"
|
|
"net/url"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/config"
|
|
"git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/model"
|
|
)
|
|
|
|
const (
	// IANA RDAP bootstrap registries, used to discover the authoritative
	// RDAP server for a given IP address family.
	defaultRDAPBootstrapIPv4 = "https://data.iana.org/rdap/ipv4.json"
	defaultRDAPBootstrapIPv6 = "https://data.iana.org/rdap/ipv6.json"
	// spamhausLookupZone is the Spamhaus ZEN DNSBL zone queried for IP reputation.
	spamhausLookupZone = "zen.spamhaus.org"
)
|
|
|
|
// dnsResolver abstracts the subset of *net.Resolver used by Service so
// DNS lookups can be stubbed in tests.
type dnsResolver interface {
	// LookupAddr performs a reverse (PTR) lookup for addr.
	LookupAddr(ctx context.Context, addr string) ([]string, error)
	// LookupIPAddr resolves host to IP addresses (used for forward confirmation).
	LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error)
	// LookupHost resolves host to address strings (used for DNSBL queries).
	LookupHost(ctx context.Context, host string) ([]string, error)
}
|
|
|
|
// httpClient abstracts *http.Client so HTTP fetches can be stubbed in tests.
type httpClient interface {
	Do(req *http.Request) (*http.Response, error)
}
|
|
|
|
// Service performs IP investigations: bot identification, reverse DNS,
// RDAP registration lookups, and Spamhaus reputation checks.
type Service struct {
	cfg      config.InvestigationConfig
	logger   *log.Logger
	client   httpClient
	resolver dnsResolver

	// mu guards networkCache and bootstrapCache.
	mu             sync.Mutex
	networkCache   map[string]networkCacheEntry   // published bot IP ranges, keyed by provider ID
	bootstrapCache map[string]bootstrapCacheEntry // RDAP bootstrap data, keyed by "ipv4"/"ipv6"
	providers      []botProvider
	bootstrapURLs  map[string]string // RDAP bootstrap URLs, keyed by "ipv4"/"ipv6"
}
|
|
|
|
// networkCacheEntry caches a bot provider's published IP ranges.
type networkCacheEntry struct {
	updatedAt time.Time // when the ranges were last fetched
	networks  []netip.Prefix
}
|
|
|
|
// bootstrapCacheEntry caches a parsed RDAP bootstrap document for one
// address family.
type bootstrapCacheEntry struct {
	updatedAt time.Time // when the bootstrap was last fetched
	services  []rdapService
}
|
|
|
|
// rdapService is one entry from an IANA RDAP bootstrap file: the set of
// IP prefixes it covers and the RDAP base URLs serving them.
type rdapService struct {
	prefixes []netip.Prefix
	urls     []string
}
|
|
|
|
// botProvider describes how to verify that traffic claiming to be a known
// crawler really originates from that operator.
type botProvider struct {
	ID           string
	Name         string
	Icon         string
	SourceFormat string        // "json_prefixes" or "geofeed_csv"
	CacheTTL     time.Duration // how long fetched IP ranges stay valid
	// IPRangeURLs are documents publishing the operator's crawler IP ranges.
	IPRangeURLs []string
	// ReverseDNSSuffixes verify via forward-confirmed reverse DNS.
	ReverseDNSSuffixes []string
	// UserAgentPrefixes, when set, must additionally match the request UA
	// for an IP-range hit to count as verified.
	UserAgentPrefixes []string
}
|
|
|
|
func New(cfg config.InvestigationConfig, logger *log.Logger) *Service {
|
|
return newService(
|
|
cfg,
|
|
&http.Client{Timeout: cfg.Timeout.Duration},
|
|
net.DefaultResolver,
|
|
logger,
|
|
defaultBotProviders(),
|
|
map[string]string{
|
|
"ipv4": defaultRDAPBootstrapIPv4,
|
|
"ipv6": defaultRDAPBootstrapIPv6,
|
|
},
|
|
)
|
|
}
|
|
|
|
func newService(
|
|
cfg config.InvestigationConfig,
|
|
client httpClient,
|
|
resolver dnsResolver,
|
|
logger *log.Logger,
|
|
providers []botProvider,
|
|
bootstrapURLs map[string]string,
|
|
) *Service {
|
|
if logger == nil {
|
|
logger = log.New(io.Discard, "", 0)
|
|
}
|
|
return &Service{
|
|
cfg: cfg,
|
|
logger: logger,
|
|
client: client,
|
|
resolver: resolver,
|
|
networkCache: map[string]networkCacheEntry{},
|
|
bootstrapCache: map[string]bootstrapCacheEntry{},
|
|
providers: providers,
|
|
bootstrapURLs: bootstrapURLs,
|
|
}
|
|
}
|
|
|
|
// Investigate gathers everything the service knows about ip: verified bot
// identity, reverse DNS (with forward confirmation), RDAP registration
// data, and optionally Spamhaus reputation. userAgents are the user agent
// strings observed for this IP, used for bot matching and hints.
//
// Lookup failures are non-fatal: they are de-duplicated and joined into
// the returned investigation's Error field. A non-nil error is returned
// only when ip is not a valid address.
func (s *Service) Investigate(ctx context.Context, ip string, userAgents []string) (model.IPInvestigation, error) {
	parsed, err := netip.ParseAddr(strings.TrimSpace(ip))
	if err != nil {
		return model.IPInvestigation{}, fmt.Errorf("invalid ip address %q: %w", ip, err)
	}

	investigation := model.IPInvestigation{
		IP:        parsed.String(),
		UpdatedAt: time.Now().UTC(),
	}
	// When investigations are disabled, return the bare (IP-only) record.
	if !s.cfg.Enabled {
		return investigation, nil
	}

	// All external lookups share a single timeout budget.
	lookupCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout.Duration)
	defer cancel()

	normalizedUserAgents := normalizeUserAgents(userAgents)
	// A verified bot match short-circuits the remaining lookups.
	botMatch, reverseDNSInfo := s.identifyBot(lookupCtx, parsed, normalizedUserAgents)
	if botMatch != nil {
		investigation.Bot = botMatch
		investigation.ReverseDNS = reverseDNSInfo
		return investigation, nil
	}
	// No verified match: fall back to an unverified user-agent hint.
	if hint := detectBotHint(userAgents); hint != nil {
		investigation.Bot = hint
	}

	warnings := make([]string, 0, 2)
	// identifyBot may already have resolved reverse DNS; avoid re-querying.
	if reverseDNSInfo == nil {
		reverseDNSInfo, err = s.lookupReverseDNS(lookupCtx, parsed)
		if err != nil {
			warnings = append(warnings, err.Error())
		}
	}
	// lookupReverseDNS can return partial info alongside an error, so this
	// check is independent of the error branch above.
	if reverseDNSInfo != nil {
		investigation.ReverseDNS = reverseDNSInfo
	}

	registration, err := s.lookupRegistration(lookupCtx, parsed)
	if err != nil {
		warnings = append(warnings, err.Error())
	} else if registration != nil {
		investigation.Registration = registration
	}

	if s.cfg.SpamhausEnabled {
		reputation, err := s.lookupSpamhaus(lookupCtx, parsed)
		if err != nil {
			warnings = append(warnings, err.Error())
		} else if reputation != nil {
			investigation.Reputation = reputation
		}
	}

	if len(warnings) > 0 {
		investigation.Error = strings.Join(uniqueStrings(warnings), "; ")
	}
	return investigation, nil
}
|
|
|
|
func (s *Service) identifyBot(ctx context.Context, ip netip.Addr, userAgents []string) (*model.BotMatch, *model.ReverseDNSInfo) {
|
|
var reverseDNSInfo *model.ReverseDNSInfo
|
|
for _, provider := range s.providers {
|
|
if len(provider.IPRangeURLs) > 0 {
|
|
networks, err := s.loadPublishedNetworks(ctx, provider)
|
|
if err != nil {
|
|
s.logger.Printf("bot provider %s: %v", provider.ID, err)
|
|
} else if ipMatchesPrefixes(ip, networks) {
|
|
if len(provider.UserAgentPrefixes) == 0 || userAgentMatchesPrefixes(userAgents, provider.UserAgentPrefixes) {
|
|
method := "published_ranges"
|
|
if len(provider.UserAgentPrefixes) > 0 {
|
|
method = "user_agent+published_ranges"
|
|
}
|
|
return &model.BotMatch{
|
|
ProviderID: provider.ID,
|
|
Name: provider.Name,
|
|
Icon: provider.Icon,
|
|
Method: method,
|
|
Verified: true,
|
|
}, reverseDNSInfo
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(provider.ReverseDNSSuffixes) == 0 {
|
|
continue
|
|
}
|
|
info, err := s.lookupReverseDNS(ctx, ip)
|
|
if err != nil {
|
|
s.logger.Printf("bot provider %s reverse DNS: %v", provider.ID, err)
|
|
continue
|
|
}
|
|
if info == nil {
|
|
continue
|
|
}
|
|
reverseDNSInfo = info
|
|
ptr := strings.ToLower(strings.TrimSuffix(info.PTR, "."))
|
|
if ptr == "" || !info.ForwardConfirmed {
|
|
continue
|
|
}
|
|
for _, suffix := range provider.ReverseDNSSuffixes {
|
|
if strings.HasSuffix(ptr, suffix) {
|
|
return &model.BotMatch{
|
|
ProviderID: provider.ID,
|
|
Name: provider.Name,
|
|
Icon: provider.Icon,
|
|
Method: "reverse_dns+fcrdns",
|
|
Verified: true,
|
|
}, reverseDNSInfo
|
|
}
|
|
}
|
|
}
|
|
return nil, reverseDNSInfo
|
|
}
|
|
|
|
// loadPublishedNetworks returns the provider's published IP ranges, served
// from the in-memory cache while fresh (per CacheTTL) and refetched
// otherwise. Individual source failures are tolerated; an error is
// returned only when every source failed and nothing at all was parsed.
func (s *Service) loadPublishedNetworks(ctx context.Context, provider botProvider) ([]netip.Prefix, error) {
	s.mu.Lock()
	entry, found := s.networkCache[provider.ID]
	s.mu.Unlock()
	if found && time.Since(entry.updatedAt) < provider.CacheTTL {
		// Return a copy so callers cannot mutate the cached slice.
		return append([]netip.Prefix(nil), entry.networks...), nil
	}

	networks := make([]netip.Prefix, 0, 64)
	errMessages := make([]string, 0, len(provider.IPRangeURLs))
	for _, sourceURL := range provider.IPRangeURLs {
		payload, err := s.fetchDocument(ctx, sourceURL)
		if err != nil {
			errMessages = append(errMessages, err.Error())
			continue
		}
		parsed, err := parsePublishedNetworks(payload, provider.SourceFormat, sourceURL)
		if err != nil {
			errMessages = append(errMessages, err.Error())
			continue
		}
		networks = append(networks, parsed...)
	}
	// Fail only if no source produced anything; partial data is acceptable.
	if len(networks) == 0 && len(errMessages) > 0 {
		return nil, fmt.Errorf("load published ranges for %s: %s", provider.ID, strings.Join(uniqueStrings(errMessages), "; "))
	}
	networks = uniquePrefixes(networks)
	s.mu.Lock()
	// Cache a copy; the returned slice remains owned by the caller.
	s.networkCache[provider.ID] = networkCacheEntry{updatedAt: time.Now().UTC(), networks: append([]netip.Prefix(nil), networks...)}
	s.mu.Unlock()
	return networks, nil
}
|
|
|
|
// parsePublishedNetworks decodes a provider's published IP range document
// into masked prefixes. Two formats are supported:
//
//   - "json_prefixes": {"prefixes":[{"ipv4Prefix":...} | {"ipv6Prefix":...}]}
//     (the format used by Google, Apple, and DuckDuckGo crawler feeds)
//   - "geofeed_csv": RFC 8805 geofeed CSV whose first column is a CIDR
//
// sourceFormat selects the decoder; sourceURL only annotates errors.
func parsePublishedNetworks(payload []byte, sourceFormat string, sourceURL string) ([]netip.Prefix, error) {
	switch sourceFormat {
	case "json_prefixes":
		var document struct {
			Prefixes []struct {
				IPv4Prefix string `json:"ipv4Prefix"`
				IPv6Prefix string `json:"ipv6Prefix"`
			} `json:"prefixes"`
		}
		if err := json.Unmarshal(payload, &document); err != nil {
			return nil, fmt.Errorf("decode published prefix payload from %s: %w", sourceURL, err)
		}
		networks := make([]netip.Prefix, 0, len(document.Prefixes))
		for _, entry := range document.Prefixes {
			// Each entry carries either an IPv4 or an IPv6 prefix.
			rawPrefix := strings.TrimSpace(entry.IPv4Prefix)
			if rawPrefix == "" {
				rawPrefix = strings.TrimSpace(entry.IPv6Prefix)
			}
			if rawPrefix == "" {
				continue
			}
			prefix, err := netip.ParsePrefix(rawPrefix)
			if err != nil {
				return nil, fmt.Errorf("parse published prefix %q from %s: %w", rawPrefix, sourceURL, err)
			}
			networks = append(networks, prefix.Masked())
		}
		return networks, nil
	case "geofeed_csv":
		reader := csv.NewReader(strings.NewReader(string(payload)))
		// Real-world geofeeds (RFC 8805) contain '#' comment lines and rows
		// with varying field counts; csv.Reader's defaults would otherwise
		// fail ReadAll with ErrFieldCount or parse comments as records.
		reader.Comment = '#'
		reader.FieldsPerRecord = -1
		rows, err := reader.ReadAll()
		if err != nil {
			return nil, fmt.Errorf("decode geofeed payload from %s: %w", sourceURL, err)
		}
		networks := make([]netip.Prefix, 0, len(rows))
		for _, row := range rows {
			if len(row) == 0 {
				continue
			}
			candidate := strings.TrimSpace(row[0])
			if candidate == "" || strings.HasPrefix(candidate, "#") {
				continue
			}
			prefix, err := netip.ParsePrefix(candidate)
			if err != nil {
				return nil, fmt.Errorf("parse geofeed prefix %q from %s: %w", candidate, sourceURL, err)
			}
			networks = append(networks, prefix.Masked())
		}
		return networks, nil
	default:
		return nil, fmt.Errorf("unsupported source format %q for %s", sourceFormat, sourceURL)
	}
}
|
|
|
|
// lookupReverseDNS resolves the PTR record for ip and forward-confirms it
// (FCrDNS): the PTR hostname must resolve back to ip. It returns
// (nil, nil) when no PTR exists. On a forward-lookup failure it returns
// partial info (PTR set, ForwardConfirmed false) together with the error,
// so callers can still display the PTR while reporting the problem.
func (s *Service) lookupReverseDNS(ctx context.Context, ip netip.Addr) (*model.ReverseDNSInfo, error) {
	names, err := s.resolver.LookupAddr(ctx, ip.String())
	if err != nil {
		if isDNSNotFound(err) {
			return nil, nil
		}
		return nil, fmt.Errorf("reverse dns lookup for %s: %w", ip, err)
	}
	if len(names) == 0 {
		return nil, nil
	}
	// Sort for determinism, then use the first name.
	sort.Strings(names)
	ptr := strings.TrimSuffix(strings.TrimSpace(names[0]), ".")
	if ptr == "" {
		return nil, nil
	}

	resolvedIPs, err := s.resolver.LookupIPAddr(ctx, ptr)
	if err != nil && !isDNSNotFound(err) {
		// Deliberate partial result: PTR is known, confirmation failed.
		return &model.ReverseDNSInfo{PTR: ptr, ForwardConfirmed: false}, fmt.Errorf("forward-confirm dns lookup for %s: %w", ptr, err)
	}
	forwardConfirmed := false
	for _, resolved := range resolvedIPs {
		addr, ok := netip.AddrFromSlice(resolved.IP)
		// Unmap both sides so IPv4 and IPv4-mapped-IPv6 compare equal.
		if ok && addr.Unmap() == ip.Unmap() {
			forwardConfirmed = true
			break
		}
	}
	return &model.ReverseDNSInfo{PTR: ptr, ForwardConfirmed: forwardConfirmed}, nil
}
|
|
|
|
// lookupRegistration queries RDAP for ip's registration data (handle,
// organization, prefix, country, abuse contact). The responsible RDAP
// server is discovered via the IANA bootstrap registry for ip's address
// family. Returns (nil, nil) when the response carried no usable fields.
func (s *Service) lookupRegistration(ctx context.Context, ip netip.Addr) (*model.RegistrationInfo, error) {
	family := "ipv4"
	if ip.Is6() {
		family = "ipv6"
	}
	services, err := s.loadBootstrap(ctx, family)
	if err != nil {
		return nil, err
	}
	baseURL := lookupRDAPBaseURL(ip, services)
	if baseURL == "" {
		return nil, fmt.Errorf("no RDAP service found for %s", ip)
	}
	// RDAP IP lookup endpoint: <base>/ip/<address>.
	requestURL := strings.TrimRight(baseURL, "/") + "/ip/" + url.PathEscape(ip.String())
	payload, err := s.fetchJSONDocument(ctx, requestURL)
	if err != nil {
		return nil, fmt.Errorf("rdap lookup for %s: %w", ip, err)
	}
	registration := &model.RegistrationInfo{
		Source:       requestURL,
		Handle:       strings.TrimSpace(asString(payload["handle"])),
		Name:         strings.TrimSpace(asString(payload["name"])),
		Country:      strings.TrimSpace(asString(payload["country"])),
		Prefix:       extractPrefix(payload),
		Organization: extractOrganization(payload),
		AbuseEmail:   extractAbuseEmail(payload["entities"]),
	}
	if registration.Organization == "" {
		registration.Organization = registration.Name
	}
	// Treat an all-empty record as "no data" rather than returning noise.
	if registration.Name == "" && registration.Organization == "" && registration.Handle == "" && registration.Prefix == "" && registration.Country == "" && registration.AbuseEmail == "" {
		return nil, nil
	}
	return registration, nil
}
|
|
|
|
func (s *Service) loadBootstrap(ctx context.Context, family string) ([]rdapService, error) {
|
|
s.mu.Lock()
|
|
entry, found := s.bootstrapCache[family]
|
|
s.mu.Unlock()
|
|
if found && time.Since(entry.updatedAt) < 24*time.Hour {
|
|
return append([]rdapService(nil), entry.services...), nil
|
|
}
|
|
|
|
bootstrapURL := s.bootstrapURLs[family]
|
|
payload, err := s.fetchDocument(ctx, bootstrapURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("fetch %s RDAP bootstrap: %w", family, err)
|
|
}
|
|
services, err := parseBootstrap(payload, bootstrapURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s.mu.Lock()
|
|
s.bootstrapCache[family] = bootstrapCacheEntry{updatedAt: time.Now().UTC(), services: append([]rdapService(nil), services...)}
|
|
s.mu.Unlock()
|
|
return services, nil
|
|
}
|
|
|
|
func parseBootstrap(payload []byte, sourceURL string) ([]rdapService, error) {
|
|
var document struct {
|
|
Services [][][]string `json:"services"`
|
|
}
|
|
if err := json.Unmarshal(payload, &document); err != nil {
|
|
return nil, fmt.Errorf("decode RDAP bootstrap from %s: %w", sourceURL, err)
|
|
}
|
|
services := make([]rdapService, 0, len(document.Services))
|
|
for _, rawService := range document.Services {
|
|
if len(rawService) < 2 {
|
|
continue
|
|
}
|
|
prefixes := make([]netip.Prefix, 0, len(rawService[0]))
|
|
for _, candidate := range rawService[0] {
|
|
prefix, err := netip.ParsePrefix(strings.TrimSpace(candidate))
|
|
if err != nil {
|
|
continue
|
|
}
|
|
prefixes = append(prefixes, prefix.Masked())
|
|
}
|
|
if len(prefixes) == 0 || len(rawService[1]) == 0 {
|
|
continue
|
|
}
|
|
services = append(services, rdapService{prefixes: prefixes, urls: append([]string(nil), rawService[1]...)})
|
|
}
|
|
if len(services) == 0 {
|
|
return nil, fmt.Errorf("empty RDAP bootstrap payload from %s", sourceURL)
|
|
}
|
|
return services, nil
|
|
}
|
|
|
|
func lookupRDAPBaseURL(ip netip.Addr, services []rdapService) string {
|
|
bestBits := -1
|
|
bestURL := ""
|
|
for _, service := range services {
|
|
for _, prefix := range service.prefixes {
|
|
if prefix.Contains(ip) && prefix.Bits() > bestBits && len(service.urls) > 0 {
|
|
bestBits = prefix.Bits()
|
|
bestURL = strings.TrimSpace(service.urls[0])
|
|
}
|
|
}
|
|
}
|
|
return bestURL
|
|
}
|
|
|
|
func (s *Service) lookupSpamhaus(ctx context.Context, ip netip.Addr) (*model.ReputationInfo, error) {
|
|
if !isPublicIP(ip) {
|
|
return nil, nil
|
|
}
|
|
lookupName, err := spamhausLookupName(ip)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
answers, err := s.resolver.LookupHost(ctx, lookupName)
|
|
if err != nil {
|
|
if isDNSNotFound(err) {
|
|
return &model.ReputationInfo{SpamhausLookup: spamhausLookupZone, SpamhausListed: false}, nil
|
|
}
|
|
return &model.ReputationInfo{SpamhausLookup: spamhausLookupZone, Error: err.Error()}, nil
|
|
}
|
|
return &model.ReputationInfo{
|
|
SpamhausLookup: spamhausLookupZone,
|
|
SpamhausListed: len(answers) > 0,
|
|
SpamhausCodes: uniqueStrings(answers),
|
|
}, nil
|
|
}
|
|
|
|
func (s *Service) fetchDocument(ctx context.Context, requestURL string) ([]byte, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("build request for %s: %w", requestURL, err)
|
|
}
|
|
req.Header.Set("Accept", "application/json, text/plain, */*")
|
|
req.Header.Set("User-Agent", s.cfg.UserAgent)
|
|
|
|
resp, err := s.client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("request %s: %w", requestURL, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
payload, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
|
|
return nil, fmt.Errorf("request %s returned %s: %s", requestURL, resp.Status, strings.TrimSpace(string(payload)))
|
|
}
|
|
payload, err := io.ReadAll(io.LimitReader(resp.Body, 4<<20))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("read response %s: %w", requestURL, err)
|
|
}
|
|
return payload, nil
|
|
}
|
|
|
|
func (s *Service) fetchJSONDocument(ctx context.Context, requestURL string) (map[string]any, error) {
|
|
payload, err := s.fetchDocument(ctx, requestURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var decoded map[string]any
|
|
if err := json.Unmarshal(payload, &decoded); err != nil {
|
|
return nil, fmt.Errorf("decode json payload from %s: %w", requestURL, err)
|
|
}
|
|
return decoded, nil
|
|
}
|
|
|
|
// defaultBotProviders returns the built-in list of crawler operators the
// service can verify, each configured with the operator's official IP
// range feed and/or reverse-DNS suffix.
func defaultBotProviders() []botProvider {
	return []botProvider{
		{
			ID:           "google_official",
			Name:         "Googlebot",
			Icon:         "🤖",
			SourceFormat: "json_prefixes",
			CacheTTL:     24 * time.Hour,
			// Google publishes separate range files per crawler class.
			IPRangeURLs: []string{
				"https://developers.google.com/static/crawling/ipranges/common-crawlers.json",
				"https://developers.google.com/static/crawling/ipranges/special-crawlers.json",
				"https://developers.google.com/static/crawling/ipranges/user-triggered-fetchers-google.json",
			},
		},
		{
			// Bing is verified via reverse DNS only; no range feed configured.
			ID:                 "bing_official",
			Name:               "Bingbot",
			Icon:               "🤖",
			SourceFormat:       "json_prefixes",
			CacheTTL:           24 * time.Hour,
			ReverseDNSSuffixes: []string{".search.msn.com"},
		},
		{
			ID:                 "apple_official",
			Name:               "Applebot",
			Icon:               "🤖",
			SourceFormat:       "json_prefixes",
			CacheTTL:           24 * time.Hour,
			IPRangeURLs:        []string{"https://search.developer.apple.com/applebot.json"},
			ReverseDNSSuffixes: []string{".applebot.apple.com"},
		},
		{
			ID:           "facebook_official",
			Name:         "Meta crawler",
			Icon:         "🤖",
			SourceFormat: "geofeed_csv",
			CacheTTL:     24 * time.Hour,
			IPRangeURLs:  []string{"https://www.facebook.com/peering/geofeed"},
			// Meta's geofeed covers its whole infrastructure, so a matching
			// user agent is additionally required for a verified hit.
			UserAgentPrefixes: []string{
				"facebookexternalhit/",
				"meta-webindexer/",
				"meta-externalads/",
				"meta-externalagent/",
				"meta-externalfetcher/",
			},
		},
		{
			ID:           "duckduckgo_official",
			Name:         "DuckDuckBot",
			Icon:         "🤖",
			SourceFormat: "json_prefixes",
			CacheTTL:     24 * time.Hour,
			IPRangeURLs:  []string{"https://duckduckgo.com/duckduckbot.json"},
		},
	}
}
|
|
|
|
func ipMatchesPrefixes(ip netip.Addr, prefixes []netip.Prefix) bool {
|
|
for _, prefix := range prefixes {
|
|
if prefix.Contains(ip) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// userAgentMatchesPrefixes reports whether any user agent string starts
// with any of the given prefixes.
func userAgentMatchesPrefixes(userAgents []string, prefixes []string) bool {
	for _, prefix := range prefixes {
		for _, agent := range userAgents {
			if strings.HasPrefix(agent, prefix) {
				return true
			}
		}
	}
	return false
}
|
|
|
|
func normalizeUserAgents(userAgents []string) []string {
|
|
items := make([]string, 0, len(userAgents))
|
|
for _, userAgent := range userAgents {
|
|
normalized := strings.ToLower(strings.TrimSpace(userAgent))
|
|
if normalized == "" {
|
|
continue
|
|
}
|
|
items = append(items, normalized)
|
|
}
|
|
return uniqueStrings(items)
|
|
}
|
|
|
|
func detectBotHint(userAgents []string) *model.BotMatch {
|
|
for _, userAgent := range userAgents {
|
|
name := extractBotHintName(userAgent)
|
|
if name == "" {
|
|
continue
|
|
}
|
|
return &model.BotMatch{
|
|
ProviderID: strings.ToLower(name),
|
|
Name: name,
|
|
Icon: "🤖",
|
|
Method: "user_agent_hint",
|
|
Verified: false,
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// extractBotHintName returns the first token of userAgent whose name part
// (before any "/version") contains a crawler-ish keyword, or "" when none
// does. Matching is case-insensitive; the original casing is returned.
func extractBotHintName(userAgent string) string {
	isSeparator := func(r rune) bool {
		return r == ' ' || r == ';' || r == '(' || r == ')' || r == ',' || r == '\t'
	}
	keywords := []string{"bot", "crawler", "spider", "slurp", "fetcher", "indexer", "preview", "externalhit"}
	for _, token := range strings.FieldsFunc(userAgent, isSeparator) {
		name, _, _ := strings.Cut(token, "/")
		name = strings.TrimSpace(name)
		if name == "" {
			continue
		}
		lowered := strings.ToLower(name)
		for _, keyword := range keywords {
			if strings.Contains(lowered, keyword) {
				return name
			}
		}
	}
	return ""
}
|
|
|
|
func extractPrefix(payload map[string]any) string {
|
|
items, ok := payload["cidr0_cidrs"].([]any)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
for _, item := range items {
|
|
entry, ok := item.(map[string]any)
|
|
if !ok {
|
|
continue
|
|
}
|
|
prefix := strings.TrimSpace(asString(entry["v4prefix"]))
|
|
if prefix == "" {
|
|
prefix = strings.TrimSpace(asString(entry["v6prefix"]))
|
|
}
|
|
length := asInt(entry["length"])
|
|
if prefix == "" || length == 0 {
|
|
continue
|
|
}
|
|
return prefix + "/" + strconv.Itoa(length)
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func extractOrganization(payload map[string]any) string {
|
|
if organization := extractEntityName(payload["entities"]); organization != "" {
|
|
return organization
|
|
}
|
|
return strings.TrimSpace(asString(payload["name"]))
|
|
}
|
|
|
|
func extractEntityName(value any) string {
|
|
entities, ok := value.([]any)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
for _, rawEntity := range entities {
|
|
entity, ok := rawEntity.(map[string]any)
|
|
if !ok {
|
|
continue
|
|
}
|
|
if name := strings.TrimSpace(asString(entity["fn"])); name != "" {
|
|
return name
|
|
}
|
|
if name := extractVCardText(entity["vcardArray"], "fn"); name != "" {
|
|
return name
|
|
}
|
|
if nested := extractEntityName(entity["entities"]); nested != "" {
|
|
return nested
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func extractAbuseEmail(value any) string {
|
|
entities, ok := value.([]any)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
for _, rawEntity := range entities {
|
|
entity, ok := rawEntity.(map[string]any)
|
|
if !ok {
|
|
continue
|
|
}
|
|
roles := toStrings(entity["roles"])
|
|
if containsString(roles, "abuse") {
|
|
if email := extractVCardText(entity["vcardArray"], "email"); email != "" {
|
|
return email
|
|
}
|
|
}
|
|
if nested := extractAbuseEmail(entity["entities"]); nested != "" {
|
|
return nested
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// extractVCardText returns the trimmed text value (4th element) of the
// first jCard property row named field, or "" when absent. vcardArray has
// the shape ["vcard", [[name, params, type, value], ...]].
func extractVCardText(value any, field string) string {
	wrapper, ok := value.([]any)
	if !ok || len(wrapper) < 2 {
		return ""
	}
	rows, ok := wrapper[1].([]any)
	if !ok {
		return ""
	}
	for _, rawRow := range rows {
		row, isRow := rawRow.([]any)
		if !isRow || len(row) < 4 {
			continue
		}
		name, isText := row[0].(string)
		if !isText || name != field {
			continue
		}
		if text, hasText := row[3].(string); hasText {
			return strings.TrimSpace(text)
		}
	}
	return ""
}
|
|
|
|
func spamhausLookupName(ip netip.Addr) (string, error) {
|
|
ip = ip.Unmap()
|
|
if ip.Is4() {
|
|
bytes := ip.As4()
|
|
return fmt.Sprintf("%d.%d.%d.%d.%s", bytes[3], bytes[2], bytes[1], bytes[0], spamhausLookupZone), nil
|
|
}
|
|
if ip.Is6() {
|
|
bytes := ip.As16()
|
|
hexString := hex.EncodeToString(bytes[:])
|
|
parts := make([]string, 0, len(hexString))
|
|
for index := len(hexString) - 1; index >= 0; index-- {
|
|
parts = append(parts, string(hexString[index]))
|
|
}
|
|
return strings.Join(parts, ".") + "." + spamhausLookupZone, nil
|
|
}
|
|
return "", fmt.Errorf("unsupported ip family for %s", ip)
|
|
}
|
|
|
|
func uniquePrefixes(items []netip.Prefix) []netip.Prefix {
|
|
if len(items) == 0 {
|
|
return nil
|
|
}
|
|
seen := make(map[string]struct{}, len(items))
|
|
result := make([]netip.Prefix, 0, len(items))
|
|
for _, item := range items {
|
|
key := item.Masked().String()
|
|
if _, ok := seen[key]; ok {
|
|
continue
|
|
}
|
|
seen[key] = struct{}{}
|
|
result = append(result, item.Masked())
|
|
}
|
|
sort.Slice(result, func(left int, right int) bool {
|
|
if result[left].Bits() == result[right].Bits() {
|
|
return result[left].String() < result[right].String()
|
|
}
|
|
return result[left].Bits() > result[right].Bits()
|
|
})
|
|
return result
|
|
}
|
|
|
|
// uniqueStrings returns the sorted set of distinct items, or nil for
// empty input.
func uniqueStrings(items []string) []string {
	if len(items) == 0 {
		return nil
	}
	seen := make(map[string]struct{}, len(items))
	distinct := make([]string, 0, len(items))
	for _, item := range items {
		if _, dup := seen[item]; !dup {
			seen[item] = struct{}{}
			distinct = append(distinct, item)
		}
	}
	sort.Strings(distinct)
	return distinct
}
|
|
|
|
// containsString reports whether expected occurs in items.
func containsString(items []string, expected string) bool {
	for i := range items {
		if items[i] == expected {
			return true
		}
	}
	return false
}
|
|
|
|
// toStrings converts a decoded-JSON []any into trimmed strings, skipping
// non-string elements; returns nil when value is not an array.
func toStrings(value any) []string {
	raw, ok := value.([]any)
	if !ok {
		return nil
	}
	out := make([]string, 0, len(raw))
	for _, element := range raw {
		text, isString := element.(string)
		if !isString {
			continue
		}
		out = append(out, strings.TrimSpace(text))
	}
	return out
}
|
|
|
|
// asString returns value as a string, or "" when it is not a string.
func asString(value any) string {
	if text, ok := value.(string); ok {
		return text
	}
	return ""
}
|
|
|
|
func asInt(value any) int {
|
|
switch current := value.(type) {
|
|
case float64:
|
|
return int(current)
|
|
case float32:
|
|
return int(current)
|
|
case int:
|
|
return current
|
|
case int64:
|
|
return int(current)
|
|
case json.Number:
|
|
converted, _ := current.Int64()
|
|
return int(converted)
|
|
default:
|
|
return 0
|
|
}
|
|
}
|
|
|
|
func isDNSNotFound(err error) bool {
|
|
var dnsError *net.DNSError
|
|
if errors.As(err, &dnsError) {
|
|
return dnsError.IsNotFound
|
|
}
|
|
return false
|
|
}
|
|
|
|
func isPublicIP(ip netip.Addr) bool {
|
|
ip = ip.Unmap()
|
|
if !ip.IsValid() || ip.IsLoopback() || ip.IsMulticast() || ip.IsPrivate() || ip.IsLinkLocalMulticast() || ip.IsLinkLocalUnicast() || ip.IsUnspecified() {
|
|
return false
|
|
}
|
|
return true
|
|
}
|