// Package investigation enriches blocked IP addresses with bot identity,
// reverse DNS, RDAP registration, and Spamhaus reputation data.
package investigation

import (
	"context"
	"encoding/csv"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
	"net/netip"
	"net/url"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/config"
	"git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/model"
)

const (
	defaultRDAPBootstrapIPv4 = "https://data.iana.org/rdap/ipv4.json"
	defaultRDAPBootstrapIPv6 = "https://data.iana.org/rdap/ipv6.json"
	spamhausLookupZone       = "zen.spamhaus.org"
)

// dnsResolver is the subset of *net.Resolver the service needs; it exists as
// an interface so tests can substitute a fake resolver.
type dnsResolver interface {
	LookupAddr(ctx context.Context, addr string) ([]string, error)
	LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error)
	LookupHost(ctx context.Context, host string) ([]string, error)
}

// httpClient is the subset of *http.Client the service needs.
type httpClient interface {
	Do(req *http.Request) (*http.Response, error)
}

// Service performs best-effort investigations of IP addresses. All network
// results are cached in memory; the caches are guarded by mu.
type Service struct {
	cfg      config.InvestigationConfig
	logger   *log.Logger
	client   httpClient
	resolver dnsResolver

	mu             sync.Mutex
	networkCache   map[string]networkCacheEntry   // provider ID -> published CIDR ranges
	bootstrapCache map[string]bootstrapCacheEntry // "ipv4"/"ipv6" -> RDAP services

	providers     []botProvider
	bootstrapURLs map[string]string // "ipv4"/"ipv6" -> IANA RDAP bootstrap URL
}

// networkCacheEntry caches a provider's published IP ranges.
type networkCacheEntry struct {
	updatedAt time.Time
	networks  []netip.Prefix
}

// bootstrapCacheEntry caches a parsed RDAP bootstrap registry.
type bootstrapCacheEntry struct {
	updatedAt time.Time
	services  []rdapService
}

// rdapService maps a set of address prefixes to the RDAP base URLs that
// serve them.
type rdapService struct {
	prefixes []netip.Prefix
	urls     []string
}

// botProvider describes one crawler operator and how to verify traffic that
// claims to come from it (published ranges, FCrDNS suffixes, UA prefixes).
type botProvider struct {
	ID                 string
	Name               string
	Icon               string
	SourceFormat       string // "json_prefixes" or "geofeed_csv"
	CacheTTL           time.Duration
	IPRangeURLs        []string
	ReverseDNSSuffixes []string
	UserAgentPrefixes  []string
}

// New returns a Service wired to the real HTTP client, the default DNS
// resolver, the built-in bot provider list, and the IANA RDAP bootstrap URLs.
func New(cfg config.InvestigationConfig, logger *log.Logger) *Service {
	return newService(
		cfg,
		&http.Client{Timeout: cfg.Timeout.Duration},
		net.DefaultResolver,
		logger,
		defaultBotProviders(),
		map[string]string{
			"ipv4": defaultRDAPBootstrapIPv4,
			"ipv6": defaultRDAPBootstrapIPv6,
		},
	)
}

// newService is the dependency-injected constructor shared by New and tests.
// A nil logger is replaced with one that discards all output.
func newService(
	cfg config.InvestigationConfig,
	client httpClient,
	resolver dnsResolver,
	logger *log.Logger,
	providers []botProvider,
	bootstrapURLs map[string]string,
) *Service {
	if logger == nil {
		logger = log.New(io.Discard, "", 0)
	}
	return &Service{
		cfg:            cfg,
		logger:         logger,
		client:         client,
		resolver:       resolver,
		networkCache:   map[string]networkCacheEntry{},
		bootstrapCache: map[string]bootstrapCacheEntry{},
		providers:      providers,
		bootstrapURLs:  bootstrapURLs,
	}
}

// Investigate gathers bot identity, reverse DNS, RDAP registration, and
// (optionally) Spamhaus reputation for ip. Individual lookup failures are
// collected into the investigation's Error field rather than aborting the
// whole investigation; only an unparseable ip returns a non-nil error.
func (s *Service) Investigate(ctx context.Context, ip string, userAgents []string) (model.IPInvestigation, error) {
	parsed, err := netip.ParseAddr(strings.TrimSpace(ip))
	if err != nil {
		return model.IPInvestigation{}, fmt.Errorf("invalid ip address %q: %w", ip, err)
	}
	investigation := model.IPInvestigation{
		IP:        parsed.String(),
		UpdatedAt: time.Now().UTC(),
	}
	if !s.cfg.Enabled {
		return investigation, nil
	}
	lookupCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout.Duration)
	defer cancel()

	normalizedUserAgents := normalizeUserAgents(userAgents)
	botMatch, reverseDNSInfo := s.identifyBot(lookupCtx, parsed, normalizedUserAgents)
	if botMatch != nil {
		// Verified bot: no need for the remaining (slower) lookups.
		investigation.Bot = botMatch
		investigation.ReverseDNS = reverseDNSInfo
		return investigation, nil
	}
	if hint := detectBotHint(userAgents); hint != nil {
		// Unverified hint from the user agent string alone.
		investigation.Bot = hint
	}
	warnings := make([]string, 0, 2)
	if reverseDNSInfo == nil {
		reverseDNSInfo, err = s.lookupReverseDNS(lookupCtx, parsed)
		if err != nil {
			warnings = append(warnings, err.Error())
		}
	}
	if reverseDNSInfo != nil {
		investigation.ReverseDNS = reverseDNSInfo
	}
	registration, err := s.lookupRegistration(lookupCtx, parsed)
	if err != nil {
		warnings = append(warnings, err.Error())
	} else if registration != nil {
		investigation.Registration = registration
	}
	if s.cfg.SpamhausEnabled {
		reputation, err := s.lookupSpamhaus(lookupCtx, parsed)
		if err != nil {
			warnings = append(warnings, err.Error())
		} else if reputation != nil {
			investigation.Reputation = reputation
		}
	}
	if len(warnings) > 0 {
		investigation.Error = strings.Join(uniqueStrings(warnings), "; ")
	}
	return investigation, nil
}

// identifyBot tries to verify ip against each configured provider, first via
// published IP ranges and then via forward-confirmed reverse DNS. The PTR
// lookup result is shared across providers (the answer cannot differ per
// provider) and is also returned so callers can reuse it.
func (s *Service) identifyBot(ctx context.Context, ip netip.Addr, userAgents []string) (*model.BotMatch, *model.ReverseDNSInfo) {
	var reverseDNSInfo *model.ReverseDNSInfo
	reverseDNSDone := false
	// resolvePTR performs the reverse DNS lookup at most once, no matter how
	// many providers declare ReverseDNSSuffixes.
	resolvePTR := func() *model.ReverseDNSInfo {
		if reverseDNSDone {
			return reverseDNSInfo
		}
		reverseDNSDone = true
		info, err := s.resolveReverseDNSOnce(ctx, ip)
		reverseDNSInfo = info
		_ = err // already logged inside resolveReverseDNSOnce
		return reverseDNSInfo
	}
	for _, provider := range s.providers {
		if len(provider.IPRangeURLs) > 0 {
			networks, err := s.loadPublishedNetworks(ctx, provider)
			if err != nil {
				s.logger.Printf("bot provider %s: %v", provider.ID, err)
			} else if ipMatchesPrefixes(ip, networks) {
				// An IP-range match alone is enough unless the provider also
				// requires a matching user agent prefix.
				if len(provider.UserAgentPrefixes) == 0 || userAgentMatchesPrefixes(userAgents, provider.UserAgentPrefixes) {
					method := "published_ranges"
					if len(provider.UserAgentPrefixes) > 0 {
						method = "user_agent+published_ranges"
					}
					return &model.BotMatch{
						ProviderID: provider.ID,
						Name:       provider.Name,
						Icon:       provider.Icon,
						Method:     method,
						Verified:   true,
					}, reverseDNSInfo
				}
			}
		}
		if len(provider.ReverseDNSSuffixes) == 0 {
			continue
		}
		info := resolvePTR()
		if info == nil {
			continue
		}
		ptr := strings.ToLower(strings.TrimSuffix(info.PTR, "."))
		if ptr == "" || !info.ForwardConfirmed {
			// Only forward-confirmed PTR records are trustworthy; anyone can
			// publish an arbitrary PTR for their own address space.
			continue
		}
		for _, suffix := range provider.ReverseDNSSuffixes {
			if strings.HasSuffix(ptr, suffix) {
				return &model.BotMatch{
					ProviderID: provider.ID,
					Name:       provider.Name,
					Icon:       provider.Icon,
					Method:     "reverse_dns+fcrdns",
					Verified:   true,
				}, reverseDNSInfo
			}
		}
	}
	return nil, reverseDNSInfo
}

// resolveReverseDNSOnce wraps lookupReverseDNS with error logging; a failed
// lookup yields nil so identifyBot treats it as "no PTR available".
func (s *Service) resolveReverseDNSOnce(ctx context.Context, ip netip.Addr) (*model.ReverseDNSInfo, error) {
	info, err := s.lookupReverseDNS(ctx, ip)
	if err != nil {
		s.logger.Printf("reverse DNS for %s: %v", ip, err)
		return nil, err
	}
	return info, nil
}

// loadPublishedNetworks returns the provider's published IP ranges, serving
// from the in-memory cache while it is fresh. Partial source failures are
// tolerated as long as at least one source yielded networks.
func (s *Service) loadPublishedNetworks(ctx context.Context, provider botProvider) ([]netip.Prefix, error) {
	s.mu.Lock()
	entry, found := s.networkCache[provider.ID]
	s.mu.Unlock()
	if found && time.Since(entry.updatedAt) < provider.CacheTTL {
		// Return a copy so callers cannot mutate the cached slice.
		return append([]netip.Prefix(nil), entry.networks...), nil
	}
	networks := make([]netip.Prefix, 0, 64)
	errMessages := make([]string, 0, len(provider.IPRangeURLs))
	for _, sourceURL := range provider.IPRangeURLs {
		payload, err := s.fetchDocument(ctx, sourceURL)
		if err != nil {
			errMessages = append(errMessages, err.Error())
			continue
		}
		parsed, err := parsePublishedNetworks(payload, provider.SourceFormat, sourceURL)
		if err != nil {
			errMessages = append(errMessages, err.Error())
			continue
		}
		networks = append(networks, parsed...)
	}
	if len(networks) == 0 && len(errMessages) > 0 {
		return nil, fmt.Errorf("load published ranges for %s: %s", provider.ID, strings.Join(uniqueStrings(errMessages), "; "))
	}
	networks = uniquePrefixes(networks)
	s.mu.Lock()
	s.networkCache[provider.ID] = networkCacheEntry{updatedAt: time.Now().UTC(), networks: append([]netip.Prefix(nil), networks...)}
	s.mu.Unlock()
	return networks, nil
}

// parsePublishedNetworks decodes a provider's range document into masked
// prefixes. Supported formats: "json_prefixes" (Google/Apple-style
// {"prefixes":[{"ipv4Prefix":...}]}) and "geofeed_csv" (RFC 8805).
func parsePublishedNetworks(payload []byte, sourceFormat string, sourceURL string) ([]netip.Prefix, error) {
	switch sourceFormat {
	case "json_prefixes":
		var document struct {
			Prefixes []struct {
				IPv4Prefix string `json:"ipv4Prefix"`
				IPv6Prefix string `json:"ipv6Prefix"`
			} `json:"prefixes"`
		}
		if err := json.Unmarshal(payload, &document); err != nil {
			return nil, fmt.Errorf("decode published prefix payload from %s: %w", sourceURL, err)
		}
		networks := make([]netip.Prefix, 0, len(document.Prefixes))
		for _, entry := range document.Prefixes {
			rawPrefix := strings.TrimSpace(entry.IPv4Prefix)
			if rawPrefix == "" {
				rawPrefix = strings.TrimSpace(entry.IPv6Prefix)
			}
			if rawPrefix == "" {
				continue
			}
			prefix, err := netip.ParsePrefix(rawPrefix)
			if err != nil {
				return nil, fmt.Errorf("parse published prefix %q from %s: %w", rawPrefix, sourceURL, err)
			}
			networks = append(networks, prefix.Masked())
		}
		return networks, nil
	case "geofeed_csv":
		reader := csv.NewReader(strings.NewReader(string(payload)))
		// Geofeeds (RFC 8805) mix '#' comment lines with 5-field data rows;
		// without these settings csv.Reader aborts with ErrFieldCount on the
		// first record whose field count differs from the first row's.
		reader.Comment = '#'
		reader.FieldsPerRecord = -1
		rows, err := reader.ReadAll()
		if err != nil {
			return nil, fmt.Errorf("decode geofeed payload from %s: %w", sourceURL, err)
		}
		networks := make([]netip.Prefix, 0, len(rows))
		for _, row := range rows {
			if len(row) == 0 {
				continue
			}
			candidate := strings.TrimSpace(row[0])
			if candidate == "" || strings.HasPrefix(candidate, "#") {
				continue
			}
			prefix, err := netip.ParsePrefix(candidate)
			if err != nil {
				return nil, fmt.Errorf("parse geofeed prefix %q from %s: %w", candidate, sourceURL, err)
			}
			networks = append(networks, prefix.Masked())
		}
		return networks, nil
	default:
		return nil, fmt.Errorf("unsupported source format %q for %s", sourceFormat, sourceURL)
	}
}

// lookupReverseDNS resolves ip's PTR record and forward-confirms it (FCrDNS).
// A missing PTR is not an error: it returns (nil, nil). A forward lookup
// failure returns the unconfirmed PTR alongside the error so callers can
// still display it.
func (s *Service) lookupReverseDNS(ctx context.Context, ip netip.Addr) (*model.ReverseDNSInfo, error) {
	names, err := s.resolver.LookupAddr(ctx, ip.String())
	if err != nil {
		if isDNSNotFound(err) {
			return nil, nil
		}
		return nil, fmt.Errorf("reverse dns lookup for %s: %w", ip, err)
	}
	if len(names) == 0 {
		return nil, nil
	}
	// Sort for determinism when multiple PTR records exist.
	sort.Strings(names)
	ptr := strings.TrimSuffix(strings.TrimSpace(names[0]), ".")
	if ptr == "" {
		return nil, nil
	}
	resolvedIPs, err := s.resolver.LookupIPAddr(ctx, ptr)
	if err != nil && !isDNSNotFound(err) {
		return &model.ReverseDNSInfo{PTR: ptr, ForwardConfirmed: false}, fmt.Errorf("forward-confirm dns lookup for %s: %w", ptr, err)
	}
	forwardConfirmed := false
	for _, resolved := range resolvedIPs {
		addr, ok := netip.AddrFromSlice(resolved.IP)
		if ok && addr.Unmap() == ip.Unmap() {
			forwardConfirmed = true
			break
		}
	}
	return &model.ReverseDNSInfo{PTR: ptr, ForwardConfirmed: forwardConfirmed}, nil
}

// lookupRegistration queries RDAP for ip's registration data, using the IANA
// bootstrap registry to pick the responsible RIR. Returns (nil, nil) when the
// RDAP answer carries no usable fields.
func (s *Service) lookupRegistration(ctx context.Context, ip netip.Addr) (*model.RegistrationInfo, error) {
	family := "ipv4"
	if ip.Is6() {
		family = "ipv6"
	}
	services, err := s.loadBootstrap(ctx, family)
	if err != nil {
		return nil, err
	}
	baseURL := lookupRDAPBaseURL(ip, services)
	if baseURL == "" {
		return nil, fmt.Errorf("no RDAP service found for %s", ip)
	}
	requestURL := strings.TrimRight(baseURL, "/") + "/ip/" + url.PathEscape(ip.String())
	payload, err := s.fetchJSONDocument(ctx, requestURL)
	if err != nil {
		return nil, fmt.Errorf("rdap lookup for %s: %w", ip, err)
	}
	registration := &model.RegistrationInfo{
		Source:       requestURL,
		Handle:       strings.TrimSpace(asString(payload["handle"])),
		Name:         strings.TrimSpace(asString(payload["name"])),
		Country:      strings.TrimSpace(asString(payload["country"])),
		Prefix:       extractPrefix(payload),
		Organization: extractOrganization(payload),
		AbuseEmail:   extractAbuseEmail(payload["entities"]),
	}
	if registration.Organization == "" {
		registration.Organization = registration.Name
	}
	if registration.Name == "" && registration.Organization == "" && registration.Handle == "" &&
		registration.Prefix == "" && registration.Country == "" && registration.AbuseEmail == "" {
		return nil, nil
	}
	return registration, nil
}

// loadBootstrap fetches and caches the IANA RDAP bootstrap registry for the
// given address family ("ipv4" or "ipv6"); entries are cached for 24 hours.
func (s *Service) loadBootstrap(ctx context.Context, family string) ([]rdapService, error) {
	s.mu.Lock()
	entry, found := s.bootstrapCache[family]
	s.mu.Unlock()
	if found && time.Since(entry.updatedAt) < 24*time.Hour {
		return append([]rdapService(nil), entry.services...), nil
	}
	bootstrapURL, ok := s.bootstrapURLs[family]
	if !ok || strings.TrimSpace(bootstrapURL) == "" {
		// Fail clearly instead of issuing an HTTP request to an empty URL.
		return nil, fmt.Errorf("no RDAP bootstrap URL configured for %s", family)
	}
	payload, err := s.fetchDocument(ctx, bootstrapURL)
	if err != nil {
		return nil, fmt.Errorf("fetch %s RDAP bootstrap: %w", family, err)
	}
	services, err := parseBootstrap(payload, bootstrapURL)
	if err != nil {
		return nil, err
	}
	s.mu.Lock()
	s.bootstrapCache[family] = bootstrapCacheEntry{updatedAt: time.Now().UTC(), services: append([]rdapService(nil), services...)}
	s.mu.Unlock()
	return services, nil
}

// parseBootstrap decodes an IANA RDAP bootstrap document (RFC 7484 layout:
// services is a list of [prefixes, urls] pairs). Malformed individual entries
// are skipped; an entirely empty result is an error.
func parseBootstrap(payload []byte, sourceURL string) ([]rdapService, error) {
	var document struct {
		Services [][][]string `json:"services"`
	}
	if err := json.Unmarshal(payload, &document); err != nil {
		return nil, fmt.Errorf("decode RDAP bootstrap from %s: %w", sourceURL, err)
	}
	services := make([]rdapService, 0, len(document.Services))
	for _, rawService := range document.Services {
		if len(rawService) < 2 {
			continue
		}
		prefixes := make([]netip.Prefix, 0, len(rawService[0]))
		for _, candidate := range rawService[0] {
			prefix, err := netip.ParsePrefix(strings.TrimSpace(candidate))
			if err != nil {
				continue
			}
			prefixes = append(prefixes, prefix.Masked())
		}
		if len(prefixes) == 0 || len(rawService[1]) == 0 {
			continue
		}
		services = append(services, rdapService{prefixes: prefixes, urls: append([]string(nil), rawService[1]...)})
	}
	if len(services) == 0 {
		return nil, fmt.Errorf("empty RDAP bootstrap payload from %s", sourceURL)
	}
	return services, nil
}

// lookupRDAPBaseURL picks the RDAP base URL of the most specific (longest)
// bootstrap prefix containing ip, or "" when none matches.
func lookupRDAPBaseURL(ip netip.Addr, services []rdapService) string {
	bestBits := -1
	bestURL := ""
	for _, service := range services {
		for _, prefix := range service.prefixes {
			if prefix.Contains(ip) && prefix.Bits() > bestBits && len(service.urls) > 0 {
				bestBits = prefix.Bits()
				bestURL = strings.TrimSpace(service.urls[0])
			}
		}
	}
	return bestURL
}

// lookupSpamhaus checks ip against the Spamhaus ZEN DNSBL. Private or
// otherwise non-public addresses are skipped (they would produce meaningless
// listings). An NXDOMAIN answer means "not listed"; other resolver errors are
// reported inside the ReputationInfo rather than as a lookup failure.
func (s *Service) lookupSpamhaus(ctx context.Context, ip netip.Addr) (*model.ReputationInfo, error) {
	if !isPublicIP(ip) {
		return nil, nil
	}
	lookupName, err := spamhausLookupName(ip)
	if err != nil {
		return nil, err
	}
	answers, err := s.resolver.LookupHost(ctx, lookupName)
	if err != nil {
		if isDNSNotFound(err) {
			return &model.ReputationInfo{SpamhausLookup: spamhausLookupZone, SpamhausListed: false}, nil
		}
		return &model.ReputationInfo{SpamhausLookup: spamhausLookupZone, Error: err.Error()}, nil
	}
	return &model.ReputationInfo{
		SpamhausLookup: spamhausLookupZone,
		SpamhausListed: len(answers) > 0,
		SpamhausCodes:  uniqueStrings(answers),
	}, nil
}

// fetchDocument GETs requestURL with the configured user agent and returns
// the body, capped at 4 MiB. Non-2xx responses become errors that include up
// to 8 KiB of the response body for diagnostics.
func (s *Service) fetchDocument(ctx context.Context, requestURL string) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil)
	if err != nil {
		return nil, fmt.Errorf("build request for %s: %w", requestURL, err)
	}
	req.Header.Set("Accept", "application/json, text/plain, */*")
	req.Header.Set("User-Agent", s.cfg.UserAgent)
	resp, err := s.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request %s: %w", requestURL, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		payload, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10))
		return nil, fmt.Errorf("request %s returned %s: %s", requestURL, resp.Status, strings.TrimSpace(string(payload)))
	}
	payload, err := io.ReadAll(io.LimitReader(resp.Body, 4<<20))
	if err != nil {
		return nil, fmt.Errorf("read response %s: %w", requestURL, err)
	}
	return payload, nil
}

// fetchJSONDocument fetches requestURL and decodes the body as a JSON object.
func (s *Service) fetchJSONDocument(ctx context.Context, requestURL string) (map[string]any, error) {
	payload, err := s.fetchDocument(ctx, requestURL)
	if err != nil {
		return nil, err
	}
	var decoded map[string]any
	if err := json.Unmarshal(payload, &decoded); err != nil {
		return nil, fmt.Errorf("decode json payload from %s: %w", requestURL, err)
	}
	return decoded, nil
}

// defaultBotProviders returns the built-in list of crawler operators and
// their official verification sources.
func defaultBotProviders() []botProvider {
	return []botProvider{
		{
			ID:           "google_official",
			Name:         "Googlebot",
			Icon:         "🤖",
			SourceFormat: "json_prefixes",
			CacheTTL:     24 * time.Hour,
			IPRangeURLs: []string{
				"https://developers.google.com/static/crawling/ipranges/common-crawlers.json",
				"https://developers.google.com/static/crawling/ipranges/special-crawlers.json",
				"https://developers.google.com/static/crawling/ipranges/user-triggered-fetchers-google.json",
			},
		},
		{
			// Bing publishes no stable range document here; verification
			// relies on forward-confirmed reverse DNS instead.
			ID:                 "bing_official",
			Name:               "Bingbot",
			Icon:               "🤖",
			SourceFormat:       "json_prefixes",
			CacheTTL:           24 * time.Hour,
			ReverseDNSSuffixes: []string{".search.msn.com"},
		},
		{
			ID:                 "apple_official",
			Name:               "Applebot",
			Icon:               "🤖",
			SourceFormat:       "json_prefixes",
			CacheTTL:           24 * time.Hour,
			IPRangeURLs:        []string{"https://search.developer.apple.com/applebot.json"},
			ReverseDNSSuffixes: []string{".applebot.apple.com"},
		},
		{
			ID:           "facebook_official",
			Name:         "Meta crawler",
			Icon:         "🤖",
			SourceFormat: "geofeed_csv",
			CacheTTL:     24 * time.Hour,
			IPRangeURLs:  []string{"https://www.facebook.com/peering/geofeed"},
			UserAgentPrefixes: []string{
				"facebookexternalhit/",
				"meta-webindexer/",
				"meta-externalads/",
				"meta-externalagent/",
				"meta-externalfetcher/",
			},
		},
		{
			ID:           "duckduckgo_official",
			Name:         "DuckDuckBot",
			Icon:         "🤖",
			SourceFormat: "json_prefixes",
			CacheTTL:     24 * time.Hour,
			IPRangeURLs:  []string{"https://duckduckgo.com/duckduckbot.json"},
		},
	}
}

// ipMatchesPrefixes reports whether ip falls inside any of the prefixes.
func ipMatchesPrefixes(ip netip.Addr, prefixes []netip.Prefix) bool {
	for _, prefix := range prefixes {
		if prefix.Contains(ip) {
			return true
		}
	}
	return false
}

// userAgentMatchesPrefixes reports whether any user agent starts with any of
// the given prefixes. Agents are expected to be lowercased already.
func userAgentMatchesPrefixes(userAgents []string, prefixes []string) bool {
	for _, agent := range userAgents {
		for _, prefix := range prefixes {
			if strings.HasPrefix(agent, prefix) {
				return true
			}
		}
	}
	return false
}

// normalizeUserAgents lowercases, trims, deduplicates, and sorts user agent
// strings, dropping empties.
func normalizeUserAgents(userAgents []string) []string {
	items := make([]string, 0, len(userAgents))
	for _, userAgent := range userAgents {
		normalized := strings.ToLower(strings.TrimSpace(userAgent))
		if normalized == "" {
			continue
		}
		items = append(items, normalized)
	}
	return uniqueStrings(items)
}

// detectBotHint returns an unverified bot match derived purely from user
// agent contents, or nil when nothing bot-like is found.
func detectBotHint(userAgents []string) *model.BotMatch {
	for _, userAgent := range userAgents {
		name := extractBotHintName(userAgent)
		if name == "" {
			continue
		}
		return &model.BotMatch{
			ProviderID: strings.ToLower(name),
			Name:       name,
			Icon:       "🤖",
			Method:     "user_agent_hint",
			Verified:   false,
		}
	}
	return nil
}

// extractBotHintName scans a user agent's tokens for bot-like substrings and
// returns the first matching token (product name before any "/version").
func extractBotHintName(userAgent string) string {
	parts := strings.FieldsFunc(userAgent, func(value rune) bool {
		switch value {
		case ' ', ';', '(', ')', ',', '\t':
			return true
		default:
			return false
		}
	})
	for _, part := range parts {
		base := strings.TrimSpace(strings.SplitN(part, "/", 2)[0])
		if base == "" {
			continue
		}
		normalized := strings.ToLower(base)
		if strings.Contains(normalized, "bot") ||
			strings.Contains(normalized, "crawler") ||
			strings.Contains(normalized, "spider") ||
			strings.Contains(normalized, "slurp") ||
			strings.Contains(normalized, "fetcher") ||
			strings.Contains(normalized, "indexer") ||
			strings.Contains(normalized, "preview") ||
			strings.Contains(normalized, "externalhit") {
			return base
		}
	}
	return ""
}

// extractPrefix reads the first usable CIDR from an RDAP response's
// cidr0_cidrs extension (draft-ietf-regext-rdap-cidr0) as "prefix/length".
func extractPrefix(payload map[string]any) string {
	items, ok := payload["cidr0_cidrs"].([]any)
	if !ok {
		return ""
	}
	for _, item := range items {
		entry, ok := item.(map[string]any)
		if !ok {
			continue
		}
		prefix := strings.TrimSpace(asString(entry["v4prefix"]))
		if prefix == "" {
			prefix = strings.TrimSpace(asString(entry["v6prefix"]))
		}
		length := asInt(entry["length"])
		if prefix == "" || length == 0 {
			continue
		}
		return prefix + "/" + strconv.Itoa(length)
	}
	return ""
}

// extractOrganization prefers an entity name from the RDAP entities tree,
// falling back to the top-level network name.
func extractOrganization(payload map[string]any) string {
	if organization := extractEntityName(payload["entities"]); organization != "" {
		return organization
	}
	return strings.TrimSpace(asString(payload["name"]))
}

// extractEntityName walks RDAP entities (recursively) and returns the first
// non-empty full name, from either the "fn" field or the vCard.
func extractEntityName(value any) string {
	entities, ok := value.([]any)
	if !ok {
		return ""
	}
	for _, rawEntity := range entities {
		entity, ok := rawEntity.(map[string]any)
		if !ok {
			continue
		}
		if name := strings.TrimSpace(asString(entity["fn"])); name != "" {
			return name
		}
		if name := extractVCardText(entity["vcardArray"], "fn"); name != "" {
			return name
		}
		if nested := extractEntityName(entity["entities"]); nested != "" {
			return nested
		}
	}
	return ""
}

// extractAbuseEmail walks RDAP entities (recursively) and returns the email
// of the first entity that carries the "abuse" role.
func extractAbuseEmail(value any) string {
	entities, ok := value.([]any)
	if !ok {
		return ""
	}
	for _, rawEntity := range entities {
		entity, ok := rawEntity.(map[string]any)
		if !ok {
			continue
		}
		roles := toStrings(entity["roles"])
		if containsString(roles, "abuse") {
			if email := extractVCardText(entity["vcardArray"], "email"); email != "" {
				return email
			}
		}
		if nested := extractAbuseEmail(entity["entities"]); nested != "" {
			return nested
		}
	}
	return ""
}

// extractVCardText returns the text value of the named field from a jCard
// (RFC 7095) array: ["vcard", [[name, params, type, value], ...]].
func extractVCardText(value any, field string) string {
	items, ok := value.([]any)
	if !ok || len(items) < 2 {
		return ""
	}
	rows, ok := items[1].([]any)
	if !ok {
		return ""
	}
	for _, rawRow := range rows {
		row, ok := rawRow.([]any)
		if !ok || len(row) < 4 {
			continue
		}
		name, ok := row[0].(string)
		if !ok || name != field {
			continue
		}
		textValue, ok := row[3].(string)
		if ok {
			return strings.TrimSpace(textValue)
		}
	}
	return ""
}

// spamhausLookupName builds the DNSBL query name for ip: reversed octets for
// IPv4, reversed nibbles for IPv6, suffixed with the Spamhaus zone.
func spamhausLookupName(ip netip.Addr) (string, error) {
	ip = ip.Unmap()
	if ip.Is4() {
		bytes := ip.As4()
		return fmt.Sprintf("%d.%d.%d.%d.%s", bytes[3], bytes[2], bytes[1], bytes[0], spamhausLookupZone), nil
	}
	if ip.Is6() {
		bytes := ip.As16()
		hexString := hex.EncodeToString(bytes[:])
		parts := make([]string, 0, len(hexString))
		for index := len(hexString) - 1; index >= 0; index-- {
			parts = append(parts, string(hexString[index]))
		}
		return strings.Join(parts, ".") + "." + spamhausLookupZone, nil
	}
	return "", fmt.Errorf("unsupported ip family for %s", ip)
}

// uniquePrefixes deduplicates masked prefixes and sorts them most-specific
// first (larger Bits first, then lexically).
func uniquePrefixes(items []netip.Prefix) []netip.Prefix {
	if len(items) == 0 {
		return nil
	}
	seen := make(map[string]struct{}, len(items))
	result := make([]netip.Prefix, 0, len(items))
	for _, item := range items {
		key := item.Masked().String()
		if _, ok := seen[key]; ok {
			continue
		}
		seen[key] = struct{}{}
		result = append(result, item.Masked())
	}
	sort.Slice(result, func(left int, right int) bool {
		if result[left].Bits() == result[right].Bits() {
			return result[left].String() < result[right].String()
		}
		return result[left].Bits() > result[right].Bits()
	})
	return result
}

// uniqueStrings deduplicates and sorts items; returns nil for empty input.
func uniqueStrings(items []string) []string {
	if len(items) == 0 {
		return nil
	}
	seen := make(map[string]struct{}, len(items))
	result := make([]string, 0, len(items))
	for _, item := range items {
		if _, ok := seen[item]; ok {
			continue
		}
		seen[item] = struct{}{}
		result = append(result, item)
	}
	sort.Strings(result)
	return result
}

// containsString reports whether items contains expected.
func containsString(items []string, expected string) bool {
	for _, item := range items {
		if item == expected {
			return true
		}
	}
	return false
}

// toStrings converts a decoded-JSON []any into trimmed strings, skipping
// non-string elements.
func toStrings(value any) []string {
	rawItems, ok := value.([]any)
	if !ok {
		return nil
	}
	items := make([]string, 0, len(rawItems))
	for _, rawItem := range rawItems {
		if text, ok := rawItem.(string); ok {
			items = append(items, strings.TrimSpace(text))
		}
	}
	return items
}

// asString returns value as a string, or "" when it is not one.
func asString(value any) string {
	text, _ := value.(string)
	return text
}

// asInt coerces the numeric types encoding/json may produce into an int,
// returning 0 for anything else.
func asInt(value any) int {
	switch current := value.(type) {
	case float64:
		return int(current)
	case float32:
		return int(current)
	case int:
		return current
	case int64:
		return int(current)
	case json.Number:
		converted, _ := current.Int64()
		return int(converted)
	default:
		return 0
	}
}

// isDNSNotFound reports whether err is a DNS "no such host" answer
// (NXDOMAIN), which callers treat as an empty result rather than a failure.
func isDNSNotFound(err error) bool {
	var dnsError *net.DNSError
	if errors.As(err, &dnsError) {
		return dnsError.IsNotFound
	}
	return false
}

// isPublicIP reports whether ip is a routable public address worth querying
// against external reputation services.
func isPublicIP(ip netip.Addr) bool {
	ip = ip.Unmap()
	if !ip.IsValid() || ip.IsLoopback() || ip.IsMulticast() || ip.IsPrivate() ||
		ip.IsLinkLocalMulticast() || ip.IsLinkLocalUnicast() || ip.IsUnspecified() {
		return false
	}
	return true
}