diff --git a/README.md b/README.md index 9ab0468..29d14d5 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ - Real-time ingestion of multiple Caddy JSON log files. - One heuristic profile per log source. - Persistent local state in SQLite. -- Local-only web UI for reviewing events and IPs. -- Manual block, unblock, and override reset actions. +- Local-only web UI for reviewing events, IPs, and the full request history of a selected address. +- On-demand IP investigation with persistent caching for bot verification, reverse DNS, RDAP, and Spamhaus lookups. +- Manual block, unblock, and clear-override actions with OPNsense-aware UI state. - OPNsense alias backend with automatic alias creation. - Concurrent polling across multiple log files. @@ -24,12 +25,13 @@ The decision engine is deliberately simple and deterministic for now: - excluded CIDR ranges - manual overrides -This keeps the application usable immediately while leaving room for a more advanced network-intelligence engine later. +This keeps the application usable immediately while leaving room for a more advanced policy engine later. ## Architecture - `internal/caddylog`: parses default Caddy JSON access logs - `internal/engine`: evaluates requests against a profile +- `internal/investigation`: performs on-demand bot verification and IP enrichment - `internal/store`: persists events, IP state, manual decisions, backend actions, and source offsets - `internal/opnsense`: manages the target OPNsense alias through its API - `internal/service`: runs concurrent log followers and applies automatic decisions @@ -60,6 +62,7 @@ Important points: - Each source points to one Caddy log file. - Each source references exactly one profile. - `initial_position: end` means “start following new lines only” on first boot. +- The `investigation` section controls how long IP enrichment is cached and whether on-demand Spamhaus lookups are enabled. - The web UI should stay bound to a local address such as `127.0.0.1:9080`. ## Web UI and API @@ -72,9 +75,12 @@ It refreshes through lightweight JSON polling and exposes these endpoints: - `GET /api/events` - `GET /api/ips` - `GET /api/ips/{ip}` +- `POST /api/ips/{ip}/investigate` - `POST /api/ips/{ip}/block` - `POST /api/ips/{ip}/unblock` -- `POST /api/ips/{ip}/reset` +- `POST /api/ips/{ip}/clear-override` + +The legacy `POST /api/ips/{ip}/reset` endpoint is still accepted as a backwards-compatible alias for `clear-override`. ## Development @@ -147,7 +153,7 @@ Use the NixOS module from another configuration: ## Roadmap - richer decision engine -- asynchronous DNS / RDAP / ASN enrichment +- optional GeoIP and ASN providers beyond RDAP - richer review filters in the UI - alternative blocking backends besides OPNsense - direct streaming ingestion targets in addition to file polling diff --git a/cmd/caddy-opnsense-blocker/main.go b/cmd/caddy-opnsense-blocker/main.go index 467c54f..9d11373 100644 --- a/cmd/caddy-opnsense-blocker/main.go +++ b/cmd/caddy-opnsense-blocker/main.go @@ -12,6 +12,7 @@ import ( "syscall" "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/config" + "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/investigation" "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/opnsense" "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/service" "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/store" @@ -46,8 +47,9 @@ func run() error { if cfg.OPNsense.Enabled { blocker = opnsense.NewClient(cfg.OPNsense) } + investigator := investigation.New(cfg.Investigation, logger) - svc := service.New(cfg, database, blocker, logger) + svc := service.New(cfg, database, blocker, investigator, logger) handler := web.NewHandler(svc) httpServer := &http.Server{ Addr: cfg.Server.ListenAddress, diff --git a/config.example.yaml b/config.example.yaml index 4cb0ce6..41deaab 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -7,6 +7,13 @@ server: storage: path: ./data/caddy-opnsense-blocker.db +investigation: + enabled: true + refresh_after: 24h + timeout: 8s + user_agent: caddy-opnsense-blocker/0.2 + spamhaus_enabled: true + opnsense: enabled: true base_url: https://router.example.test diff --git a/internal/config/config.go b/internal/config/config.go index 31d606a..f7c0c72 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -36,11 +36,12 @@ func (d Duration) MarshalYAML() (any, error) { } type Config struct { - Server ServerConfig `yaml:"server"` - Storage StorageConfig `yaml:"storage"` - OPNsense OPNsenseConfig `yaml:"opnsense"` - Profiles map[string]ProfileConfig `yaml:"profiles"` - Sources []SourceConfig `yaml:"sources"` + Server ServerConfig `yaml:"server"` + Storage StorageConfig `yaml:"storage"` + Investigation InvestigationConfig `yaml:"investigation"` + OPNsense OPNsenseConfig `yaml:"opnsense"` + Profiles map[string]ProfileConfig `yaml:"profiles"` + Sources []SourceConfig `yaml:"sources"` } type ServerConfig struct { @@ -54,6 +55,14 @@ type StorageConfig struct { Path string `yaml:"path"` } +type InvestigationConfig struct { + Enabled bool `yaml:"enabled"` + RefreshAfter Duration `yaml:"refresh_after"` + Timeout Duration `yaml:"timeout"` + UserAgent string `yaml:"user_agent"` + SpamhausEnabled bool `yaml:"spamhaus_enabled"` +} + type OPNsenseConfig struct { Enabled bool `yaml:"enabled"` BaseURL string `yaml:"base_url"` @@ -157,6 +166,21 @@ func (c *Config) applyDefaults() error { if c.Storage.Path == "" { c.Storage.Path = "./data/caddy-opnsense-blocker.db" } + if !c.Investigation.Enabled { + c.Investigation.Enabled = true + } + if c.Investigation.RefreshAfter.Duration == 0 { + c.Investigation.RefreshAfter.Duration = 24 * time.Hour + } + if c.Investigation.Timeout.Duration == 0 { + c.Investigation.Timeout.Duration = 8 * time.Second + } + if strings.TrimSpace(c.Investigation.UserAgent) == "" { + c.Investigation.UserAgent = "caddy-opnsense-blocker/0.2" + } + if !c.Investigation.SpamhausEnabled { + c.Investigation.SpamhausEnabled = true + } if c.OPNsense.Timeout.Duration == 0 { c.OPNsense.Timeout.Duration = 8 * time.Second diff --git a/internal/investigation/service.go b/internal/investigation/service.go new file mode 100644 index 0000000..ad79215 --- /dev/null +++ b/internal/investigation/service.go @@ -0,0 +1,822 @@ +package investigation + +import ( + "context" + "encoding/csv" + "encoding/hex" + "encoding/json" + "errors" + "fmt" + "io" + "log" + "net" + "net/http" + "net/netip" + "net/url" + "sort" + "strconv" + "strings" + "sync" + "time" + + "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/config" + "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/model" +) + +const ( + defaultRDAPBootstrapIPv4 = "https://data.iana.org/rdap/ipv4.json" + defaultRDAPBootstrapIPv6 = "https://data.iana.org/rdap/ipv6.json" + spamhausLookupZone = "zen.spamhaus.org" +) + +type dnsResolver interface { + LookupAddr(ctx context.Context, addr string) ([]string, error) + LookupIPAddr(ctx context.Context, host string) ([]net.IPAddr, error) + LookupHost(ctx context.Context, host string) ([]string, error) +} + +type httpClient interface { + Do(req *http.Request) (*http.Response, error) +} + +type Service struct { + cfg config.InvestigationConfig + logger *log.Logger + client httpClient + resolver dnsResolver + + mu sync.Mutex + networkCache map[string]networkCacheEntry + bootstrapCache map[string]bootstrapCacheEntry + providers []botProvider + bootstrapURLs map[string]string +} + +type networkCacheEntry struct { + updatedAt time.Time + networks []netip.Prefix +} + +type bootstrapCacheEntry struct { + updatedAt time.Time + services []rdapService +} + +type rdapService struct { + prefixes []netip.Prefix + urls []string +} + +type botProvider struct { + ID string + Name string + Icon string + SourceFormat string + CacheTTL time.Duration + IPRangeURLs []string + ReverseDNSSuffixes []string + UserAgentPrefixes []string +} + +func New(cfg config.InvestigationConfig, logger *log.Logger) *Service { + return newService( + cfg, + &http.Client{Timeout: cfg.Timeout.Duration}, + net.DefaultResolver, + logger, + defaultBotProviders(), + map[string]string{ + "ipv4": defaultRDAPBootstrapIPv4, + "ipv6": defaultRDAPBootstrapIPv6, + }, + ) +} + +func newService( + cfg config.InvestigationConfig, + client httpClient, + resolver dnsResolver, + logger *log.Logger, + providers []botProvider, + bootstrapURLs map[string]string, +) *Service { + if logger == nil { + logger = log.New(io.Discard, "", 0) + } + return &Service{ + cfg: cfg, + logger: logger, + client: client, + resolver: resolver, + networkCache: map[string]networkCacheEntry{}, + bootstrapCache: map[string]bootstrapCacheEntry{}, + providers: providers, + bootstrapURLs: bootstrapURLs, + } +} + +func (s *Service) Investigate(ctx context.Context, ip string, userAgents []string) (model.IPInvestigation, error) { + parsed, err := netip.ParseAddr(strings.TrimSpace(ip)) + if err != nil { + return model.IPInvestigation{}, fmt.Errorf("invalid ip address %q: %w", ip, err) + } + + investigation := model.IPInvestigation{ + IP: parsed.String(), + UpdatedAt: time.Now().UTC(), + } + if !s.cfg.Enabled { + return investigation, nil + } + + lookupCtx, cancel := context.WithTimeout(ctx, s.cfg.Timeout.Duration) + defer cancel() + + normalizedUserAgents := normalizeUserAgents(userAgents) + botMatch, reverseDNSInfo := s.identifyBot(lookupCtx, parsed, normalizedUserAgents) + if botMatch != nil { + investigation.Bot = botMatch + investigation.ReverseDNS = reverseDNSInfo + return investigation, nil + } + + warnings := make([]string, 0, 2) + if reverseDNSInfo == nil { + reverseDNSInfo, err = s.lookupReverseDNS(lookupCtx, parsed) + if err != nil { + warnings = append(warnings, err.Error()) + } + } + if reverseDNSInfo != nil { + investigation.ReverseDNS = reverseDNSInfo + } + + registration, err := s.lookupRegistration(lookupCtx, parsed) + if err != nil { + warnings = append(warnings, err.Error()) + } else if registration != nil { + investigation.Registration = registration + } + + if s.cfg.SpamhausEnabled { + reputation, err := s.lookupSpamhaus(lookupCtx, parsed) + if err != nil { + warnings = append(warnings, err.Error()) + } else if reputation != nil { + investigation.Reputation = reputation + } + } + + if len(warnings) > 0 { + investigation.Error = strings.Join(uniqueStrings(warnings), "; ") + } + return investigation, nil +} + +func (s *Service) identifyBot(ctx context.Context, ip netip.Addr, userAgents []string) (*model.BotMatch, *model.ReverseDNSInfo) { + var reverseDNSInfo *model.ReverseDNSInfo + for _, provider := range s.providers { + if len(provider.IPRangeURLs) > 0 { + networks, err := s.loadPublishedNetworks(ctx, provider) + if err != nil { + s.logger.Printf("bot provider %s: %v", provider.ID, err) + } else if ipMatchesPrefixes(ip, networks) { + if len(provider.UserAgentPrefixes) == 0 || userAgentMatchesPrefixes(userAgents, provider.UserAgentPrefixes) { + method := "published_ranges" + if len(provider.UserAgentPrefixes) > 0 { + method = "user_agent+published_ranges" + } + return &model.BotMatch{ + ProviderID: provider.ID, + Name: provider.Name, + Icon: provider.Icon, + Method: method, + Verified: true, + }, reverseDNSInfo + } + } + } + + if len(provider.ReverseDNSSuffixes) == 0 { + continue + } + info, err := s.lookupReverseDNS(ctx, ip) + if err != nil { + s.logger.Printf("bot provider %s reverse DNS: %v", provider.ID, err) + continue + } + if info == nil { + continue + } + reverseDNSInfo = info + ptr := strings.ToLower(strings.TrimSuffix(info.PTR, ".")) + if ptr == "" || !info.ForwardConfirmed { + continue + } + for _, suffix := range provider.ReverseDNSSuffixes { + if strings.HasSuffix(ptr, suffix) { + return &model.BotMatch{ + ProviderID: provider.ID, + Name: provider.Name, + Icon: provider.Icon, + Method: "reverse_dns+fcrdns", + Verified: true, + }, reverseDNSInfo + } + } + } + return nil, reverseDNSInfo +} + +func (s *Service) loadPublishedNetworks(ctx context.Context, provider botProvider) ([]netip.Prefix, error) { + s.mu.Lock() + entry, found := s.networkCache[provider.ID] + s.mu.Unlock() + if found && time.Since(entry.updatedAt) < provider.CacheTTL { + return append([]netip.Prefix(nil), entry.networks...), nil + } + + networks := make([]netip.Prefix, 0, 64) + errMessages := make([]string, 0, len(provider.IPRangeURLs)) + for _, sourceURL := range provider.IPRangeURLs { + payload, err := s.fetchDocument(ctx, sourceURL) + if err != nil { + errMessages = append(errMessages, err.Error()) + continue + } + parsed, err := parsePublishedNetworks(payload, provider.SourceFormat, sourceURL) + if err != nil { + errMessages = append(errMessages, err.Error()) + continue + } + networks = append(networks, parsed...) + } + if len(networks) == 0 && len(errMessages) > 0 { + return nil, fmt.Errorf("load published ranges for %s: %s", provider.ID, strings.Join(uniqueStrings(errMessages), "; ")) + } + networks = uniquePrefixes(networks) + s.mu.Lock() + s.networkCache[provider.ID] = networkCacheEntry{updatedAt: time.Now().UTC(), networks: append([]netip.Prefix(nil), networks...)} + s.mu.Unlock() + return networks, nil +} + +func parsePublishedNetworks(payload []byte, sourceFormat string, sourceURL string) ([]netip.Prefix, error) { + switch sourceFormat { + case "json_prefixes": + var document struct { + Prefixes []struct { + IPv4Prefix string `json:"ipv4Prefix"` + IPv6Prefix string `json:"ipv6Prefix"` + } `json:"prefixes"` + } + if err := json.Unmarshal(payload, &document); err != nil { + return nil, fmt.Errorf("decode published prefix payload from %s: %w", sourceURL, err) + } + networks := make([]netip.Prefix, 0, len(document.Prefixes)) + for _, entry := range document.Prefixes { + rawPrefix := strings.TrimSpace(entry.IPv4Prefix) + if rawPrefix == "" { + rawPrefix = strings.TrimSpace(entry.IPv6Prefix) + } + if rawPrefix == "" { + continue + } + prefix, err := netip.ParsePrefix(rawPrefix) + if err != nil { + return nil, fmt.Errorf("parse published prefix %q from %s: %w", rawPrefix, sourceURL, err) + } + networks = append(networks, prefix.Masked()) + } + return networks, nil + case "geofeed_csv": + reader := csv.NewReader(strings.NewReader(string(payload))) + rows, err := reader.ReadAll() + if err != nil { + return nil, fmt.Errorf("decode geofeed payload from %s: %w", sourceURL, err) + } + networks := make([]netip.Prefix, 0, len(rows)) + for _, row := range rows { + if len(row) == 0 { + continue + } + candidate := strings.TrimSpace(row[0]) + if candidate == "" || strings.HasPrefix(candidate, "#") { + continue + } + prefix, err := netip.ParsePrefix(candidate) + if err != nil { + return nil, fmt.Errorf("parse geofeed prefix %q from %s: %w", candidate, sourceURL, err) + } + networks = append(networks, prefix.Masked()) + } + return networks, nil + default: + return nil, fmt.Errorf("unsupported source format %q for %s", sourceFormat, sourceURL) + } +} + +func (s *Service) lookupReverseDNS(ctx context.Context, ip netip.Addr) (*model.ReverseDNSInfo, error) { + names, err := s.resolver.LookupAddr(ctx, ip.String()) + if err != nil { + if isDNSNotFound(err) { + return nil, nil + } + return nil, fmt.Errorf("reverse dns lookup for %s: %w", ip, err) + } + if len(names) == 0 { + return nil, nil + } + sort.Strings(names) + ptr := strings.TrimSuffix(strings.TrimSpace(names[0]), ".") + if ptr == "" { + return nil, nil + } + + resolvedIPs, err := s.resolver.LookupIPAddr(ctx, ptr) + if err != nil && !isDNSNotFound(err) { + return &model.ReverseDNSInfo{PTR: ptr, ForwardConfirmed: false}, fmt.Errorf("forward-confirm dns lookup for %s: %w", ptr, err) + } + forwardConfirmed := false + for _, resolved := range resolvedIPs { + addr, ok := netip.AddrFromSlice(resolved.IP) + if ok && addr.Unmap() == ip.Unmap() { + forwardConfirmed = true + break + } + } + return &model.ReverseDNSInfo{PTR: ptr, ForwardConfirmed: forwardConfirmed}, nil +} + +func (s *Service) lookupRegistration(ctx context.Context, ip netip.Addr) (*model.RegistrationInfo, error) { + family := "ipv4" + if ip.Is6() { + family = "ipv6" + } + services, err := s.loadBootstrap(ctx, family) + if err != nil { + return nil, err + } + baseURL := lookupRDAPBaseURL(ip, services) + if baseURL == "" { + return nil, fmt.Errorf("no RDAP service found for %s", ip) + } + requestURL := strings.TrimRight(baseURL, "/") + "/ip/" + url.PathEscape(ip.String()) + payload, err := s.fetchJSONDocument(ctx, requestURL) + if err != nil { + return nil, fmt.Errorf("rdap lookup for %s: %w", ip, err) + } + registration := &model.RegistrationInfo{ + Source: requestURL, + Handle: strings.TrimSpace(asString(payload["handle"])), + Name: strings.TrimSpace(asString(payload["name"])), + Country: strings.TrimSpace(asString(payload["country"])), + Prefix: extractPrefix(payload), + Organization: extractOrganization(payload), + AbuseEmail: extractAbuseEmail(payload["entities"]), + } + if registration.Organization == "" { + registration.Organization = registration.Name + } + if registration.Name == "" && registration.Organization == "" && registration.Handle == "" && registration.Prefix == "" && registration.Country == "" && registration.AbuseEmail == "" { + return nil, nil + } + return registration, nil +} + +func (s *Service) loadBootstrap(ctx context.Context, family string) ([]rdapService, error) { + s.mu.Lock() + entry, found := s.bootstrapCache[family] + s.mu.Unlock() + if found && time.Since(entry.updatedAt) < 24*time.Hour { + return append([]rdapService(nil), entry.services...), nil + } + + bootstrapURL := s.bootstrapURLs[family] + payload, err := s.fetchDocument(ctx, bootstrapURL) + if err != nil { + return nil, fmt.Errorf("fetch %s RDAP bootstrap: %w", family, err) + } + services, err := parseBootstrap(payload, bootstrapURL) + if err != nil { + return nil, err + } + s.mu.Lock() + s.bootstrapCache[family] = bootstrapCacheEntry{updatedAt: time.Now().UTC(), services: append([]rdapService(nil), services...)} + s.mu.Unlock() + return services, nil +} + +func parseBootstrap(payload []byte, sourceURL string) ([]rdapService, error) { + var document struct { + Services [][][]string `json:"services"` + } + if err := json.Unmarshal(payload, &document); err != nil { + return nil, fmt.Errorf("decode RDAP bootstrap from %s: %w", sourceURL, err) + } + services := make([]rdapService, 0, len(document.Services)) + for _, rawService := range document.Services { + if len(rawService) < 2 { + continue + } + prefixes := make([]netip.Prefix, 0, len(rawService[0])) + for _, candidate := range rawService[0] { + prefix, err := netip.ParsePrefix(strings.TrimSpace(candidate)) + if err != nil { + continue + } + prefixes = append(prefixes, prefix.Masked()) + } + if len(prefixes) == 0 || len(rawService[1]) == 0 { + continue + } + services = append(services, rdapService{prefixes: prefixes, urls: append([]string(nil), rawService[1]...)}) + } + if len(services) == 0 { + return nil, fmt.Errorf("empty RDAP bootstrap payload from %s", sourceURL) + } + return services, nil +} + +func lookupRDAPBaseURL(ip netip.Addr, services []rdapService) string { + bestBits := -1 + bestURL := "" + for _, service := range services { + for _, prefix := range service.prefixes { + if prefix.Contains(ip) && prefix.Bits() > bestBits && len(service.urls) > 0 { + bestBits = prefix.Bits() + bestURL = strings.TrimSpace(service.urls[0]) + } + } + } + return bestURL +} + +func (s *Service) lookupSpamhaus(ctx context.Context, ip netip.Addr) (*model.ReputationInfo, error) { + if !isPublicIP(ip) { + return nil, nil + } + lookupName, err := spamhausLookupName(ip) + if err != nil { + return nil, err + } + answers, err := s.resolver.LookupHost(ctx, lookupName) + if err != nil { + if isDNSNotFound(err) { + return &model.ReputationInfo{SpamhausLookup: spamhausLookupZone, SpamhausListed: false}, nil + } + return &model.ReputationInfo{SpamhausLookup: spamhausLookupZone, Error: err.Error()}, nil + } + return &model.ReputationInfo{ + SpamhausLookup: spamhausLookupZone, + SpamhausListed: len(answers) > 0, + SpamhausCodes: uniqueStrings(answers), + }, nil +} + +func (s *Service) fetchDocument(ctx context.Context, requestURL string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, requestURL, nil) + if err != nil { + return nil, fmt.Errorf("build request for %s: %w", requestURL, err) + } + req.Header.Set("Accept", "application/json, text/plain, */*") + req.Header.Set("User-Agent", s.cfg.UserAgent) + + resp, err := s.client.Do(req) + if err != nil { + return nil, fmt.Errorf("request %s: %w", requestURL, err) + } + defer resp.Body.Close() + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + payload, _ := io.ReadAll(io.LimitReader(resp.Body, 8<<10)) + return nil, fmt.Errorf("request %s returned %s: %s", requestURL, resp.Status, strings.TrimSpace(string(payload))) + } + payload, err := io.ReadAll(io.LimitReader(resp.Body, 4<<20)) + if err != nil { + return nil, fmt.Errorf("read response %s: %w", requestURL, err) + } + return payload, nil +} + +func (s *Service) fetchJSONDocument(ctx context.Context, requestURL string) (map[string]any, error) { + payload, err := s.fetchDocument(ctx, requestURL) + if err != nil { + return nil, err + } + var decoded map[string]any + if err := json.Unmarshal(payload, &decoded); err != nil { + return nil, fmt.Errorf("decode json payload from %s: %w", requestURL, err) + } + return decoded, nil +} + +func defaultBotProviders() []botProvider { + return []botProvider{ + { + ID: "google_official", + Name: "Googlebot", + Icon: "🤖", + SourceFormat: "json_prefixes", + CacheTTL: 24 * time.Hour, + IPRangeURLs: []string{ + "https://developers.google.com/static/crawling/ipranges/common-crawlers.json", + "https://developers.google.com/static/crawling/ipranges/special-crawlers.json", + "https://developers.google.com/static/crawling/ipranges/user-triggered-fetchers-google.json", + }, + }, + { + ID: "bing_official", + Name: "Bingbot", + Icon: "🤖", + SourceFormat: "json_prefixes", + CacheTTL: 24 * time.Hour, + ReverseDNSSuffixes: []string{".search.msn.com"}, + }, + { + ID: "apple_official", + Name: "Applebot", + Icon: "🤖", + SourceFormat: "json_prefixes", + CacheTTL: 24 * time.Hour, + IPRangeURLs: []string{"https://search.developer.apple.com/applebot.json"}, + ReverseDNSSuffixes: []string{".applebot.apple.com"}, + }, + { + ID: "facebook_official", + Name: "Meta crawler", + Icon: "🤖", + SourceFormat: "geofeed_csv", + CacheTTL: 24 * time.Hour, + IPRangeURLs: []string{"https://www.facebook.com/peering/geofeed"}, + UserAgentPrefixes: []string{ + "facebookexternalhit/", + "meta-webindexer/", + "meta-externalads/", + "meta-externalagent/", + "meta-externalfetcher/", + }, + }, + { + ID: "duckduckgo_official", + Name: "DuckDuckBot", + Icon: "🤖", + SourceFormat: "json_prefixes", + CacheTTL: 24 * time.Hour, + IPRangeURLs: []string{"https://duckduckgo.com/duckduckbot.json"}, + }, + } +} + +func ipMatchesPrefixes(ip netip.Addr, prefixes []netip.Prefix) bool { + for _, prefix := range prefixes { + if prefix.Contains(ip) { + return true + } + } + return false +} + +func userAgentMatchesPrefixes(userAgents []string, prefixes []string) bool { + for _, agent := range userAgents { + for _, prefix := range prefixes { + if strings.HasPrefix(agent, prefix) { + return true + } + } + } + return false +} + +func normalizeUserAgents(userAgents []string) []string { + items := make([]string, 0, len(userAgents)) + for _, userAgent := range userAgents { + normalized := strings.ToLower(strings.TrimSpace(userAgent)) + if normalized == "" { + continue + } + items = append(items, normalized) + } + return uniqueStrings(items) +} + +func extractPrefix(payload map[string]any) string { + items, ok := payload["cidr0_cidrs"].([]any) + if !ok { + return "" + } + for _, item := range items { + entry, ok := item.(map[string]any) + if !ok { + continue + } + prefix := strings.TrimSpace(asString(entry["v4prefix"])) + if prefix == "" { + prefix = strings.TrimSpace(asString(entry["v6prefix"])) + } + length := asInt(entry["length"]) + if prefix == "" || length == 0 { + continue + } + return prefix + "/" + strconv.Itoa(length) + } + return "" +} + +func extractOrganization(payload map[string]any) string { + if organization := extractEntityName(payload["entities"]); organization != "" { + return organization + } + return strings.TrimSpace(asString(payload["name"])) +} + +func extractEntityName(value any) string { + entities, ok := value.([]any) + if !ok { + return "" + } + for _, rawEntity := range entities { + entity, ok := rawEntity.(map[string]any) + if !ok { + continue + } + if name := strings.TrimSpace(asString(entity["fn"])); name != "" { + return name + } + if name := extractVCardText(entity["vcardArray"], "fn"); name != "" { + return name + } + if nested := extractEntityName(entity["entities"]); nested != "" { + return nested + } + } + return "" +} + +func extractAbuseEmail(value any) string { + entities, ok := value.([]any) + if !ok { + return "" + } + for _, rawEntity := range entities { + entity, ok := rawEntity.(map[string]any) + if !ok { + continue + } + roles := toStrings(entity["roles"]) + if containsString(roles, "abuse") { + if email := extractVCardText(entity["vcardArray"], "email"); email != "" { + return email + } + } + if nested := extractAbuseEmail(entity["entities"]); nested != "" { + return nested + } + } + return "" +} + +func extractVCardText(value any, field string) string { + items, ok := value.([]any) + if !ok || len(items) < 2 { + return "" + } + rows, ok := items[1].([]any) + if !ok { + return "" + } + for _, rawRow := range rows { + row, ok := rawRow.([]any) + if !ok || len(row) < 4 { + continue + } + name, ok := row[0].(string) + if !ok || name != field { + continue + } + textValue, ok := row[3].(string) + if ok { + return strings.TrimSpace(textValue) + } + } + return "" +} + +func spamhausLookupName(ip netip.Addr) (string, error) { + ip = ip.Unmap() + if ip.Is4() { + bytes := ip.As4() + return fmt.Sprintf("%d.%d.%d.%d.%s", bytes[3], bytes[2], bytes[1], bytes[0], spamhausLookupZone), nil + } + if ip.Is6() { + bytes := ip.As16() + hexString := hex.EncodeToString(bytes[:]) + parts := make([]string, 0, len(hexString)) + for index := len(hexString) - 1; index >= 0; index-- { + parts = append(parts, string(hexString[index])) + } + return strings.Join(parts, ".") + "." + spamhausLookupZone, nil + } + return "", fmt.Errorf("unsupported ip family for %s", ip) +} + +func uniquePrefixes(items []netip.Prefix) []netip.Prefix { + if len(items) == 0 { + return nil + } + seen := make(map[string]struct{}, len(items)) + result := make([]netip.Prefix, 0, len(items)) + for _, item := range items { + key := item.Masked().String() + if _, ok := seen[key]; ok { + continue + } + seen[key] = struct{}{} + result = append(result, item.Masked()) + } + sort.Slice(result, func(left int, right int) bool { + if result[left].Bits() == result[right].Bits() { + return result[left].String() < result[right].String() + } + return result[left].Bits() > result[right].Bits() + }) + return result +} + +func uniqueStrings(items []string) []string { + if len(items) == 0 { + return nil + } + seen := make(map[string]struct{}, len(items)) + result := make([]string, 0, len(items)) + for _, item := range items { + if _, ok := seen[item]; ok { + continue + } + seen[item] = struct{}{} + result = append(result, item) + } + sort.Strings(result) + return result +} + +func containsString(items []string, expected string) bool { + for _, item := range items { + if item == expected { + return true + } + } + return false +} + +func toStrings(value any) []string { + rawItems, ok := value.([]any) + if !ok { + return nil + } + items := make([]string, 0, len(rawItems)) + for _, rawItem := range rawItems { + if text, ok := rawItem.(string); ok { + items = append(items, strings.TrimSpace(text)) + } + } + return items +} + +func asString(value any) string { + text, _ := value.(string) + return text +} + +func asInt(value any) int { + switch current := value.(type) { + case float64: + return int(current) + case float32: + return int(current) + case int: + return current + case int64: + return int(current) + case json.Number: + converted, _ := current.Int64() + return int(converted) + default: + return 0 + } +} + +func isDNSNotFound(err error) bool { + var dnsError *net.DNSError + if errors.As(err, &dnsError) { + return dnsError.IsNotFound + } + return false +} + +func isPublicIP(ip netip.Addr) bool { + ip = ip.Unmap() + if !ip.IsValid() || ip.IsLoopback() || ip.IsMulticast() || ip.IsPrivate() || ip.IsLinkLocalMulticast() || ip.IsLinkLocalUnicast() || ip.IsUnspecified() { + return false + } + return true +} diff --git a/internal/investigation/service_test.go b/internal/investigation/service_test.go new file mode 100644 index 0000000..95469a6 --- /dev/null +++ b/internal/investigation/service_test.go @@ -0,0 +1,229 @@ +package investigation + +import ( + "context" + "log" + "net" + "net/http" + "net/http/httptest" + "net/netip" + "strings" + "sync" + "testing" + "time" + + "git.dern.ovh/infrastructure/caddy-opnsense-blocker/internal/config" +) + +func TestInvestigateRecognizesBotViaPublishedRanges(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/ranges.json" { + http.NotFound(w, r) + return + } + _, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`)) + })) + defer server.Close() + + svc := newService( + config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true}, + server.Client(), + &fakeResolver{}, + log.New(testWriter{t}, "", 0), + []botProvider{{ + ID: "google_official", + Name: "Googlebot", + Icon: "🤖", + SourceFormat: "json_prefixes", + CacheTTL: time.Hour, + IPRangeURLs: []string{server.URL + "/ranges.json"}, + }}, + map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"}, + ) + + investigation, err := svc.Investigate(context.Background(), "203.0.113.10", []string{"Mozilla/5.0"}) + if err != nil { + t.Fatalf("investigate ip: %v", err) + } + if investigation.Bot == nil || investigation.Bot.Name != "Googlebot" { + t.Fatalf("expected Googlebot match, got %+v", investigation) + } + if investigation.Registration != nil || investigation.Reputation != nil { + t.Fatalf("expected bot investigation to stop before deep lookups, got %+v", investigation) + } +} + +func TestInvestigateRecognizesBotViaReverseDNS(t *testing.T) { + t.Parallel() + + resolver := &fakeResolver{ + reverse: map[string][]string{"198.51.100.20": {"crawl.search.example.test."}}, + forward: map[string][]net.IPAddr{"crawl.search.example.test": {{IP: net.ParseIP("198.51.100.20")}}}, + } + + svc := newService( + config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true}, + http.DefaultClient, + resolver, + log.New(testWriter{t}, "", 0), + []botProvider{{ + ID: "bing_official", + Name: "Bingbot", + Icon: "🤖", + CacheTTL: time.Hour, + ReverseDNSSuffixes: []string{".search.example.test"}, + }}, + map[string]string{}, + ) + + investigation, err := svc.Investigate(context.Background(), "198.51.100.20", nil) + if err != nil { + t.Fatalf("investigate ip: %v", err) + } + if investigation.Bot == nil || investigation.Bot.Name != "Bingbot" || investigation.ReverseDNS == nil || !investigation.ReverseDNS.ForwardConfirmed { + t.Fatalf("expected reverse DNS bot match, got %+v", investigation) + } +} + +func TestInvestigateLoadsRegistrationAndSpamhausForNonBot(t *testing.T) { + t.Parallel() + + serverURL := "" + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/bootstrap-v4.json": + _, _ = w.Write([]byte(`{"services":[[["198.51.100.0/24"],["` + serverURL + `/rdap/"]]]}`)) + case "/rdap/ip/198.51.100.30": + _, _ = w.Write([]byte(`{ + "handle":"NET-198-51-100-0-1", + "name":"Example Network", + "country":"FR", + "cidr0_cidrs":[{"v4prefix":"198.51.100.0","length":24}], + "entities":[{"roles":["abuse"],"vcardArray":["vcard",[["email",{},"text","abuse@example.test"],["fn",{},"text","Example ISP"]]]}] + }`)) + default: + http.NotFound(w, r) + } + })) + serverURL = server.URL + defer server.Close() + + resolver := &fakeResolver{ + hosts: map[string][]string{spamhausQuery(t, "198.51.100.30"): {"127.0.0.2"}}, + } + + svc := newService( + config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: 2 * time.Second}, UserAgent: "test-agent", SpamhausEnabled: true}, + server.Client(), + resolver, + log.New(testWriter{t}, "", 0), + nil, + map[string]string{"ipv4": server.URL + "/bootstrap-v4.json", "ipv6": server.URL + "/bootstrap-v6.json"}, + ) + + investigation, err := svc.Investigate(context.Background(), "198.51.100.30", []string{"curl/8.0"}) + if err != nil { + t.Fatalf("investigate ip: %v", err) + } + if investigation.Bot != nil { + t.Fatalf("expected no bot match, got %+v", investigation.Bot) + } + if investigation.Registration == nil || investigation.Registration.Organization != "Example ISP" || investigation.Registration.Prefix != "198.51.100.0/24" { + t.Fatalf("unexpected registration info: %+v", investigation.Registration) + } + if investigation.Reputation == nil || !investigation.Reputation.SpamhausListed { + t.Fatalf("expected spamhaus listing, got %+v", investigation.Reputation) + } +} + +func TestPublishedNetworksAreCached(t *testing.T) { + t.Parallel() + + var mu sync.Mutex + requestCount := 0 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + mu.Lock() + requestCount++ + mu.Unlock() + _, _ = w.Write([]byte(`{"prefixes":[{"ipv4Prefix":"203.0.113.0/24"}]}`)) + })) + defer server.Close() + + svc := newService( + config.InvestigationConfig{Enabled: true, Timeout: config.Duration{Duration: time.Second}, UserAgent: "test-agent", SpamhausEnabled: true}, + server.Client(), + &fakeResolver{}, + log.New(testWriter{t}, "", 0), + []botProvider{{ + ID: "provider", + Name: "Provider bot", + Icon: "🤖", + SourceFormat: "json_prefixes", + CacheTTL: time.Hour, + IPRangeURLs: []string{server.URL}, + }}, + map[string]string{}, + ) + + for index := 0; index < 2; index++ { + if _, err := svc.Investigate(context.Background(), "203.0.113.10", nil); err != nil { + t.Fatalf("investigate ip #%d: %v", index, err) + } + } + + mu.Lock() + defer mu.Unlock() + if requestCount != 1 { + t.Fatalf("expected exactly one published-range request, got %d", requestCount) + } +} + +type fakeResolver struct { + reverse map[string][]string + forward map[string][]net.IPAddr + hosts map[string][]string +} + +func (r *fakeResolver) LookupAddr(_ context.Context, addr string) ([]string, error) { + if values, ok := r.reverse[addr]; ok { + return values, nil + } + return nil, &net.DNSError{IsNotFound: true} +} + +func (r *fakeResolver) LookupIPAddr(_ context.Context, host string) ([]net.IPAddr, error) { + if values, ok := r.forward[host]; ok { + return values, nil + } + return nil, &net.DNSError{IsNotFound: true} +} + +func (r *fakeResolver) LookupHost(_ context.Context, host string) ([]string, error) { + if values, ok := r.hosts[host]; ok { + return values, nil + } + return nil, &net.DNSError{IsNotFound: true} +} + +type testWriter struct{ t *testing.T } + +func (w testWriter) Write(payload []byte) (int, error) { + w.t.Helper() + w.t.Log(strings.TrimSpace(string(payload))) + return len(payload), nil +} + +func spamhausQuery(t *testing.T, ip string) string { + t.Helper() + addr, err := netip.ParseAddr(ip) + if err != nil { + t.Fatalf("parse ip: %v", err) + } + query, err := spamhausLookupName(addr) + if err != nil { + t.Fatalf("build spamhaus query: %v", err) + } + return query +} diff --git a/internal/model/types.go b/internal/model/types.go index 6c9f895..5894528 100644 --- a/internal/model/types.go +++ b/internal/model/types.go @@ -113,6 +113,59 @@ type OPNsenseAction struct { CreatedAt time.Time `json:"created_at"` } +type BotMatch struct { + ProviderID string `json:"provider_id"` + Name string `json:"name"` + Icon string `json:"icon"` + Method string `json:"method"` + Verified bool `json:"verified"` +} + +type ReverseDNSInfo struct { + PTR string `json:"ptr"` + ForwardConfirmed bool `json:"forward_confirmed"` +} + +type RegistrationInfo struct { + Source string `json:"source"` + Handle string `json:"handle"` + Name string `json:"name"` + Prefix string `json:"prefix"` + Organization string `json:"organization"` + Country string `json:"country"` + AbuseEmail string `json:"abuse_email"` +} + +type ReputationInfo struct { + SpamhausLookup string `json:"spamhaus_lookup"` + SpamhausListed bool `json:"spamhaus_listed"` + SpamhausCodes []string `json:"spamhaus_codes,omitempty"` + Error string `json:"error,omitempty"` +} + +type IPInvestigation struct { + IP string `json:"ip"` + UpdatedAt time.Time `json:"updated_at"` + Error string `json:"error,omitempty"` + Bot *BotMatch `json:"bot,omitempty"` + ReverseDNS *ReverseDNSInfo `json:"reverse_dns,omitempty"` + Registration *RegistrationInfo `json:"registration,omitempty"` + Reputation *ReputationInfo `json:"reputation,omitempty"` +} + +type OPNsenseStatus struct { + Configured bool `json:"configured"` + Present bool `json:"present"` + CheckedAt time.Time `json:"checked_at,omitempty"` + Error string `json:"error,omitempty"` +} + +type ActionAvailability struct { + CanBlock bool `json:"can_block"` + CanUnblock bool `json:"can_unblock"` + CanClearOverride bool `json:"can_clear_override"` +} + type SourceOffset struct { SourceName string Path string @@ -122,10 +175,13 @@ type SourceOffset struct { } type IPDetails struct { - State IPState `json:"state"` - RecentEvents []Event `json:"recent_events"` - Decisions []DecisionRecord `json:"decisions"` - BackendActions []OPNsenseAction `json:"backend_actions"` + State IPState `json:"state"` + RecentEvents []Event `json:"recent_events"` + Decisions []DecisionRecord `json:"decisions"` + BackendActions []OPNsenseAction `json:"backend_actions"` + Investigation *IPInvestigation `json:"investigation,omitempty"` + OPNsense OPNsenseStatus `json:"opnsense"` + Actions ActionAvailability `json:"actions"` } type Overview struct { diff --git a/internal/service/service.go b/internal/service/service.go index 6b4c753..e434fbc 100644 --- a/internal/service/service.go +++ b/internal/service/service.go @@ -23,23 +23,29 @@ import ( ) type Service struct { - cfg *config.Config - store *store.Store - evaluator *engine.Evaluator - blocker opnsense.AliasClient - logger *log.Logger + cfg *config.Config + store *store.Store + evaluator *engine.Evaluator + blocker opnsense.AliasClient + investigator Investigator + logger *log.Logger } -func New(cfg *config.Config, db *store.Store, blocker opnsense.AliasClient, logger *log.Logger) *Service { +type Investigator interface { + Investigate(ctx context.Context, ip string, userAgents []string) (model.IPInvestigation, error) +} + +func New(cfg *config.Config, db *store.Store, blocker opnsense.AliasClient, investigator Investigator, logger *log.Logger) *Service { if logger == nil { logger = log.New(io.Discard, "", 0) } return &Service{ - cfg: cfg, - store: db, - evaluator: engine.NewEvaluator(), - blocker: blocker, - logger: logger, + cfg: cfg, + store: db, + evaluator: engine.NewEvaluator(), + blocker: blocker, + investigator: investigator, + logger: logger, } } @@ -75,7 +81,40 @@ func (s *Service) GetIPDetails(ctx context.Context, ip string) (model.IPDetails, if err != nil { return model.IPDetails{}, err } - return s.store.GetIPDetails(ctx, normalized, 100, 100, 100) + details, err := s.store.GetIPDetails(ctx, normalized, 0, 100, 100) + if err != nil { + return model.IPDetails{}, err + } + return s.decorateDetails(ctx, details) +} + +func (s *Service) InvestigateIP(ctx context.Context, ip string) (model.IPDetails, error) { + normalized, err := normalizeIP(ip) + if err != nil { + return model.IPDetails{}, err + } + details, err := s.store.GetIPDetails(ctx, normalized, 0, 100, 100) + if err != nil { + return model.IPDetails{}, err + } + if s.investigator != nil { + investigation, found, err := s.store.GetInvestigation(ctx, normalized) + if err != nil { + return model.IPDetails{}, err + } + shouldRefresh := !found || time.Since(investigation.UpdatedAt) >= s.cfg.Investigation.RefreshAfter.Duration + if shouldRefresh { + fresh, err := s.investigator.Investigate(ctx, normalized, collectUserAgents(details.RecentEvents)) + if err != nil { + return model.IPDetails{}, err + } + if err := s.store.SaveInvestigation(ctx, fresh); err != nil { + return model.IPDetails{}, err + } + details.Investigation = &fresh + } + } + return s.decorateDetails(ctx, details) } func (s *Service) ForceBlock(ctx context.Context, ip string, actor string, reason string) error { @@ -410,3 +449,58 @@ func defaultReason(reason string, fallback string) string { } return strings.TrimSpace(reason) } + +func (s *Service) decorateDetails(ctx context.Context, details model.IPDetails) (model.IPDetails, error) { + if details.State.IP != "" && details.Investigation == nil { + investigation, found, err := s.store.GetInvestigation(ctx, details.State.IP) + if err != nil { + return model.IPDetails{}, err + } + if found { + details.Investigation = &investigation + } + } + details.OPNsense = s.resolveOPNsenseStatus(ctx, details.State) + details.Actions = actionAvailability(details.State, details.OPNsense) + return details, nil +} + +func (s *Service) resolveOPNsenseStatus(ctx context.Context, state model.IPState) model.OPNsenseStatus { + status := model.OPNsenseStatus{Configured: s.blocker != nil} + if s.blocker == nil || state.IP == "" { + return status + } + status.CheckedAt = time.Now().UTC() + present, err := s.blocker.IsIPPresent(ctx, state.IP) + if err != nil { + status.Error = err.Error() + return status + } + status.Present = present + return status +} + +func actionAvailability(state model.IPState, backend model.OPNsenseStatus) model.ActionAvailability { + present := false + if backend.Configured && backend.Error == "" { + present = backend.Present + } else { + present = state.State == model.IPStateBlocked || state.ManualOverride == model.ManualOverrideForceBlock + } + return model.ActionAvailability{ + CanBlock: !present, + CanUnblock: present, + CanClearOverride: state.ManualOverride != model.ManualOverrideNone, + } +} + +func collectUserAgents(events []model.Event) []string { + items := make([]string, 0, len(events)) + for _, event := range events { + if strings.TrimSpace(event.UserAgent) == "" { + continue + } + items = append(items, event.UserAgent) + } + return items +} diff --git a/internal/service/service_test.go b/internal/service/service_test.go index e68b9ee..0aeeaa0 100644 --- a/internal/service/service_test.go +++ b/internal/service/service_test.go @@ -87,7 +87,7 @@ sources: t.Fatalf("open store: %v", err) } defer database.Close() - svc := New(cfg, database, opnsense.NewClient(cfg.OPNsense), log.New(os.Stderr, "", 0)) + svc := New(cfg, database, opnsense.NewClient(cfg.OPNsense), nil, log.New(os.Stderr, "", 0)) ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/internal/store/store.go b/internal/store/store.go index 3f83e3f..425c6c5 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -94,6 +94,14 @@ CREATE TABLE IF NOT EXISTS source_offsets ( offset INTEGER NOT NULL, updated_at TEXT NOT NULL ); + +CREATE TABLE IF NOT EXISTS ip_investigations ( + ip TEXT PRIMARY KEY, + payload_json TEXT NOT NULL, + updated_at TEXT NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_ip_investigations_updated_at ON ip_investigations(updated_at DESC, ip ASC); ` type Store struct { @@ -492,6 +500,53 @@ func (s *Store) GetIPDetails(ctx context.Context, ip string, eventLimit, decisio }, nil } +func (s *Store) GetInvestigation(ctx context.Context, ip string) (model.IPInvestigation, bool, error) { + row := s.db.QueryRowContext(ctx, `SELECT payload_json, updated_at FROM ip_investigations WHERE ip = ?`, ip) + var payload string + var updatedAt string + if err := row.Scan(&payload, &updatedAt); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return model.IPInvestigation{}, false, nil + } + return model.IPInvestigation{}, false, fmt.Errorf("query ip investigation %q: %w", ip, err) + } + var item model.IPInvestigation + if err := json.Unmarshal([]byte(payload), &item); err != nil { + return model.IPInvestigation{}, false, fmt.Errorf("decode ip investigation %q: %w", ip, err) + } + parsed, err := parseTime(updatedAt) + if err != nil { + return model.IPInvestigation{}, false, fmt.Errorf("parse ip investigation updated_at: %w", err) + } + item.UpdatedAt = parsed + return item, true, nil +} + +func (s *Store) SaveInvestigation(ctx context.Context, item model.IPInvestigation) error { + if item.UpdatedAt.IsZero() { + item.UpdatedAt = time.Now().UTC() + } + payload, err := json.Marshal(item) + if err != nil { + return fmt.Errorf("encode ip investigation: %w", err) + } + _, err = s.db.ExecContext( + ctx, + `INSERT INTO ip_investigations (ip, payload_json, updated_at) + VALUES (?, ?, ?) + ON CONFLICT(ip) DO UPDATE SET + payload_json = excluded.payload_json, + updated_at = excluded.updated_at`, + item.IP, + string(payload), + formatTime(item.UpdatedAt), + ) + if err != nil { + return fmt.Errorf("upsert ip investigation %q: %w", item.IP, err) + } + return nil +} + func (s *Store) GetSourceOffset(ctx context.Context, sourceName string) (model.SourceOffset, bool, error) { row := s.db.QueryRowContext(ctx, `SELECT source_name, path, inode, offset, updated_at FROM source_offsets WHERE source_name = ?`, sourceName) var offset model.SourceOffset @@ -536,10 +591,7 @@ func (s *Store) SaveSourceOffset(ctx context.Context, offset model.SourceOffset) } func (s *Store) listEventsForIP(ctx context.Context, ip string, limit int) ([]model.Event, error) { - if limit <= 0 { - limit = 50 - } - rows, err := s.db.QueryContext(ctx, ` + query := ` SELECT e.id, e.source_name, e.profile_name, e.occurred_at, e.remote_ip, e.client_ip, e.host, e.method, e.uri, e.path, e.status, e.user_agent, e.decision, e.decision_reason, e.decision_reasons_json, e.enforced, e.raw_json, e.created_at, @@ -547,17 +599,19 @@ func (s *Store) listEventsForIP(ctx context.Context, ip string, limit int) ([]mo FROM events e LEFT JOIN ip_state s ON s.ip = e.client_ip WHERE e.client_ip = ? - ORDER BY e.occurred_at DESC, e.id DESC - LIMIT ?`, - ip, - limit, - ) + ORDER BY e.occurred_at DESC, e.id DESC` + args := []any{ip} + if limit > 0 { + query += ` LIMIT ?` + args = append(args, limit) + } + rows, err := s.db.QueryContext(ctx, query, args...) if err != nil { return nil, fmt.Errorf("list events for ip %q: %w", ip, err) } defer rows.Close() - items := make([]model.Event, 0, limit) + items := make([]model.Event, 0, max(limit, 0)) for rows.Next() { item, err := scanEvent(rows) if err != nil { @@ -651,6 +705,13 @@ func (s *Store) listBackendActionsForIP(ctx context.Context, ip string, limit in return items, nil } +func max(left int, right int) int { + if left > right { + return left + } + return right +} + func getIPStateDB(ctx context.Context, db queryer, ip string) (model.IPState, bool, error) { row := db.QueryRowContext(ctx, ` SELECT ip, first_seen_at, last_seen_at, last_source_name, last_user_agent, latest_status, diff --git a/internal/store/store_test.go b/internal/store/store_test.go index 9c6adac..5a69f2e 100644 --- a/internal/store/store_test.go +++ b/internal/store/store_test.go @@ -113,4 +113,20 @@ func TestStoreRecordsEventsAndState(t *testing.T) { if len(details.RecentEvents) != 1 || len(details.Decisions) != 1 || len(details.BackendActions) != 1 { t.Fatalf("unexpected ip details: %+v", details) } + + investigation := model.IPInvestigation{ + IP: event.ClientIP, + UpdatedAt: occurredAt, + Bot: &model.BotMatch{Name: "Googlebot", ProviderID: "google_official", Icon: "🤖", Method: "published_ranges", Verified: true}, + } + if err := db.SaveInvestigation(ctx, investigation); err != nil { + t.Fatalf("save investigation: %v", err) + } + loadedInvestigation, found, err := db.GetInvestigation(ctx, event.ClientIP) + if err != nil { + t.Fatalf("get investigation: %v", err) + } + if !found || loadedInvestigation.Bot == nil || loadedInvestigation.Bot.Name != "Googlebot" { + t.Fatalf("unexpected investigation payload: found=%v investigation=%+v", found, loadedInvestigation) + } } diff --git a/internal/web/handler.go b/internal/web/handler.go index c3f3d51..3cbc23f 100644 --- a/internal/web/handler.go +++ b/internal/web/handler.go @@ -21,6 +21,7 @@ type App interface { ListEvents(ctx context.Context, limit int) ([]model.Event, error) ListIPs(ctx context.Context, limit int, state string) ([]model.IPState, error) GetIPDetails(ctx context.Context, ip string) (model.IPDetails, error) + InvestigateIP(ctx context.Context, ip string) (model.IPDetails, error) ForceBlock(ctx context.Context, ip string, actor string, reason string) error ForceAllow(ctx context.Context, ip string, actor string, reason string) error ClearOverride(ctx context.Context, ip string, actor string, reason string) error @@ -165,6 +166,16 @@ func (h *handler) handleAPIIP(w http.ResponseWriter, r *http.Request) { methodNotAllowed(w) return } + if action == "investigate" { + details, err := h.app.InvestigateIP(r.Context(), ip) + if err != nil { + writeError(w, http.StatusInternalServerError, err) + return + } + writeJSON(w, http.StatusOK, details) + return + } + payload, err := decodeActionPayload(r) if err != nil { writeError(w, http.StatusBadRequest, err) @@ -175,7 +186,7 @@ func (h *handler) handleAPIIP(w http.ResponseWriter, r *http.Request) { err = h.app.ForceBlock(r.Context(), ip, payload.Actor, payload.Reason) case "unblock": err = h.app.ForceAllow(r.Context(), ip, payload.Actor, payload.Reason) - case "reset": + case "clear-override", "reset": err = h.app.ClearOverride(r.Context(), ip, payload.Actor, payload.Reason) default: http.NotFound(w, r) @@ -199,26 +210,50 @@ func decodeActionPayload(r *http.Request) (actionPayload, error) { if r.ContentLength == 0 { return payload, nil } - decoder := json.NewDecoder(io.LimitReader(r.Body, 1<<20)) - if err := decoder.Decode(&payload); err != nil { - if errors.Is(err, io.EOF) { - return payload, nil - } - return actionPayload{}, fmt.Errorf("decode request body: %w", err) + limited := io.LimitReader(r.Body, 1<<20) + if err := json.NewDecoder(limited).Decode(&payload); err != nil { + return actionPayload{}, fmt.Errorf("decode action payload: %w", err) } return payload, nil } +func queryLimit(r *http.Request, fallback int) int { + if fallback <= 0 { + fallback = 50 + } + value := strings.TrimSpace(r.URL.Query().Get("limit")) + if value == "" { + return fallback + } + parsed, err := strconv.Atoi(value) + if err != nil || parsed <= 0 { + return fallback + } + if parsed > 1000 { + return 1000 + } + return parsed +} + +func writeJSON(w http.ResponseWriter, status int, payload any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(payload) +} + +func writeError(w http.ResponseWriter, status int, err error) { + writeJSON(w, status, map[string]any{"error": err.Error()}) +} + func extractPathValue(path string, prefix string) (string, bool) { if !strings.HasPrefix(path, prefix) { return "", false } - rest := strings.TrimPrefix(path, prefix) - rest = strings.Trim(rest, "/") - if rest == "" { + value := strings.Trim(strings.TrimPrefix(path, prefix), "/") + if value == "" { return "", false } - decoded, err := url.PathUnescape(rest) + decoded, err := url.PathUnescape(value) if err != nil { return "", false } @@ -229,48 +264,19 @@ func extractAPIPath(path string) (ip string, action string, ok bool) { if !strings.HasPrefix(path, "/api/ips/") { return "", "", false } - rest := strings.TrimPrefix(path, "/api/ips/") - rest = strings.Trim(rest, "/") + rest := strings.Trim(strings.TrimPrefix(path, "/api/ips/"), "/") if rest == "" { return "", "", false } parts := strings.Split(rest, "/") - decoded, err := url.PathUnescape(parts[0]) + decodedIP, err := url.PathUnescape(parts[0]) if err != nil { return "", "", false } if len(parts) == 1 { - return decoded, "", true + return decodedIP, "", true } - if len(parts) == 2 { - return decoded, parts[1], true - } - return "", "", false -} - -func queryLimit(r *http.Request, fallback int) int { - value := strings.TrimSpace(r.URL.Query().Get("limit")) - if value == "" { - return fallback - } - parsed, err := strconv.Atoi(value) - if err != nil || parsed <= 0 { - return fallback - } - if parsed > 500 { - return 500 - } - return parsed -} - -func writeJSON(w http.ResponseWriter, status int, payload any) { - w.Header().Set("Content-Type", "application/json; charset=utf-8") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(payload) -} - -func writeError(w http.ResponseWriter, status int, err error) { - writeJSON(w, status, map[string]any{"error": err.Error()}) + return decodedIP, parts[1], true } func methodNotAllowed(w http.ResponseWriter) { @@ -309,10 +315,6 @@ const overviewHTML = ` .status.review { background: #78350f; } .status.allowed { background: #14532d; } .status.observed { background: #1e293b; } - .actions { display: flex; gap: .35rem; flex-wrap: wrap; } - button { background: #2563eb; color: white; border: 0; border-radius: .45rem; padding: .35rem .6rem; cursor: pointer; } - button.secondary { background: #475569; } - button.danger { background: #dc2626; } .muted { color: #94a3b8; } .mono { font-family: ui-monospace, monospace; } .panel { background: #111827; border: 1px solid #334155; border-radius: .75rem; padding: 1rem; overflow: auto; } @@ -329,13 +331,13 @@ const overviewHTML = `

Recent IPs

- +
IPStateOverrideEventsLast seenReasonActions
IPStateOverrideEventsLast seenReason
-

Recent Events

+

Recent events

@@ -349,18 +351,11 @@ const overviewHTML = ` return String(value ?? '').replace(/[&<>"']/g, character => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[character])); } - async function sendAction(ip, action) { - const reason = window.prompt('Optional reason', ''); - const response = await fetch('/api/ips/' + encodeURIComponent(ip) + '/' + action, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ reason, actor: 'web-ui' }), - }); - if (!response.ok) { - const payload = await response.json().catch(() => ({ error: response.statusText })); - window.alert(payload.error || 'Request failed'); + function formatDate(value) { + if (!value) { + return '—'; } - await refresh(); + return new Date(value).toLocaleString(); } function renderStats(data) { @@ -387,15 +382,8 @@ const overviewHTML = ` ' ', ' ', ' ', - ' ', + ' ', ' ', - ' ', '' ].join('')).join(''); } @@ -403,7 +391,7 @@ const overviewHTML = ` function renderEvents(items) { document.getElementById('events-body').innerHTML = items.map(item => [ '', - ' ', + ' ', ' ', ' ', ' ', @@ -437,24 +425,29 @@ const ipDetailsHTML = `{{ .Title }} @@ -465,18 +458,19 @@ const ipDetailsHTML = `

State

-
-
- - - -
+
+
+
Clear override removes the local manual override only. It does not change the current OPNsense alias entry.
-

Recent events

+

Investigation

+
+
+
+

Requests from this IP

TimeSourceIPHostMethodPathStatusDecision
' + escapeHtml(item.state) + '' + escapeHtml(item.manual_override) + '' + escapeHtml(item.total_events) + '' + escapeHtml(new Date(item.last_seen_at).toLocaleString()) + '' + escapeHtml(formatDate(item.last_seen_at)) + '' + escapeHtml(item.state_reason) + '', - '
', - ' ', - ' ', - ' ', - '
', - '
' + escapeHtml(new Date(item.occurred_at).toLocaleString()) + '' + escapeHtml(formatDate(item.occurred_at)) + '' + escapeHtml(item.source_name) + '' + escapeHtml(item.client_ip) + '' + escapeHtml(item.host) + '
- +
TimeSourceMethodPathStatusDecision
TimeSourceHostMethodURIStatusDecisionUser agent
@@ -507,8 +501,18 @@ const ipDetailsHTML = ` return String(value ?? '').replace(/[&<>"']/g, character => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[character])); } - async function sendAction(action) { - const reason = window.prompt('Optional reason', ''); + function formatDate(value) { + if (!value) { + return '—'; + } + return new Date(value).toLocaleString(); + } + + async function sendAction(action, promptLabel) { + const reason = window.prompt(promptLabel, ''); + if (reason === null) { + return; + } const response = await fetch('/api/ips/' + encodeURIComponent(ip) + '/' + action, { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -517,67 +521,148 @@ const ipDetailsHTML = ` if (!response.ok) { const payload = await response.json().catch(() => ({ error: response.statusText })); window.alert(payload.error || 'Request failed'); + return; } - await refresh(); + const data = await response.json(); + renderAll(data); } - function renderState(state) { + async function investigate() { + document.getElementById('investigation').innerHTML = '
Refreshing investigation…
'; + const response = await fetch('/api/ips/' + encodeURIComponent(ip) + '/investigate', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ actor: 'web-ui' }), + }); + if (!response.ok) { + const payload = await response.json().catch(() => ({ error: response.statusText })); + document.getElementById('investigation').innerHTML = '
' + escapeHtml(payload.error || 'Investigation failed') + '
'; + return; + } + const data = await response.json(); + renderAll(data); + } + + function renderState(data) { + const state = data.state || {}; + const opnsense = data.opnsense || {}; + const opnsenseLabel = opnsense.configured ? (opnsense.error ? ('unknown (' + opnsense.error + ')') : (opnsense.present ? 'blocked' : 'not blocked')) : 'disabled'; document.getElementById('state').innerHTML = [ '
State: ' + escapeHtml(state.state) + '
', '
Override: ' + escapeHtml(state.manual_override) + '
', '
Total events: ' + escapeHtml(state.total_events) + '
', - '
Last seen: ' + escapeHtml(new Date(state.last_seen_at).toLocaleString()) + '
', - '
Reason: ' + escapeHtml(state.state_reason) + '
' + '
Last seen: ' + escapeHtml(formatDate(state.last_seen_at)) + '
', + '
Reason: ' + escapeHtml(state.state_reason) + '
', + '
OPNsense alias: ' + escapeHtml(opnsenseLabel) + '
' ].join(''); } + function renderActions(data) { + const actions = data.actions || {}; + const buttons = []; + if (actions.can_unblock) { + buttons.push(''); + } else if (actions.can_block) { + buttons.push(''); + } + if (actions.can_clear_override) { + buttons.push(''); + } + buttons.push(''); + document.getElementById('actions').innerHTML = buttons.join(''); + } + + function renderInvestigation(investigation) { + if (!investigation) { + document.getElementById('investigation').innerHTML = '
No cached investigation yet.
'; + return; + } + const rows = []; + if (investigation.bot) { + rows.push('
Bot: ' + escapeHtml(investigation.bot.icon || '🤖') + ' ' + escapeHtml(investigation.bot.name) + ' via ' + escapeHtml(investigation.bot.method) + '
'); + } else { + rows.push('
Bot: no verified bot match
'); + } + if (investigation.reverse_dns) { + rows.push('
Reverse DNS: ' + escapeHtml(investigation.reverse_dns.ptr || '—') + '' + (investigation.reverse_dns.forward_confirmed ? ' · forward-confirmed' : '') + '
'); + } + if (investigation.registration) { + rows.push('
Registration: ' + escapeHtml(investigation.registration.organization || investigation.registration.name || '—') + '
'); + rows.push('
Prefix: ' + escapeHtml(investigation.registration.prefix || '—') + '
'); + rows.push('
Country: ' + escapeHtml(investigation.registration.country || '—') + '
'); + rows.push('
Abuse contact: ' + escapeHtml(investigation.registration.abuse_email || '—') + '
'); + } + if (investigation.reputation) { + const label = investigation.reputation.spamhaus_listed ? ('listed (' + (investigation.reputation.spamhaus_codes || []).join(', ') + ')') : 'not listed'; + rows.push('
Spamhaus: ' + escapeHtml(label) + '
'); + if (investigation.reputation.error) { + rows.push('
Spamhaus error: ' + escapeHtml(investigation.reputation.error) + '
'); + } + } + rows.push('
Updated: ' + escapeHtml(formatDate(investigation.updated_at)) + '
'); + if (investigation.error) { + rows.push('
Lookup warning: ' + escapeHtml(investigation.error) + '
'); + } + document.getElementById('investigation').innerHTML = rows.join(''); + } + function renderEvents(items) { - document.getElementById('events-body').innerHTML = items.map(item => [ + const rows = items.map(item => [ '', - ' ' + escapeHtml(new Date(item.occurred_at).toLocaleString()) + '', + ' ' + escapeHtml(formatDate(item.occurred_at)) + '', ' ' + escapeHtml(item.source_name) + '', + ' ' + escapeHtml(item.host) + '', ' ' + escapeHtml(item.method) + '', - ' ' + escapeHtml(item.path) + '', + ' ' + escapeHtml(item.uri || item.path) + '', ' ' + escapeHtml(item.status) + '', ' ' + escapeHtml(item.decision) + (item.enforced ? ' · enforced' : '') + '', + ' ' + escapeHtml(item.user_agent || '—') + '', '' - ].join('')).join(''); + ].join('')); + document.getElementById('events-body').innerHTML = rows.length > 0 ? rows.join('') : 'No requests stored for this IP yet.'; } function renderDecisions(items) { - document.getElementById('decisions-body').innerHTML = items.map(item => [ + const rows = items.map(item => [ '', - ' ' + escapeHtml(new Date(item.created_at).toLocaleString()) + '', + ' ' + escapeHtml(formatDate(item.created_at)) + '', ' ' + escapeHtml(item.kind) + '', ' ' + escapeHtml(item.action) + '', ' ' + escapeHtml(item.reason) + '', ' ' + escapeHtml(item.actor) + '', '' - ].join('')).join(''); + ].join('')); + document.getElementById('decisions-body').innerHTML = rows.length > 0 ? rows.join('') : 'No decisions recorded for this IP yet.'; } function renderBackend(items) { - document.getElementById('backend-body').innerHTML = items.map(item => [ + const rows = items.map(item => [ '', - ' ' + escapeHtml(new Date(item.created_at).toLocaleString()) + '', + ' ' + escapeHtml(formatDate(item.created_at)) + '', ' ' + escapeHtml(item.action) + '', ' ' + escapeHtml(item.result) + '', ' ' + escapeHtml(item.message) + '', '' - ].join('')).join(''); + ].join('')); + document.getElementById('backend-body').innerHTML = rows.length > 0 ? rows.join('') : 'No backend actions recorded for this IP yet.'; + } + + function renderAll(data) { + renderState(data || {}); + renderActions(data || {}); + renderInvestigation((data || {}).investigation || null); + renderEvents((data || {}).recent_events || []); + renderDecisions((data || {}).decisions || []); + renderBackend((data || {}).backend_actions || []); } async function refresh() { const response = await fetch('/api/ips/' + encodeURIComponent(ip)); const data = await response.json(); - renderState(data.state || {}); - renderEvents(data.recent_events || []); - renderDecisions(data.decisions || []); - renderBackend(data.backend_actions || []); + renderAll(data); } - refresh(); - setInterval(refresh, 2000); + refresh().then(() => investigate()); ` diff --git a/internal/web/handler_test.go b/internal/web/handler_test.go index 2db442e..99bc9a5 100644 --- a/internal/web/handler_test.go +++ b/internal/web/handler_test.go @@ -43,6 +43,14 @@ func TestHandlerServesOverviewAndManualActions(t *testing.T) { t.Fatalf("unexpected recorded action: %q", app.lastAction) } + recorder = httptest.NewRecorder() + request = httptest.NewRequest(http.MethodPost, "/api/ips/203.0.113.10/investigate", strings.NewReader(`{"actor":"tester"}`)) + request.Header.Set("Content-Type", "application/json") + handler.ServeHTTP(recorder, request) + if recorder.Code != http.StatusOK { + t.Fatalf("unexpected investigate status: %d body=%s", recorder.Code, recorder.Body.String()) + } + recorder = httptest.NewRecorder() request = httptest.NewRequest(http.MethodGet, "/", nil) handler.ServeHTTP(recorder, request) @@ -105,9 +113,16 @@ func (s *stubApp) GetIPDetails(context.Context, string) (model.IPDetails, error) RecentEvents: []model.Event{{ID: 1, ClientIP: "203.0.113.10", OccurredAt: now, Decision: model.DecisionActionBlock}}, Decisions: []model.DecisionRecord{{ID: 1, IP: "203.0.113.10", Action: model.DecisionActionBlock, CreatedAt: now}}, BackendActions: []model.OPNsenseAction{{ID: 1, IP: "203.0.113.10", Action: "block", Result: "added", CreatedAt: now}}, + OPNsense: model.OPNsenseStatus{Configured: true, Present: true, CheckedAt: now}, + Actions: model.ActionAvailability{CanUnblock: true}, + Investigation: &model.IPInvestigation{IP: "203.0.113.10", UpdatedAt: now}, }, nil } +func (s *stubApp) InvestigateIP(context.Context, string) (model.IPDetails, error) { + return s.GetIPDetails(context.Background(), "203.0.113.10") +} + func (s *stubApp) ForceBlock(_ context.Context, ip string, actor string, reason string) error { s.lastAction = "block:" + ip + ":" + actor + ":" + reason return nil