diff --git a/content_filtering.go b/content_filtering.go
index c2c5861..0269bd1 100644
--- a/content_filtering.go
+++ b/content_filtering.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"regexp"
 	"slices"
@@ -24,6 +25,44 @@ func hasProhibitedWordOrTag(event *nostr.Event) bool {
 	return pornWordsRe.MatchString(event.Content)
 }
 
+// hasExplicitMedia reports whether any image or video URL found in the
+// event content is flagged as explicit by the media alert API.
+//
+// URLs are checked one after the other and the first explicit one ends the
+// search. a URL whose check fails is logged and skipped, so an API error
+// alone never blocks an event
+func hasExplicitMedia(ctx context.Context, event *nostr.Event) bool {
+	// extract image and video URLs from content
+	mediaURLs := imageExtensionMatcher.FindAllString(event.Content, -1)
+	mediaURLs = append(mediaURLs, videoExtensionMatcher.FindAllString(event.Content, -1)...)
+
+	// the same link may appear more than once, check every URL only once
+	checked := make(map[string]struct{}, len(mediaURLs))
+	for _, mediaURL := range mediaURLs {
+		if _, dup := checked[mediaURL]; dup {
+			continue
+		}
+		checked[mediaURL] = struct{}{}
+
+		isExplicit, err := isExplicitContent(ctx, mediaURL)
+		if err != nil {
+			log.Warn().Err(err).Str("url", mediaURL).Msg("failed to check media content")
+			if ctx.Err() != nil {
+				// the request was canceled or timed out, every remaining
+				// check would fail the same way
+				return false
+			}
+			continue
+		}
+
+		if isExplicit {
+			return true
+		}
+	}
+
+	return false
+}
+
 // list copied from https://jsr.io/@gleasonator/policy/0.2.0/data/porntags.json
 var pornTags = []string{
 	"adult",
diff --git a/main.go b/main.go
index 76d9a0f..02b8e2f 100644
--- a/main.go
+++ b/main.go
@@ -29,6 +29,7 @@ type Settings struct {
 	TailwindDebug bool `envconfig:"TAILWIND_DEBUG"`
 	RelayConfigPath string `envconfig:"RELAY_CONFIG_PATH"`
 	TrustedPubKeys []string `envconfig:"TRUSTED_PUBKEYS"`
+	MediaAlertAPIKey string `envconfig:"MEDIA_ALERT_API_KEY"`
 }
 
 //go:embed static/*
diff --git a/media_alert.go b/media_alert.go
new file mode 100644
index 0000000..2210579
--- /dev/null
+++ b/media_alert.go
@@ -0,0 +1,183 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"github.com/dgraph-io/ristretto"
+)
+
+const (
+	// mediaAlertEndpoint is the scoring endpoint of the Media Alert service
+	mediaAlertEndpoint = "https://nostr-media-alert.com/score"
+
+	// mediaAlertThreshold is the score from which media counts as explicit
+	mediaAlertThreshold = 0.90
+
+	// mediaAlertRequestTimeout bounds a single API request
+	mediaAlertRequestTimeout = 30 * time.Second
+
+	// mediaAlertRetryDelay is how long to wait before retrying after a TIMEOUT answer
+	mediaAlertRetryDelay = 20 * time.Second
+
+	// mediaAlertTTL is how long a verdict from a first attempt is cached
+	mediaAlertTTL = 24 * time.Hour
+
+	// mediaAlertRetryTTL is how long a verdict from a retry is cached
+	// (the expensive stuff we store for longer)
+	mediaAlertRetryTTL = 72 * time.Hour
+
+	// mediaAlertRateLimitedTTL is how long we back off from a URL after a rate limit
+	mediaAlertRateLimitedTTL = time.Minute
+
+	// mediaAlertMaxErrorBody caps how much of an error response body is quoted
+	mediaAlertMaxErrorBody = 1024
+)
+
+// errMediaAlertRateLimited is returned when the API is rate limiting us,
+// which says nothing about the media itself
+var errMediaAlertRateLimited = errors.New("media alert API rate limited")
+
+// mediaAlertCache maps a media URL to its verdict (true means explicit).
+// every entry is stored with cost 1, so MaxCost bounds the number of
+// entries and not a size in bytes
+var mediaAlertCache, _ = ristretto.NewCache(&ristretto.Config[string, bool]{
+	NumCounters: 1e6,     // number of keys to track frequency of (1M)
+	MaxCost:     1 << 24, // maximum total cost, ~16.7M entries at cost 1 each
+	BufferItems: 64,      // number of keys per Get buffer
+})
+
+type mediaAlertResponse struct {
+	Message string  `json:"message"`
+	Score   float64 `json:"score"`
+}
+
+// isExplicitContent reports whether the media at mediaURL is explicit
+// according to the Media Alert API, caching verdicts per URL.
+//
+// it returns (false, nil) without calling the API when mediaURL is empty or
+// no API key is configured, and a non-nil error when no verdict was obtained
+func isExplicitContent(ctx context.Context, mediaURL string) (bool, error) {
+	// nothing to check: callers pass optional fields such as a profile picture
+	if mediaURL == "" || s.MediaAlertAPIKey == "" {
+		return false, nil
+	}
+
+	// check cache first
+	if val, found := mediaAlertCache.Get(mediaURL); found {
+		return val, nil
+	}
+
+	// make the API request
+	isExplicit, err := checkMediaAlert(ctx, mediaURL, false)
+	if err != nil {
+		if errors.Is(err, errMediaAlertRateLimited) {
+			// remember the miss only briefly: long enough to back off, without
+			// clearing the URL for a whole day on an answer we never got
+			mediaAlertCache.SetWithTTL(mediaURL, false, 1, mediaAlertRateLimitedTTL)
+		}
+		return false, err
+	}
+
+	// store result in cache
+	mediaAlertCache.SetWithTTL(mediaURL, isExplicit, 1, mediaAlertTTL)
+
+	return isExplicit, nil
+}
+
+// checkMediaAlert makes the actual API request to the Media Alert service.
+// if retry is true, this is a retry attempt after a timeout and no further
+// retry is scheduled.
+//
+// a TIMEOUT or INVALID MEDIA answer yields (false, nil); a rate limit yields
+// errMediaAlertRateLimited so that callers do not mistake it for a verdict
+func checkMediaAlert(ctx context.Context, mediaURL string, retry bool) (bool, error) {
+	if s.MediaAlertAPIKey == "" {
+		return false, nil // skip check if no API key is configured
+	}
+
+	ctx, cancel := context.WithTimeout(ctx, mediaAlertRequestTimeout)
+	defer cancel()
+
+	query := url.Values{
+		"key": {s.MediaAlertAPIKey},
+		"url": {mediaURL},
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, mediaAlertEndpoint+"?"+query.Encode(), nil)
+	if err != nil {
+		return false, fmt.Errorf("failed to create request: %w", withoutRequestURL(err))
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return false, fmt.Errorf("request failed: %w", withoutRequestURL(err))
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode >= 300 {
+		// the body is only quoted in the error message, so cap what we read
+		msg, _ := io.ReadAll(io.LimitReader(resp.Body, mediaAlertMaxErrorBody))
+		return false, fmt.Errorf("got unexpected response %d: %s", resp.StatusCode, string(msg))
+	}
+
+	var result mediaAlertResponse
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return false, fmt.Errorf("failed to decode response: %w", err)
+	}
+
+	// handle different response types
+	switch result.Message {
+	case "SUCCESS":
+		return result.Score >= mediaAlertThreshold, nil
+	case "TIMEOUT":
+		// no score yet: report "not explicit" for now and let a background retry
+		// fix up the cache, unless this call already is that retry
+		if !retry {
+			go retryMediaAlert(mediaURL)
+		}
+		return false, nil
+	case "RATE LIMITED":
+		return false, errMediaAlertRateLimited
+	case "INVALID MEDIA":
+		log.Debug().Str("url", mediaURL).Msg("invalid media for content check")
+		return false, nil
+	default:
+		return false, fmt.Errorf("unknown response message: %s", result.Message)
+	}
+}
+
+// retryMediaAlert waits for mediaAlertRetryDelay, asks the API about mediaURL
+// once more and stores the verdict in the cache. it is meant to run in its own
+// goroutine, detached from the HTTP request that triggered it
+func retryMediaAlert(mediaURL string) {
+	// wait before retrying
+	time.Sleep(mediaAlertRetryDelay)
+
+	// the triggering request may be over by now, so start from a fresh context;
+	// checkMediaAlert applies its own request timeout on top of it
+	isExplicit, err := checkMediaAlert(context.Background(), mediaURL, true)
+	if err != nil {
+		log.Warn().Err(err).Str("url", mediaURL).Msg("media alert retry failed")
+		return
+	}
+
+	// update cache with the result (the expensive stuff we store for longer)
+	mediaAlertCache.SetWithTTL(mediaURL, isExplicit, 1, mediaAlertRetryTTL)
+}
+
+// withoutRequestURL strips the *url.Error wrapper added by net/http and
+// net/url. that wrapper embeds the full request URL, whose query string
+// carries the API key, and these errors end up in the logs
+func withoutRequestURL(err error) error {
+	var urlErr *url.Error
+	if errors.As(err, &urlErr) {
+		return urlErr.Err
+	}
+	return err
+}
diff --git a/render_event.go b/render_event.go
index 61c50fd..234482a 100644
--- a/render_event.go
+++ b/render_event.go
@@ -96,8 +96,10 @@ func renderEvent(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	hasURL := urlRegex.MatchString(data.event.Content)
-	if isMaliciousBridged(data.event.author) || (hasURL && hasProhibitedWordOrTag(data.event.Event)) {
-		log.Warn().Str("event", data.nevent).Msg("detect prohibited porn content")
+	if isMaliciousBridged(data.event.author) ||
+		(hasURL && hasProhibitedWordOrTag(data.event.Event)) ||
+		(hasURL && hasExplicitMedia(ctx, data.event.Event)) {
+		log.Warn().Str("event", data.nevent).Msg("detect prohibited content")
 		http.Error(w, "event is not allowed", http.StatusNotFound)
 		return
 	}
diff --git a/render_image.go b/render_image.go
index d727b26..0c7e3dd 100644
--- a/render_image.go
+++ b/render_image.go
@@ -71,7 +71,9 @@ func renderImage(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	hasURL := urlRegex.MatchString(data.event.Content)
-	if isMaliciousBridged(data.event.author) || (hasURL && hasProhibitedWordOrTag(data.event.Event)) {
+	if isMaliciousBridged(data.event.author) ||
+		(hasURL && hasProhibitedWordOrTag(data.event.Event)) ||
+		(hasURL && hasExplicitMedia(ctx, data.event.Event)) {
 		http.Error(w, "event is not allowed", http.StatusNotFound)
 		return
 	}
diff --git a/render_profile.go b/render_profile.go
index 14a6dbe..c64cd8b 100644
--- a/render_profile.go
+++ b/render_profile.go
@@ -44,7 +44,13 @@ func renderProfile(ctx context.Context, r *http.Request, w http.ResponseWriter,
 		return
 	}
 	if isMaliciousBridged(profile) {
-		http.Error(w, "event is not allowed", http.StatusNotFound)
+		http.Error(w, "profile is malicious", http.StatusNotFound)
+		return
+	}
+	if explicit, err := isExplicitContent(ctx, profile.Picture); err != nil {
+		log.Warn().Err(err).Str("url", profile.Picture).Msg("failed to check profile picture")
+	} else if explicit {
+		http.Error(w, "profile is not allowed", http.StatusNotFound)
 		return
 	}
 