implement media_alert filtering support.

This commit is contained in:
fiatjaf
2025-02-24 19:54:50 -03:00
parent 0f2aba5cb3
commit b615082c60
6 changed files with 169 additions and 4 deletions

View File

@@ -1,6 +1,7 @@
package main package main
import ( import (
"context"
"fmt" "fmt"
"regexp" "regexp"
"slices" "slices"
@@ -24,6 +25,44 @@ func hasProhibitedWordOrTag(event *nostr.Event) bool {
return pornWordsRe.MatchString(event.Content) return pornWordsRe.MatchString(event.Content)
} }
// hasExplicitMedia reports whether any media reference found in the event
// content is flagged as explicit by isExplicitContent.
//
// Every full match of imageExtensionMatcher, followed by every full match of
// videoExtensionMatcher, is treated as a media URL and checked in that order.
// A URL whose check fails is logged and skipped, so an unreachable checker
// never blocks an event by itself. The first flagged URL short-circuits the
// scan.
func hasExplicitMedia(ctx context.Context, event *nostr.Event) bool {
	// collect candidates up front: images first, then videos
	candidates := imageExtensionMatcher.FindAllString(event.Content, -1)
	candidates = append(candidates, videoExtensionMatcher.FindAllString(event.Content, -1)...)

	for i := range candidates {
		flagged, err := isExplicitContent(ctx, candidates[i])
		switch {
		case err != nil:
			// best effort: a failed lookup is not treated as a verdict
			log.Warn().Err(err).Str("url", candidates[i]).Msg("failed to check media content")
		case flagged:
			return true
		}
	}

	return false
}
// list copied from https://jsr.io/@gleasonator/policy/0.2.0/data/porntags.json // list copied from https://jsr.io/@gleasonator/policy/0.2.0/data/porntags.json
var pornTags = []string{ var pornTags = []string{
"adult", "adult",

View File

@@ -29,6 +29,7 @@ type Settings struct {
TailwindDebug bool `envconfig:"TAILWIND_DEBUG"` TailwindDebug bool `envconfig:"TAILWIND_DEBUG"`
RelayConfigPath string `envconfig:"RELAY_CONFIG_PATH"` RelayConfigPath string `envconfig:"RELAY_CONFIG_PATH"`
TrustedPubKeys []string `envconfig:"TRUSTED_PUBKEYS"` TrustedPubKeys []string `envconfig:"TRUSTED_PUBKEYS"`
MediaAlertAPIKey string `envconfig:"MEDIA_ALERT_API_KEY"`
} }
//go:embed static/* //go:embed static/*

117
media_alert.go Normal file
View File

@@ -0,0 +1,117 @@
package main
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"github.com/dgraph-io/ristretto"
)
// mediaAlertCache memoizes media verdicts keyed by media URL; the stored value
// is true when the media was judged explicit.
//
// NOTE(review): the error returned by ristretto.NewCache is discarded here.
var mediaAlertCache, _ = ristretto.NewCache(&ristretto.Config[string, bool]{
NumCounters: 1e6, // number of keys to track frequency of (1M)
MaxCost: 1 << 24, // maximum total cost of the cache: 1<<24 = 16,777,216 cost units (entries in this file are stored with cost 1, so this is not a byte size)
BufferItems: 64, // number of keys per Get buffer
})
// mediaAlertResponse is the JSON body decoded from the Media Alert score
// endpoint.
type mediaAlertResponse struct {
// Message is the outcome label; the response handling in this file
// recognizes "SUCCESS", "TIMEOUT", "RATE LIMITED" and "INVALID MEDIA".
Message string `json:"message"`
// Score is compared against 0.90 on a "SUCCESS" response to decide whether
// the media is explicit.
Score float64 `json:"score"`
}
// isExplicitContent reports whether the media at mediaURL is considered
// explicit.
//
// Verdicts are memoized in mediaAlertCache for 24 hours, so repeated checks of
// the same URL are answered without calling checkMediaAlert again. An empty
// mediaURL (for example a profile without a picture) is never explicit and is
// answered without touching the cache or the network.
//
// When checkMediaAlert fails, its error is returned and nothing is cached.
//
// NOTE(review): checkMediaAlert also returns (false, nil) for outcomes that are
// not real verdicts (no API key, "TIMEOUT", "RATE LIMITED"); those are cached
// here as "not explicit" for the full 24 hours like any other result.
func isExplicitContent(ctx context.Context, mediaURL string) (bool, error) {
	// nothing to check: avoid a pointless API call and a cache entry for ""
	if mediaURL == "" {
		return false, nil
	}

	// check cache first
	if cached, ok := mediaAlertCache.Get(mediaURL); ok {
		return cached, nil
	}

	// make the API request
	explicit, err := checkMediaAlert(ctx, mediaURL, false)
	if err != nil {
		return false, err
	}

	// store result in cache (cost 1 per entry)
	mediaAlertCache.SetWithTTL(mediaURL, explicit, 1, 24*time.Hour)
	return explicit, nil
}
// checkMediaAlert asks the Media Alert service to score mediaURL and reports
// whether the media should be treated as explicit, i.e. the service answered
// "SUCCESS" with a score of at least 0.90.
//
// It returns (false, nil) when no API key is configured and when the service
// answers "TIMEOUT", "RATE LIMITED" or "INVALID MEDIA". On a first-attempt
// "TIMEOUT" (retry == false) a background retry is scheduled which stores its
// verdict in mediaAlertCache; pass retry == true to mark the call as a retry
// and suppress further rescheduling.
//
// An error is returned when the request cannot be built or sent, when the
// HTTP status is >= 300, when the body cannot be decoded, or when the response
// message is not one of the known values.
func checkMediaAlert(ctx context.Context, mediaURL string, retry bool) (bool, error) {
	explicit, _, err := queryMediaAlert(ctx, mediaURL, retry)
	return explicit, err
}

// queryMediaAlert performs the request behind checkMediaAlert. Besides the
// verdict it reports whether the service gave a conclusive answer ("SUCCESS"
// or "INVALID MEDIA"), so a transient "TIMEOUT" or "RATE LIMITED" reply is not
// mistaken for "checked and clean" by the background retry.
func queryMediaAlert(ctx context.Context, mediaURL string, retry bool) (explicit, conclusive bool, err error) {
	if s.MediaAlertAPIKey == "" {
		return false, false, nil // skip check if no API key is configured
	}

	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	query := url.Values{
		"key": {s.MediaAlertAPIKey},
		"url": {mediaURL},
	}
	req, err := http.NewRequestWithContext(ctx, "GET", "https://nostr-media-alert.com/score?"+query.Encode(), nil)
	if err != nil {
		return false, false, fmt.Errorf("failed to create request: %w", withoutRequestURL(err))
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return false, false, fmt.Errorf("request failed: %w", withoutRequestURL(err))
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 300 {
		// the body only feeds an error message, so cap how much of it is read
		msg, _ := io.ReadAll(io.LimitReader(resp.Body, 4<<10))
		return false, false, fmt.Errorf("got unexpected response %d: %s", resp.StatusCode, string(msg))
	}

	var result mediaAlertResponse
	if err = json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return false, false, fmt.Errorf("failed to decode response: %w", err)
	}

	// handle different response types
	switch result.Message {
	case "SUCCESS":
		return result.Score >= 0.90, true, nil
	case "TIMEOUT":
		// if this is already a retry, don't retry again
		if !retry {
			go retryMediaAlert(mediaURL)
		}
		return false, false, nil
	case "RATE LIMITED":
		log.Warn().Str("url", mediaURL).Msg("media alert API rate limited")
		return false, false, nil
	case "INVALID MEDIA":
		log.Debug().Str("url", mediaURL).Msg("invalid media for content check")
		return false, true, nil
	default:
		return false, false, fmt.Errorf("unknown response message: %s", result.Message)
	}
}

// retryMediaAlert re-queries mediaURL once after a delay and, only when the
// service gives a conclusive answer, stores the verdict in mediaAlertCache.
// It is meant to run in its own goroutine.
func retryMediaAlert(mediaURL string) {
	// wait before retrying
	time.Sleep(20 * time.Second)

	// detached from the caller's context, which may be finished by now;
	// queryMediaAlert applies its own per-request timeout
	explicit, conclusive, err := queryMediaAlert(context.Background(), mediaURL, true)
	if err != nil || !conclusive {
		// another timeout or rate limit is not a verdict: do not cache it
		return
	}

	// update cache with the result (the expensive stuff we store for longer)
	mediaAlertCache.SetWithTTL(mediaURL, explicit, 1, time.Hour*72)
}

// withoutRequestURL unwraps a *url.Error so that the request URL, which
// carries the API key in its query string, does not end up in error messages
// and logs. Other errors are returned unchanged.
func withoutRequestURL(err error) error {
	if uerr, ok := err.(*url.Error); ok {
		return uerr.Err
	}
	return err
}

View File

@@ -96,8 +96,10 @@ func renderEvent(w http.ResponseWriter, r *http.Request) {
return return
} }
hasURL := urlRegex.MatchString(data.event.Content) hasURL := urlRegex.MatchString(data.event.Content)
if isMaliciousBridged(data.event.author) || (hasURL && hasProhibitedWordOrTag(data.event.Event)) { if isMaliciousBridged(data.event.author) ||
log.Warn().Str("event", data.nevent).Msg("detect prohibited porn content") (hasURL && hasProhibitedWordOrTag(data.event.Event)) ||
(hasURL && hasExplicitMedia(ctx, data.event.Event)) {
log.Warn().Str("event", data.nevent).Msg("detect prohibited content")
http.Error(w, "event is not allowed", http.StatusNotFound) http.Error(w, "event is not allowed", http.StatusNotFound)
return return
} }

View File

@@ -71,7 +71,9 @@ func renderImage(w http.ResponseWriter, r *http.Request) {
return return
} }
hasURL := urlRegex.MatchString(data.event.Content) hasURL := urlRegex.MatchString(data.event.Content)
if isMaliciousBridged(data.event.author) || (hasURL && hasProhibitedWordOrTag(data.event.Event)) { if isMaliciousBridged(data.event.author) ||
(hasURL && hasProhibitedWordOrTag(data.event.Event)) ||
(hasURL && hasExplicitMedia(ctx, data.event.Event)) {
http.Error(w, "event is not allowed", http.StatusNotFound) http.Error(w, "event is not allowed", http.StatusNotFound)
return return
} }

View File

@@ -44,7 +44,11 @@ func renderProfile(ctx context.Context, r *http.Request, w http.ResponseWriter,
return return
} }
if isMaliciousBridged(profile) { if isMaliciousBridged(profile) {
http.Error(w, "event is not allowed", http.StatusNotFound) http.Error(w, "profile is malicious", http.StatusNotFound)
return
}
if is, _ := isExplicitContent(ctx, profile.Picture); is {
http.Error(w, "profile is not allowed", http.StatusNotFound)
return return
} }