refactored event store

This commit is contained in:
pippellia-btc
2025-06-06 17:01:29 +02:00
parent 56a0ed88dc
commit c35711710f
8 changed files with 213 additions and 63 deletions

5
.gitignore vendored
View File

@@ -4,5 +4,10 @@
# ignore all .env # ignore all .env
**/*.env **/*.env
# ignore all sqlite files
**/*.sqlite
**/*-shm
**/*-wal
# ignore the debugs # ignore the debugs
.vscode .vscode

View File

@@ -47,7 +47,7 @@ type Config struct {
Firehose pipe.FirehoseConfig Firehose pipe.FirehoseConfig
Fetcher pipe.FetcherConfig Fetcher pipe.FetcherConfig
Arbiter pipe.ArbiterConfig Arbiter pipe.ArbiterConfig
Processor pipe.ProcessorConfig Engine pipe.EngineConfig
} }
// NewConfig returns a config with default parameters // NewConfig returns a config with default parameters
@@ -57,7 +57,7 @@ func NewConfig() *Config {
Firehose: pipe.NewFirehoseConfig(), Firehose: pipe.NewFirehoseConfig(),
Fetcher: pipe.NewFetcherConfig(), Fetcher: pipe.NewFetcherConfig(),
Arbiter: pipe.NewArbiterConfig(), Arbiter: pipe.NewArbiterConfig(),
Processor: pipe.NewProcessorConfig(), Engine: pipe.NewEngineConfig(),
} }
} }
@@ -66,7 +66,7 @@ func (c *Config) Print() {
c.Firehose.Print() c.Firehose.Print()
c.Fetcher.Print() c.Fetcher.Print()
c.Arbiter.Print() c.Arbiter.Print()
c.Processor.Print() c.Engine.Print()
} }
// LoadConfig reads the environment variables and parses them into a [Config] struct // LoadConfig reads the environment variables and parses them into a [Config] struct
@@ -174,14 +174,26 @@ func LoadConfig() (*Config, error) {
} }
config.Arbiter.PromotionWait = time.Duration(wait) * time.Second config.Arbiter.PromotionWait = time.Duration(wait) * time.Second
case "PROCESSOR_CACHE_CAPACITY": case "ENGINE_PRINT_EVERY":
config.Processor.CacheCapacity, err = strconv.Atoi(val) config.Engine.PrintEvery, err = strconv.Atoi(val)
if err != nil { if err != nil {
return nil, fmt.Errorf("error parsing %v: %v", keyVal, err) return nil, fmt.Errorf("error parsing %v: %v", keyVal, err)
} }
case "PROCESSOR_PRINT_EVERY": case "ENGINE_UPDATER_CAPACITY":
config.Processor.PrintEvery, err = strconv.Atoi(val) config.Engine.UpdaterCapacity, err = strconv.Atoi(val)
if err != nil {
return nil, fmt.Errorf("error parsing %v: %v", keyVal, err)
}
case "ENGINE_CACHE_CAPACITY":
config.Engine.CacheCapacity, err = strconv.Atoi(val)
if err != nil {
return nil, fmt.Errorf("error parsing %v: %v", keyVal, err)
}
case "ENGINE_ARCHIVE_CAPACITY":
config.Engine.ArchiverCapacity, err = strconv.Atoi(val)
if err != nil { if err != nil {
return nil, fmt.Errorf("error parsing %v: %v", keyVal, err) return nil, fmt.Errorf("error parsing %v: %v", keyVal, err)
} }

View File

@@ -17,6 +17,7 @@ import (
"github.com/nbd-wtf/go-nostr" "github.com/nbd-wtf/go-nostr"
"github.com/redis/go-redis/v9" "github.com/redis/go-redis/v9"
"github.com/vertex-lab/relay/pkg/eventstore"
) )
func main() { func main() {
@@ -32,6 +33,11 @@ func main() {
events := make(chan *nostr.Event, config.EventsCapacity) events := make(chan *nostr.Event, config.EventsCapacity)
pubkeys := make(chan string, config.PubkeysCapacity) pubkeys := make(chan string, config.PubkeysCapacity)
store, err := eventstore.New(config.SQLiteURL)
if err != nil {
panic(err)
}
db := redb.New(&redis.Options{Addr: config.RedisAddress}) db := redb.New(&redis.Options{Addr: config.RedisAddress})
count, err := db.NodeCount(ctx) count, err := db.NodeCount(ctx)
if err != nil { if err != nil {
@@ -63,30 +69,36 @@ func main() {
log.Printf("correctly added %d init pubkeys", len(config.InitPubkeys)) log.Printf("correctly added %d init pubkeys", len(config.InitPubkeys))
} }
var wg sync.WaitGroup var producers sync.WaitGroup
wg.Add(4) var consumers sync.WaitGroup
producers.Add(3)
go func() { go func() {
defer wg.Done() defer producers.Done()
pipe.Firehose(ctx, config.Firehose, db, enqueue(events)) pipe.Firehose(ctx, config.Firehose, db, enqueue(events))
}() }()
go func() { go func() {
defer wg.Done() defer producers.Done()
pipe.Fetcher(ctx, config.Fetcher, pubkeys, enqueue(events)) pipe.Fetcher(ctx, config.Fetcher, pubkeys, enqueue(events))
}() }()
go func() { go func() {
defer wg.Done() defer producers.Done()
pipe.Arbiter(ctx, config.Arbiter, db, enqueue(pubkeys)) pipe.Arbiter(ctx, config.Arbiter, db, enqueue(pubkeys))
close(pubkeys) // Arbiter is the only pubkey sender
}() }()
consumers.Add(1)
go func() { go func() {
defer wg.Done() defer consumers.Done()
pipe.Processor(ctx, config.Processor, db, events) pipe.Engine(ctx, config.Engine, store, db, events)
}() }()
wg.Wait() producers.Wait()
close(events)
consumers.Wait()
} }
// handleSignals listens for OS signals and triggers context cancellation. // handleSignals listens for OS signals and triggers context cancellation.

2
go.mod
View File

@@ -8,6 +8,7 @@ require (
github.com/joho/godotenv v1.5.1 github.com/joho/godotenv v1.5.1
github.com/nbd-wtf/go-nostr v0.51.12 github.com/nbd-wtf/go-nostr v0.51.12
github.com/redis/go-redis/v9 v9.8.0 github.com/redis/go-redis/v9 v9.8.0
github.com/vertex-lab/relay v0.4.5
) )
require ( require (
@@ -26,6 +27,7 @@ require (
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.10 // indirect github.com/klauspost/cpuid/v2 v2.2.10 // indirect
github.com/mailru/easyjson v0.9.0 // indirect github.com/mailru/easyjson v0.9.0 // indirect
github.com/mattn/go-sqlite3 v1.14.24 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/puzpuzpuz/xsync/v3 v3.5.1 // indirect github.com/puzpuzpuz/xsync/v3 v3.5.1 // indirect

4
go.sum
View File

@@ -43,6 +43,8 @@ github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQe
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM=
github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -76,6 +78,8 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/vertex-lab/relay v0.4.5 h1:6elykMvZFV+y/JYNhaNo3+pxbK66QnrmAIv3/3CN+VU=
github.com/vertex-lab/relay v0.4.5/go.mod h1:K3Utw2y0FhhDigT86A2gc/ZqK6MrF06mw6p0CihD4I4=
golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw=
golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE= golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE=
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw= golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw=

View File

@@ -101,10 +101,11 @@ func (b *buffer) Contains(ID string) bool {
// Firehose connects to a list of relays and pulls [relevantKinds] events that are newer than [FirehoseConfig.Since]. // Firehose connects to a list of relays and pulls [relevantKinds] events that are newer than [FirehoseConfig.Since].
// It discards events from unknown pubkeys as an anti-spam mechanism. // It discards events from unknown pubkeys as an anti-spam mechanism.
func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, send func(*nostr.Event) error) { func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, send func(*nostr.Event) error) {
pool := nostr.NewSimplePool(ctx)
defer close(pool)
defer log.Println("Firehose: shutting down...") defer log.Println("Firehose: shutting down...")
pool := nostr.NewSimplePool(ctx)
defer shutdown(pool)
filter := nostr.Filter{ filter := nostr.Filter{
Kinds: relevantKinds, Kinds: relevantKinds,
Since: config.Since(), Since: config.Since(),
@@ -160,19 +161,24 @@ func (c FetcherConfig) Print() {
// - when the batch is bigger than config.Batch // - when the batch is bigger than config.Batch
// - after config.Interval since the last query. // - after config.Interval since the last query.
func Fetcher(ctx context.Context, config FetcherConfig, pubkeys <-chan string, send func(*nostr.Event) error) { func Fetcher(ctx context.Context, config FetcherConfig, pubkeys <-chan string, send func(*nostr.Event) error) {
defer log.Println("Fetcher: shutting down...")
batch := make([]string, 0, config.Batch) batch := make([]string, 0, config.Batch)
timer := time.After(config.Interval) timer := time.After(config.Interval)
pool := nostr.NewSimplePool(ctx) pool := nostr.NewSimplePool(ctx)
defer close(pool) defer shutdown(pool)
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
log.Println("Fetcher: shutting down...")
return return
case pubkey := <-pubkeys: case pubkey, ok := <-pubkeys:
if !ok {
return
}
batch = append(batch, pubkey) batch = append(batch, pubkey)
if len(batch) < config.Batch { if len(batch) < config.Batch {
continue continue
@@ -244,8 +250,8 @@ func fetch(ctx context.Context, pool *nostr.SimplePool, relays, pubkeys []string
return events, nil return events, nil
} }
// Close iterates over the relays in the pool and closes all connections. // Shutdown iterates over the relays in the pool and closes all connections.
func close(pool *nostr.SimplePool) { func shutdown(pool *nostr.SimplePool) {
pool.Relays.Range(func(_ string, relay *nostr.Relay) bool { pool.Relays.Range(func(_ string, relay *nostr.Relay) bool {
relay.Close() relay.Close()
return true return true

View File

@@ -10,39 +10,140 @@ import (
"github/pippellia-btc/crawler/pkg/walks" "github/pippellia-btc/crawler/pkg/walks"
"log" "log"
"slices" "slices"
"sync/atomic"
"time" "time"
"github.com/nbd-wtf/go-nostr" "github.com/nbd-wtf/go-nostr"
"github.com/vertex-lab/relay/pkg/eventstore"
) )
// EventTracker tracks the number of events processed
var EventTracker atomic.Int32
var ErrUnsupportedKind = errors.New("unsupported event kind") var ErrUnsupportedKind = errors.New("unsupported event kind")
type ProcessorConfig struct { type EngineConfig struct {
CacheCapacity int
PrintEvery int PrintEvery int
// for the GraphUpdater
UpdaterCapacity int
CacheCapacity int
// for the Archiver
ArchiverCapacity int
} }
func NewProcessorConfig() ProcessorConfig { func NewEngineConfig() EngineConfig {
return ProcessorConfig{ return EngineConfig{
CacheCapacity: 10000, PrintEvery: 5000,
PrintEvery: 5000} UpdaterCapacity: 1000,
CacheCapacity: 100_000,
ArchiverCapacity: 1000,
}
} }
func (c ProcessorConfig) Print() { func (c EngineConfig) Print() {
fmt.Printf("Processor\n") fmt.Printf("Engine\n")
fmt.Printf(" CacheCapacity: %d\n", c.CacheCapacity)
fmt.Printf(" PrintEvery: %d\n", c.PrintEvery) fmt.Printf(" PrintEvery: %d\n", c.PrintEvery)
fmt.Printf(" UpdaterCapacity: %d\n", c.UpdaterCapacity)
fmt.Printf(" CacheCapacity: %d\n", c.CacheCapacity)
fmt.Printf(" ArchiveCapacity: %d\n", c.ArchiverCapacity)
} }
func Processor( // Engine is responsible for dispatching the correct events to the [Archiver] or [GraphUpdater].
func Engine(
ctx context.Context, ctx context.Context,
config ProcessorConfig, config EngineConfig,
store *eventstore.Store,
db redb.RedisDB, db redb.RedisDB,
//store *eventstore.Store,
events chan *nostr.Event) { events chan *nostr.Event) {
var err error defer log.Println("Engine: shutting down...")
var processed int
graphEvents := make(chan *nostr.Event, config.UpdaterCapacity)
archiveEvents := make(chan *nostr.Event, config.ArchiverCapacity)
defer close(graphEvents)
defer close(archiveEvents)
go GraphUpdater(ctx, config, store, db, graphEvents)
go Archiver(ctx, config, store, archiveEvents)
log.Println("Engine: ready to process events")
for {
select {
case <-ctx.Done():
return
case event, ok := <-events:
if !ok {
return
}
switch event.Kind {
case nostr.KindFollowList:
graphEvents <- event
case nostr.KindProfileMetadata:
archiveEvents <- event
default:
logEvent(event, ErrUnsupportedKind)
}
}
}
}
// Archiver receives events from the given channel and writes them to the store.
// Regular kinds are saved, replaceable kinds go through store.Replace, and any
// other kind is skipped without error. It returns when ctx is cancelled or when
// the events channel is closed.
//
// Store errors are logged via logEvent and do not stop the loop. EventTracker is
// incremented once per received event, whether or not it was stored.
func Archiver(
	ctx context.Context,
	config EngineConfig,
	store *eventstore.Store,
	events chan *nostr.Event) {

	// archive persists a single event. The operation context is derived from
	// context.Background() rather than ctx, so it is bounded only by its own
	// 2-second timeout.
	archive := func(event *nostr.Event) error {
		opctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		defer cancel()

		if nostr.IsRegularKind(event.Kind) {
			return store.Save(opctx, event)
		}
		if nostr.IsReplaceableKind(event.Kind) {
			_, err := store.Replace(opctx, event)
			return err
		}
		return nil
	}

	for {
		select {
		case <-ctx.Done():
			return

		case event, ok := <-events:
			if !ok {
				// channel closed by the sender: nothing more to archive
				return
			}

			if err := archive(event); err != nil {
				logEvent(event, err)
			}
			EventTracker.Add(1)
		}
	}
}
// GraphUpdater consumes events to update the graph and random walks.
func GraphUpdater(
ctx context.Context,
config EngineConfig,
store *eventstore.Store,
db redb.RedisDB,
events chan *nostr.Event) {
cache := walks.NewWalker( cache := walks.NewWalker(
walks.WithCapacity(config.CacheCapacity), walks.WithCapacity(config.CacheCapacity),
@@ -50,34 +151,36 @@ func Processor(
walks.WithLogFile("cache.log"), walks.WithLogFile("cache.log"),
) )
log.Println("Processor: ready to process events")
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
log.Println("Processor: shutting down...")
return return
case event := <-events: case event, ok := <-events:
switch event.Kind { if !ok {
case nostr.KindFollowList: return
err = processFollowList(cache, db, event)
case nostr.KindProfileMetadata:
err = nil
default:
err = ErrUnsupportedKind
} }
err := func() error {
opctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
replaced, err := store.Replace(opctx, event)
if err != nil {
return err
}
if replaced {
return processFollowList(opctx, db, cache, event)
}
return nil
}()
if err != nil { if err != nil {
log.Printf("Processor: event ID %s, kind %d by %s: %v", event.ID, event.Kind, event.PubKey, err) logEvent(event, err)
} }
processed++ EventTracker.Add(1)
if processed%config.PrintEvery == 0 {
log.Printf("Processor: processed %d events", processed)
}
} }
} }
} }
@@ -85,10 +188,7 @@ func Processor(
// processFollowList parses the pubkeys listed in the event, and uses them to: // processFollowList parses the pubkeys listed in the event, and uses them to:
// - update the follows of the author (db and cache) // - update the follows of the author (db and cache)
// - update the author's random walks and signal the number to the [WalksTracker] // - update the author's random walks and signal the number to the [WalksTracker]
func processFollowList(cache *walks.CachedWalker, db redb.RedisDB, event *nostr.Event) error { func processFollowList(ctx context.Context, db redb.RedisDB, cache *walks.CachedWalker, event *nostr.Event) error {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
author, err := db.NodeByKey(ctx, event.PubKey) author, err := db.NodeByKey(ctx, event.PubKey)
if err != nil { if err != nil {
return err return err
@@ -173,6 +273,11 @@ func parsePubkeys(event *nostr.Event) []string {
return unique(pubkeys) return unique(pubkeys)
} }
// logEvent logs a message about the event e, prefixed with its ID, kind and
// author pubkey. extra is appended using the %v verb, so it can be an error or
// any other value.
//
// The event fields are passed as arguments to a constant format string instead
// of being spliced into the format itself: a '%' inside e.ID or e.PubKey would
// otherwise be interpreted as a formatting verb and corrupt the log line.
func logEvent(e *nostr.Event, extra any) {
	log.Printf("Engine: event ID %s, kind %d by %s: %v", e.ID, e.Kind, e.PubKey, extra)
}
// Unique returns a slice of unique elements of the input slice. // Unique returns a slice of unique elements of the input slice.
func unique[E cmp.Ordered](slice []E) []E { func unique[E cmp.Ordered](slice []E) []E {
if len(slice) == 0 { if len(slice) == 0 {

View File

@@ -447,9 +447,13 @@ func (db RedisDB) Pubkeys(ctx context.Context, nodes ...graph.ID) ([]string, err
type MissingHandler func(ctx context.Context, db RedisDB, pubkey string) (graph.ID, error) type MissingHandler func(ctx context.Context, db RedisDB, pubkey string) (graph.ID, error)
// Ignore pubkeys that are not found
func Ignore(context.Context, RedisDB, string) (graph.ID, error) { return "", nil } func Ignore(context.Context, RedisDB, string) (graph.ID, error) { return "", nil }
// Sentinel returns a sentinel value ("-1") as the node ID of pubkeys not found
func Sentinel(context.Context, RedisDB, string) (graph.ID, error) { return "-1", nil } func Sentinel(context.Context, RedisDB, string) (graph.ID, error) { return "-1", nil }
// AddValid pubkeys to the database if they were not already present
func AddValid(ctx context.Context, db RedisDB, pubkey string) (graph.ID, error) { func AddValid(ctx context.Context, db RedisDB, pubkey string) (graph.ID, error) {
if !nostr.IsValidPublicKey(pubkey) { if !nostr.IsValidPublicKey(pubkey) {
return "", nil return "", nil