added sync cmd

This commit is contained in:
pippellia-btc
2025-06-06 17:58:17 +02:00
parent 3d28ad1c5a
commit be2e093422
5 changed files with 193 additions and 14 deletions

View File

@@ -3,6 +3,7 @@ package main
import ( import (
"context" "context"
"fmt" "fmt"
"github/pippellia-btc/crawler/pkg/config"
"github/pippellia-btc/crawler/pkg/graph" "github/pippellia-btc/crawler/pkg/graph"
"github/pippellia-btc/crawler/pkg/pipe" "github/pippellia-btc/crawler/pkg/pipe"
"github/pippellia-btc/crawler/pkg/redb" "github/pippellia-btc/crawler/pkg/redb"
@@ -25,7 +26,7 @@ func main() {
defer cancel() defer cancel()
go handleSignals(cancel) go handleSignals(cancel)
config, err := LoadConfig() config, err := config.Load()
if err != nil { if err != nil {
panic(err) panic(err)
} }
@@ -45,9 +46,13 @@ func main() {
} }
if count == 0 { if count == 0 {
log.Println("initialize from empty database...") if len(config.InitPubkeys) == 0 {
panic("init pubkeys are empty")
}
log.Println("initialize from empty database...")
nodes := make([]graph.ID, len(config.InitPubkeys)) nodes := make([]graph.ID, len(config.InitPubkeys))
for i, pk := range config.InitPubkeys { for i, pk := range config.InitPubkeys {
nodes[i], err = db.AddNode(ctx, pk) nodes[i], err = db.AddNode(ctx, pk)
if err != nil { if err != nil {

174
cmd/sync/main.go Normal file
View File

@@ -0,0 +1,174 @@
package main
import (
"context"
"fmt"
"github/pippellia-btc/crawler/pkg/config"
"github/pippellia-btc/crawler/pkg/graph"
"github/pippellia-btc/crawler/pkg/pipe"
"github/pippellia-btc/crawler/pkg/redb"
"github/pippellia-btc/crawler/pkg/walks"
"log"
"os"
"os/signal"
"runtime"
"sync"
"syscall"
"time"
"github.com/nbd-wtf/go-nostr"
"github.com/redis/go-redis/v9"
"github.com/vertex-lab/relay/pkg/eventstore"
)
/*
This program synchronizes the Redis database using the events already stored in the EventStore.
If Redis and the eventstore are already in sync, run the executable at /cmd/crawler/.
*/
// main seeds an empty Redis database with the configured init pubkeys and
// their walks, then runs the pipeline (Firehose, Fetcher, Arbiter and
// GraphUpdater) restricted to follow-list events until every producer has
// returned.
//
// It panics when the configuration cannot be loaded, when the event store or
// Redis cannot be reached, when Redis already contains nodes, when no init
// pubkeys are configured, or when the init pubkeys do not fit in the pubkeys
// queue.
func main() {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go handleSignals(cancel)

	// Named cfg so the local does not shadow the imported config package.
	cfg, err := config.Load()
	if err != nil {
		panic(err)
	}

	events := make(chan *nostr.Event, cfg.EventsCapacity)
	pubkeys := make(chan string, cfg.PubkeysCapacity)

	store, err := eventstore.New(cfg.SQLiteURL)
	if err != nil {
		panic(err)
	}

	db := redb.New(&redis.Options{Addr: cfg.RedisAddress})
	count, err := db.NodeCount(ctx)
	if err != nil {
		panic(err)
	}

	if count != 0 {
		panic("refusing to run sync when redis is not empty")
	}

	if len(cfg.InitPubkeys) == 0 {
		panic("init pubkeys are empty")
	}

	// The init pubkeys are queued below before any consumer goroutine exists,
	// so a send beyond the buffer capacity would block forever. Fail here,
	// before Redis is modified, instead of hanging half-initialized.
	if len(cfg.InitPubkeys) > cap(pubkeys) {
		panic(fmt.Sprintf(
			"%d init pubkeys do not fit in the pubkeys queue (capacity %d)",
			len(cfg.InitPubkeys), cap(pubkeys),
		))
	}

	log.Println("initialize from empty database...")
	nodes := make([]graph.ID, len(cfg.InitPubkeys))
	for i, pk := range cfg.InitPubkeys {
		nodes[i], err = db.AddNode(ctx, pk)
		if err != nil {
			panic(err)
		}

		pubkeys <- pk // add to queue; cannot block thanks to the capacity check above
	}

	// Named initWalks so the local does not shadow the imported walks package.
	initWalks, err := walks.Generate(ctx, db, nodes...)
	if err != nil {
		panic(err)
	}

	if err := db.AddWalks(ctx, initWalks...); err != nil {
		panic(err)
	}

	log.Printf("correctly added %d init pubkeys", len(cfg.InitPubkeys))

	pipe.Kinds = []int{
		nostr.KindFollowList, // no need to sync other event kinds
	}

	var producers sync.WaitGroup
	var consumers sync.WaitGroup

	producers.Add(3)
	go func() {
		defer producers.Done()
		pipe.Firehose(ctx, cfg.Firehose, db, enqueue(events))
	}()

	go func() {
		defer producers.Done()
		pipe.Fetcher(ctx, cfg.Fetcher, pubkeys, enqueue(events)) // TODO: fetch from the event store
	}()

	go func() {
		defer producers.Done()
		pipe.Arbiter(ctx, cfg.Arbiter, db, enqueue(pubkeys))
		close(pubkeys) // Arbiter is the only pubkey sender
	}()

	consumers.Add(1)
	go func() {
		defer consumers.Done()
		pipe.GraphUpdater(ctx, cfg.Engine, store, db, events)
	}()

	// Close events only after every producer has returned, so that no send
	// can hit a closed channel; then wait for the consumer to finish.
	producers.Wait()
	close(events)
	consumers.Wait()
}
// handleSignals blocks until the process receives SIGINT or SIGTERM, logs the
// shutdown notice and then invokes cancel.
func handleSignals(cancel context.CancelFunc) {
	defer cancel()

	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)

	// Wait for the first interrupt/termination signal.
	<-sigCh
	log.Println(" Signal received. Shutting down...")
}
// enqueue returns a function that attempts a non-blocking send on queue.
// The returned function yields nil when the value was accepted, and an error
// naming the dropped value when the send could not proceed immediately.
func enqueue[T any](queue chan T) func(t T) error {
	// trySend reports whether item was handed to the channel without waiting.
	trySend := func(item T) bool {
		select {
		case queue <- item:
			return true
		default:
			return false
		}
	}

	return func(t T) error {
		if !trySend(t) {
			return fmt.Errorf("channel is full, dropping %v", t)
		}
		return nil
	}
}
// printStats appends a runtime snapshot to "stats.log" every 10 seconds until
// ctx is done. Each snapshot reports the fill level of the events and pubkeys
// queues, the value of pipe.WalksTracker, the number of goroutines and the
// allocated memory in MB. It panics if the log file cannot be opened.
func printStats(ctx context.Context, events chan *nostr.Event, pubkeys chan string) {
	const (
		path      = "stats.log"
		separator = "---------------------------------------"
	)

	out, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
	if err != nil {
		panic(fmt.Errorf("failed to open log file %s: %w", path, err))
	}
	defer out.Close()

	logger := log.New(out, "stats: ", log.LstdFlags)

	// report writes one snapshot to the stats file.
	report := func() {
		numGoroutines := runtime.NumGoroutine()

		var mem runtime.MemStats
		runtime.ReadMemStats(&mem)
		allocMB := float64(mem.Alloc) / (1024 * 1024)

		logger.Println(separator)
		logger.Printf("events queue: %d/%d\n", len(events), cap(events))
		logger.Printf("pubkeys queue: %d/%d\n", len(pubkeys), cap(pubkeys))
		logger.Printf("walks tracker: %v\n", pipe.WalksTracker.Load())
		logger.Printf("goroutines: %d\n", numGoroutines)
		logger.Printf("memory usage: %.2f MB\n", allocMB)
		logger.Println(separator)
	}

	tick := time.NewTicker(10 * time.Second)
	defer tick.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-tick.C:
			report()
		}
	}
}

View File

@@ -1,4 +1,4 @@
package main package config
import ( import (
"fmt" "fmt"
@@ -50,8 +50,8 @@ type Config struct {
Engine pipe.EngineConfig Engine pipe.EngineConfig
} }
// NewConfig returns a config with default parameters // New returns a config with default parameters
func NewConfig() *Config { func New() *Config {
return &Config{ return &Config{
SystemConfig: NewSystemConfig(), SystemConfig: NewSystemConfig(),
Firehose: pipe.NewFirehoseConfig(), Firehose: pipe.NewFirehoseConfig(),
@@ -69,9 +69,9 @@ func (c *Config) Print() {
c.Engine.Print() c.Engine.Print()
} }
// LoadConfig reads the enviroment variables and parses them into a [Config] struct // Load reads the enviroment variables and parses them into a [Config] struct
func LoadConfig() (*Config, error) { func Load() (*Config, error) {
var config = NewConfig() var config = New()
var err error var err error
for _, item := range os.Environ() { for _, item := range os.Environ() {

View File

@@ -11,7 +11,7 @@ import (
) )
var ( var (
relevantKinds = []int{ Kinds = []int{
//nostr.KindProfileMetadata, //nostr.KindProfileMetadata,
nostr.KindFollowList, nostr.KindFollowList,
} }
@@ -98,7 +98,7 @@ func (b *buffer) Contains(ID string) bool {
return slices.Contains(b.IDs, ID) return slices.Contains(b.IDs, ID)
} }
// Firehose connects to a list of relays and pulls [relevantKinds] events that are newer than [FirehoseConfig.Since]. // Firehose connects to a list of relays and pulls [Kinds] events that are newer than [FirehoseConfig.Since].
// It discards events from unknown pubkeys as an anti-spam mechanism. // It discards events from unknown pubkeys as an anti-spam mechanism.
func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, send func(*nostr.Event) error) { func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, send func(*nostr.Event) error) {
defer log.Println("Firehose: shutting down...") defer log.Println("Firehose: shutting down...")
@@ -107,7 +107,7 @@ func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, s
defer shutdown(pool) defer shutdown(pool)
filter := nostr.Filter{ filter := nostr.Filter{
Kinds: relevantKinds, Kinds: Kinds,
Since: config.Since(), Since: config.Since(),
} }
@@ -218,7 +218,7 @@ func Fetcher(ctx context.Context, config FetcherConfig, pubkeys <-chan string, s
} }
} }
// fetch queries the [relevantKinds] of the specified pubkeys. // fetch queries the [Kinds] of the specified pubkeys.
func fetch(ctx context.Context, pool *nostr.SimplePool, relays, pubkeys []string) ([]*nostr.Event, error) { func fetch(ctx context.Context, pool *nostr.SimplePool, relays, pubkeys []string) ([]*nostr.Event, error) {
if len(pubkeys) == 0 { if len(pubkeys) == 0 {
return nil, nil return nil, nil
@@ -228,7 +228,7 @@ func fetch(ctx context.Context, pool *nostr.SimplePool, relays, pubkeys []string
defer cancel() defer cancel()
filter := nostr.Filter{ filter := nostr.Filter{
Kinds: relevantKinds, Kinds: Kinds,
Authors: pubkeys, Authors: pubkeys,
} }

View File

@@ -37,7 +37,7 @@ func TestFetch(t *testing.T) {
t.Fatalf("expected error nil, got %v", err) t.Fatalf("expected error nil, got %v", err)
} }
expected := len(pubkeys) * len(relevantKinds) expected := len(pubkeys) * len(Kinds)
if len(events) != expected { if len(events) != expected {
t.Fatalf("expected %d events, got %d", expected, len(events)) t.Fatalf("expected %d events, got %d", expected, len(events))
} }