mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-17 15:34:26 +01:00
added sync cmd
This commit is contained in:
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github/pippellia-btc/crawler/pkg/config"
|
||||||
"github/pippellia-btc/crawler/pkg/graph"
|
"github/pippellia-btc/crawler/pkg/graph"
|
||||||
"github/pippellia-btc/crawler/pkg/pipe"
|
"github/pippellia-btc/crawler/pkg/pipe"
|
||||||
"github/pippellia-btc/crawler/pkg/redb"
|
"github/pippellia-btc/crawler/pkg/redb"
|
||||||
@@ -25,7 +26,7 @@ func main() {
|
|||||||
defer cancel()
|
defer cancel()
|
||||||
go handleSignals(cancel)
|
go handleSignals(cancel)
|
||||||
|
|
||||||
config, err := LoadConfig()
|
config, err := config.Load()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@@ -45,9 +46,13 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if count == 0 {
|
if count == 0 {
|
||||||
log.Println("initialize from empty database...")
|
if len(config.InitPubkeys) == 0 {
|
||||||
|
panic("init pubkeys are empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("initialize from empty database...")
|
||||||
nodes := make([]graph.ID, len(config.InitPubkeys))
|
nodes := make([]graph.ID, len(config.InitPubkeys))
|
||||||
|
|
||||||
for i, pk := range config.InitPubkeys {
|
for i, pk := range config.InitPubkeys {
|
||||||
nodes[i], err = db.AddNode(ctx, pk)
|
nodes[i], err = db.AddNode(ctx, pk)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
174
cmd/sync/main.go
Normal file
174
cmd/sync/main.go
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"github/pippellia-btc/crawler/pkg/config"
|
||||||
|
"github/pippellia-btc/crawler/pkg/graph"
|
||||||
|
"github/pippellia-btc/crawler/pkg/pipe"
|
||||||
|
"github/pippellia-btc/crawler/pkg/redb"
|
||||||
|
"github/pippellia-btc/crawler/pkg/walks"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"runtime"
|
||||||
|
"sync"
|
||||||
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/nbd-wtf/go-nostr"
|
||||||
|
"github.com/redis/go-redis/v9"
|
||||||
|
"github.com/vertex-lab/relay/pkg/eventstore"
|
||||||
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
This program synchronizes the Redis database using the events already stored in the EventStore.
|
||||||
|
If Redis and the eventstore are already in sync, run the executable at /cmd/crawler/.
|
||||||
|
*/
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
go handleSignals(cancel)
|
||||||
|
|
||||||
|
config, err := config.Load()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
events := make(chan *nostr.Event, config.EventsCapacity)
|
||||||
|
pubkeys := make(chan string, config.PubkeysCapacity)
|
||||||
|
|
||||||
|
store, err := eventstore.New(config.SQLiteURL)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
db := redb.New(&redis.Options{Addr: config.RedisAddress})
|
||||||
|
count, err := db.NodeCount(ctx)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if count != 0 {
|
||||||
|
panic("refusing to run sync when redis is not empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(config.InitPubkeys) == 0 {
|
||||||
|
panic("init pubkeys are empty")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("initialize from empty database...")
|
||||||
|
nodes := make([]graph.ID, len(config.InitPubkeys))
|
||||||
|
|
||||||
|
for i, pk := range config.InitPubkeys {
|
||||||
|
nodes[i], err = db.AddNode(ctx, pk)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pubkeys <- pk // add to queue
|
||||||
|
}
|
||||||
|
|
||||||
|
walks, err := walks.Generate(ctx, db, nodes...)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := db.AddWalks(ctx, walks...); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Printf("correctly added %d init pubkeys", len(config.InitPubkeys))
|
||||||
|
|
||||||
|
pipe.Kinds = []int{
|
||||||
|
nostr.KindFollowList, // no need to sync other event kinds
|
||||||
|
}
|
||||||
|
|
||||||
|
var producers sync.WaitGroup
|
||||||
|
var consumers sync.WaitGroup
|
||||||
|
|
||||||
|
producers.Add(3)
|
||||||
|
go func() {
|
||||||
|
defer producers.Done()
|
||||||
|
pipe.Firehose(ctx, config.Firehose, db, enqueue(events))
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer producers.Done()
|
||||||
|
pipe.Fetcher(ctx, config.Fetcher, pubkeys, enqueue(events)) // TODO: fetch from the event store
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer producers.Done()
|
||||||
|
pipe.Arbiter(ctx, config.Arbiter, db, enqueue(pubkeys))
|
||||||
|
close(pubkeys) // Arbiter is the only pubkey sender
|
||||||
|
}()
|
||||||
|
|
||||||
|
consumers.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer consumers.Done()
|
||||||
|
pipe.GraphUpdater(ctx, config.Engine, store, db, events)
|
||||||
|
}()
|
||||||
|
|
||||||
|
producers.Wait()
|
||||||
|
close(events)
|
||||||
|
|
||||||
|
consumers.Wait()
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleSignals blocks until the process receives SIGINT or SIGTERM, then logs
// the shutdown and calls cancel. It is meant to run in its own goroutine.
func handleSignals(cancel context.CancelFunc) {
	// Buffer of one so the notification is not lost if it arrives before
	// the receive below is reached.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)

	<-sigCh
	log.Println(" Signal received. Shutting down...")
	cancel()
}
|
||||||
|
|
||||||
|
// enqueue returns a function that attempts a non-blocking send on queue.
// The returned function yields nil when the value was queued, and an error
// (without blocking) when the send could not proceed immediately.
func enqueue[T any](queue chan T) func(t T) error {
	trySend := func(item T) error {
		select {
		case queue <- item:
			return nil
		default:
		}
		// The send was not ready: report the dropped value instead of waiting.
		return fmt.Errorf("channel is full, dropping %v", item)
	}
	return trySend
}
|
||||||
|
|
||||||
|
func printStats(ctx context.Context, events chan *nostr.Event, pubkeys chan string) {
|
||||||
|
filename := "stats.log"
|
||||||
|
file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
|
||||||
|
if err != nil {
|
||||||
|
panic(fmt.Errorf("failed to open log file %s: %w", filename, err))
|
||||||
|
}
|
||||||
|
|
||||||
|
defer file.Close()
|
||||||
|
log := log.New(file, "stats: ", log.LstdFlags)
|
||||||
|
|
||||||
|
ticker := time.NewTicker(10 * time.Second)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
|
||||||
|
case <-ticker.C:
|
||||||
|
goroutines := runtime.NumGoroutine()
|
||||||
|
memStats := new(runtime.MemStats)
|
||||||
|
runtime.ReadMemStats(memStats)
|
||||||
|
|
||||||
|
log.Println("---------------------------------------")
|
||||||
|
log.Printf("events queue: %d/%d\n", len(events), cap(events))
|
||||||
|
log.Printf("pubkeys queue: %d/%d\n", len(pubkeys), cap(pubkeys))
|
||||||
|
log.Printf("walks tracker: %v\n", pipe.WalksTracker.Load())
|
||||||
|
log.Printf("goroutines: %d\n", goroutines)
|
||||||
|
log.Printf("memory usage: %.2f MB\n", float64(memStats.Alloc)/(1024*1024))
|
||||||
|
log.Println("---------------------------------------")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
package main
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -50,8 +50,8 @@ type Config struct {
|
|||||||
Engine pipe.EngineConfig
|
Engine pipe.EngineConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewConfig returns a config with default parameters
|
// New returns a config with default parameters
|
||||||
func NewConfig() *Config {
|
func New() *Config {
|
||||||
return &Config{
|
return &Config{
|
||||||
SystemConfig: NewSystemConfig(),
|
SystemConfig: NewSystemConfig(),
|
||||||
Firehose: pipe.NewFirehoseConfig(),
|
Firehose: pipe.NewFirehoseConfig(),
|
||||||
@@ -69,9 +69,9 @@ func (c *Config) Print() {
|
|||||||
c.Engine.Print()
|
c.Engine.Print()
|
||||||
}
|
}
|
||||||
|
|
||||||
// LoadConfig reads the environment variables and parses them into a [Config] struct
|
// Load reads the environment variables and parses them into a [Config] struct
|
||||||
func LoadConfig() (*Config, error) {
|
func Load() (*Config, error) {
|
||||||
var config = NewConfig()
|
var config = New()
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
for _, item := range os.Environ() {
|
for _, item := range os.Environ() {
|
||||||
@@ -11,7 +11,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
relevantKinds = []int{
|
Kinds = []int{
|
||||||
//nostr.KindProfileMetadata,
|
//nostr.KindProfileMetadata,
|
||||||
nostr.KindFollowList,
|
nostr.KindFollowList,
|
||||||
}
|
}
|
||||||
@@ -98,7 +98,7 @@ func (b *buffer) Contains(ID string) bool {
|
|||||||
return slices.Contains(b.IDs, ID)
|
return slices.Contains(b.IDs, ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Firehose connects to a list of relays and pulls [relevantKinds] events that are newer than [FirehoseConfig.Since].
|
// Firehose connects to a list of relays and pulls [Kinds] events that are newer than [FirehoseConfig.Since].
|
||||||
// It discards events from unknown pubkeys as an anti-spam mechanism.
|
// It discards events from unknown pubkeys as an anti-spam mechanism.
|
||||||
func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, send func(*nostr.Event) error) {
|
func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, send func(*nostr.Event) error) {
|
||||||
defer log.Println("Firehose: shutting down...")
|
defer log.Println("Firehose: shutting down...")
|
||||||
@@ -107,7 +107,7 @@ func Firehose(ctx context.Context, config FirehoseConfig, check PubkeyChecker, s
|
|||||||
defer shutdown(pool)
|
defer shutdown(pool)
|
||||||
|
|
||||||
filter := nostr.Filter{
|
filter := nostr.Filter{
|
||||||
Kinds: relevantKinds,
|
Kinds: Kinds,
|
||||||
Since: config.Since(),
|
Since: config.Since(),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -218,7 +218,7 @@ func Fetcher(ctx context.Context, config FetcherConfig, pubkeys <-chan string, s
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// fetch queries the [relevantKinds] of the specified pubkeys.
|
// fetch queries the [Kinds] of the specified pubkeys.
|
||||||
func fetch(ctx context.Context, pool *nostr.SimplePool, relays, pubkeys []string) ([]*nostr.Event, error) {
|
func fetch(ctx context.Context, pool *nostr.SimplePool, relays, pubkeys []string) ([]*nostr.Event, error) {
|
||||||
if len(pubkeys) == 0 {
|
if len(pubkeys) == 0 {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
@@ -228,7 +228,7 @@ func fetch(ctx context.Context, pool *nostr.SimplePool, relays, pubkeys []string
|
|||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
filter := nostr.Filter{
|
filter := nostr.Filter{
|
||||||
Kinds: relevantKinds,
|
Kinds: Kinds,
|
||||||
Authors: pubkeys,
|
Authors: pubkeys,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ func TestFetch(t *testing.T) {
|
|||||||
t.Fatalf("expected error nil, got %v", err)
|
t.Fatalf("expected error nil, got %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
expected := len(pubkeys) * len(relevantKinds)
|
expected := len(pubkeys) * len(Kinds)
|
||||||
if len(events) != expected {
|
if len(events) != expected {
|
||||||
t.Fatalf("expected %d events, got %d", expected, len(events))
|
t.Fatalf("expected %d events, got %d", expected, len(events))
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user