mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-17 07:24:21 +01:00
138 lines
3.2 KiB
Go
138 lines
3.2 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"runtime"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/vertex-lab/crawler_v2/pkg/config"
|
|
"github.com/vertex-lab/crawler_v2/pkg/pipe"
|
|
"github.com/vertex-lab/crawler_v2/pkg/redb"
|
|
"github.com/vertex-lab/crawler_v2/pkg/store"
|
|
|
|
"github.com/nbd-wtf/go-nostr"
|
|
"github.com/redis/go-redis/v9"
|
|
)
|
|
|
|
/*
|
|
This program synchronizes the Redis database with the events already stored in the event store.
|
|
If Redis and the event store are already in sync, use go run /cmd/crawler/ instead.
|
|
*/
|
|
|
|
func main() {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
go pipe.HandleSignals(cancel)
|
|
|
|
log.Printf("--------- starting up the sync process --------")
|
|
defer log.Printf("------------------------------------------------")
|
|
|
|
config, err := config.Load()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
config.Fetcher.Kinds = []int{nostr.KindFollowList} // no need to sync other event kinds
|
|
|
|
db := redb.New(&redis.Options{
|
|
Addr: config.RedisAddress,
|
|
})
|
|
|
|
store, err := store.New(config.SQLiteURL)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
grapherQueue := make(chan *nostr.Event, config.ChannelCapacity)
|
|
fetcherQueue := make(chan string, config.ChannelCapacity)
|
|
|
|
nodes, err := db.NodeCount(ctx)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
if nodes != 0 {
|
|
panic("refuse to sync when redis is not empty")
|
|
}
|
|
|
|
log.Println("initialize from empty database...")
|
|
|
|
if err := pipe.InitGraph(ctx, db, config.InitPubkeys); err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
for _, pk := range config.InitPubkeys {
|
|
fetcherQueue <- pk
|
|
}
|
|
|
|
log.Printf("correctly added %d pubkeys", len(config.InitPubkeys))
|
|
|
|
if config.PrintStats {
|
|
go printStats(ctx, grapherQueue, fetcherQueue)
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(3)
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
pipe.FetcherDB(ctx, config.Fetcher, store, fetcherQueue, pipe.Send(grapherQueue))
|
|
close(grapherQueue) // FetcherDB is the only event producer
|
|
}()
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
pipe.Arbiter(ctx, config.Arbiter, db, pipe.Send(fetcherQueue))
|
|
close(fetcherQueue) // Arbiter is the only pubkey producer
|
|
}()
|
|
|
|
go func() {
|
|
defer wg.Done()
|
|
pipe.Grapher(ctx, config.Engine.Grapher, db, grapherQueue)
|
|
}()
|
|
|
|
wg.Wait()
|
|
}
|
|
|
|
func printStats(
|
|
ctx context.Context,
|
|
grapherQueue chan *nostr.Event,
|
|
fetcherQueue chan string,
|
|
) {
|
|
filename := "stats.log"
|
|
file, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
|
|
if err != nil {
|
|
panic(fmt.Errorf("failed to open log file %s: %w", filename, err))
|
|
}
|
|
|
|
defer file.Close()
|
|
log := log.New(file, "stats: ", log.LstdFlags)
|
|
|
|
ticker := time.NewTicker(10 * time.Second)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
|
|
case <-ticker.C:
|
|
goroutines := runtime.NumGoroutine()
|
|
memStats := new(runtime.MemStats)
|
|
runtime.ReadMemStats(memStats)
|
|
|
|
log.Println("---------------------------------------")
|
|
log.Printf("Grapher queue: %d/%d\n", len(grapherQueue), cap(grapherQueue))
|
|
log.Printf("FetcherDB queue: %d/%d\n", len(fetcherQueue), cap(fetcherQueue))
|
|
log.Printf("walks tracker: %v\n", pipe.WalksTracker.Load())
|
|
log.Printf("goroutines: %d\n", goroutines)
|
|
log.Printf("memory usage: %.2f MB\n", float64(memStats.Alloc)/(1024*1024))
|
|
log.Println("---------------------------------------")
|
|
}
|
|
}
|
|
}
|