From cce82de057a8b0e9e286874a0f3c69483b4aa0a4 Mon Sep 17 00:00:00 2001 From: pippellia-btc Date: Tue, 22 Jul 2025 14:21:38 +0200 Subject: [PATCH] moved event store definition to crawler from relay --- cmd/crawler/main.go | 8 +++- cmd/sync/main.go | 7 +--- go.mod | 3 +- go.sum | 10 +---- pkg/store/store.go | 84 +++++++++++++++++++++++++++++++++++++++++ pkg/store/store_test.go | 64 +++++++++++++++++++++++++++++++ 6 files changed, 159 insertions(+), 17 deletions(-) create mode 100644 pkg/store/store.go create mode 100644 pkg/store/store_test.go diff --git a/cmd/crawler/main.go b/cmd/crawler/main.go index 91d9169..6085000 100644 --- a/cmd/crawler/main.go +++ b/cmd/crawler/main.go @@ -9,11 +9,12 @@ import ( "sync" "time" + "github.com/pippellia-btc/nastro/sqlite" "github.com/vertex-lab/crawler_v2/pkg/config" "github.com/vertex-lab/crawler_v2/pkg/graph" "github.com/vertex-lab/crawler_v2/pkg/pipe" "github.com/vertex-lab/crawler_v2/pkg/redb" - "github.com/vertex-lab/relay/pkg/store" + "github.com/vertex-lab/crawler_v2/pkg/store" "github.com/nbd-wtf/go-nostr" "github.com/redis/go-redis/v9" @@ -41,7 +42,10 @@ func main() { events := make(chan *nostr.Event, config.EventsCapacity) pubkeys := make(chan string, config.PubkeysCapacity) - store, err := store.New(config.SQLiteURL) + store, err := store.New( + config.SQLiteURL, + sqlite.WithEventPolicy(pipe.EventTooBig), + ) if err != nil { panic(err) } diff --git a/cmd/sync/main.go b/cmd/sync/main.go index 11d11e0..93d017b 100644 --- a/cmd/sync/main.go +++ b/cmd/sync/main.go @@ -13,7 +13,7 @@ import ( "github.com/vertex-lab/crawler_v2/pkg/graph" "github.com/vertex-lab/crawler_v2/pkg/pipe" "github.com/vertex-lab/crawler_v2/pkg/redb" - "github.com/vertex-lab/relay/pkg/store" + "github.com/vertex-lab/crawler_v2/pkg/store" "github.com/nbd-wtf/go-nostr" "github.com/redis/go-redis/v9" @@ -77,14 +77,11 @@ func main() { } log.Printf("correctly added %d init pubkeys", len(config.InitPubkeys)) - pipe.Kinds = []int{ - nostr.KindFollowList, // 
no need to sync other event kinds - } - if config.PrintStats { go printStats(ctx, events, pubkeys) } + pipe.Kinds = []int{nostr.KindFollowList} // no need to sync other event kinds var wg sync.WaitGroup wg.Add(3) diff --git a/go.mod b/go.mod index 348872d..b324f29 100644 --- a/go.mod +++ b/go.mod @@ -7,10 +7,9 @@ toolchain go1.24.3 require ( github.com/joho/godotenv v1.5.1 github.com/nbd-wtf/go-nostr v0.51.12 - github.com/pippellia-btc/nastro v0.2.0 + github.com/pippellia-btc/nastro v0.3.0 github.com/pippellia-btc/slicex v0.2.4 github.com/redis/go-redis/v9 v9.8.0 - github.com/vertex-lab/relay v0.5.1 ) require ( diff --git a/go.sum b/go.sum index 70cd137..4375925 100644 --- a/go.sum +++ b/go.sum @@ -52,10 +52,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/nbd-wtf/go-nostr v0.51.12 h1:MRQcrShiW/cHhnYSVDQ4SIEc7DlYV7U7gg/l4H4gbbE= github.com/nbd-wtf/go-nostr v0.51.12/go.mod h1:IF30/Cm4AS90wd1GjsFJbBqq7oD1txo+2YUFYXqK3Nc= -github.com/pippellia-btc/nastro v0.1.3 h1:fL2AUy/1ZcBwIW9tqnlxTC3tg8e8AC/DABqv/Bpe998= -github.com/pippellia-btc/nastro v0.1.3/go.mod h1:K5EWZqmFjSvPwZceUL3VfJRMagVydXeb1Adnd1rXwkI= -github.com/pippellia-btc/nastro v0.2.0 h1:KzjQUosn7yPDGJe5ahP+NS/M0qfSNxKc4JT0/Lbzv/Y= -github.com/pippellia-btc/nastro v0.2.0/go.mod h1:K5EWZqmFjSvPwZceUL3VfJRMagVydXeb1Adnd1rXwkI= +github.com/pippellia-btc/nastro v0.3.0 h1:tsD8Oi0mqf54R0r4eU19ak6O7avpTG/j2Q4aNMkjE/0= +github.com/pippellia-btc/nastro v0.3.0/go.mod h1:K5EWZqmFjSvPwZceUL3VfJRMagVydXeb1Adnd1rXwkI= github.com/pippellia-btc/slicex v0.2.4 h1:zVZ7c0pZu01LL0f+cWPrgBdPJHQ3iMyGjf7ucGoO6RY= github.com/pippellia-btc/slicex v0.2.4/go.mod h1:fu7VjA9Cdk76wIUlkzWOYiMG8/VEs1fJiUhkKqEopd8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -84,10 +82,6 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= 
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/vertex-lab/relay v0.5.0 h1:LtIeXLI7LTPBWTjszidyS4zAKys8VXdah4jBiTc+SsM= -github.com/vertex-lab/relay v0.5.0/go.mod h1:USio/wBCi1QjMFsDzcsugL81eVkTjmT3Hi2xvD7oanY= -github.com/vertex-lab/relay v0.5.1 h1:itzKYf55IYi9FKflPGfIFbbf5kshfRd2zendPAgDGZc= -github.com/vertex-lab/relay v0.5.1/go.mod h1:hG8QIxmkAo7jn5QVgE3iPsaEwyOf/81rpxBE2rYW86U= golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= golang.org/x/arch v0.15.0/go.mod h1:JmwW7aLIoRUKgaTzhkiEFxvcEiQGyOg9BMonBJUS7EE= golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw= diff --git a/pkg/store/store.go b/pkg/store/store.go new file mode 100644 index 0000000..2d86e9b --- /dev/null +++ b/pkg/store/store.go @@ -0,0 +1,84 @@ +package store + +import ( + "github.com/pippellia-btc/nastro/sqlite" +) + +var ( + profileFTS = ` + CREATE VIRTUAL TABLE IF NOT EXISTS profiles_fts USING fts5( + id UNINDEXED, + pubkey UNINDEXED, + name, + display_name, + about, + website, + nip05, + tokenize = 'trigram', + ); + + CREATE TRIGGER IF NOT EXISTS profiles_ai AFTER INSERT ON events + WHEN NEW.kind = 0 + BEGIN + INSERT INTO profiles_fts (id, pubkey, name, display_name, about, website, nip05) + VALUES ( + NEW.id, + NEW.pubkey, + NEW.content ->> '$.name', + COALESCE( NEW.content ->> '$.display_name', NEW.content ->> '$.displayName'), + NEW.content ->> '$.about', + NEW.content ->> '$.website', + NEW.content ->> '$.nip05' + ); + END; + + CREATE TRIGGER IF NOT EXISTS profiles_ad AFTER DELETE ON events + WHEN OLD.kind = 0 + BEGIN + DELETE FROM profiles_fts WHERE id = OLD.id; + END;` + + // indexing a-z and A-Z tags of responses for efficient look-up + responseTagsIndex = ` + CREATE 
TRIGGER IF NOT EXISTS response_tags_ai AFTER INSERT ON events + WHEN (NEW.kind BETWEEN 6312 AND 6315 OR NEW.kind = 7000) + BEGIN + INSERT INTO event_tags (event_id, key, value) + SELECT NEW.id, json_extract(value, '$[0]'), json_extract(value, '$[1]') + FROM json_each(NEW.tags) + WHERE json_type(value) = 'array' + AND json_array_length(value) > 1 + AND typeof(json_extract(value, '$[0]')) = 'text' + AND json_extract(value, '$[0]') GLOB '[a-zA-Z]'; + END;` +) + +func New(URL string, opts ...sqlite.Option) (*sqlite.Store, error) { + store, err := sqlite.New(URL, + sqlite.WithAdditionalSchema(profileFTS), + sqlite.WithAdditionalSchema(responseTagsIndex), + sqlite.WithRetries(2), + ) + + if err != nil { + return nil, err + } + + for _, opt := range opts { + if err := opt(store); err != nil { + return nil, err + } + } + return store, nil +} + +// Profile represents the internal representation of the content of kind:0s, used for full-text-search. +type Profile struct { + ID string + Pubkey string + Name string + DisplayName string + About string + Website string + Nip05 string +} diff --git a/pkg/store/store_test.go b/pkg/store/store_test.go new file mode 100644 index 0000000..2967c0d --- /dev/null +++ b/pkg/store/store_test.go @@ -0,0 +1,64 @@ +package store + +import ( + "context" + "os" + "reflect" + "testing" + + "github.com/nbd-wtf/go-nostr" +) + +var ( + ctx = context.Background() + URL = "test.sqlite" + + event = nostr.Event{ + ID: "f7a73d54e45714f5e3ca97b789dfc7898e7dd31f77981989d71a54030e627ff6", + Kind: 0, + PubKey: "f683e87035f7ad4f44e0b98cfbd9537e16455a92cd38cefc4cb31db7557f5ef2", + CreatedAt: 1739547448, + Sig: "51a89ee1e24d83bd8e9209daf6a38245c974b49206ecb66fe156c9d7875c782f653b40cd73582f6bc9de5d1db497b925a13a828d521f8b78982fea359206e4e8", + Content: "{\"name\":\"pippellia\",\"nip05\":\"pip@vertexlab.io\",\"about\":\"simplifying social graph analysis so you can focus on building great experiences 
https://vertexlab.io/\",\"lud16\":\"whitebat1@primal.net\",\"display_name\":\"Pip the social graph guy\",\"picture\":\"https://m.primal.net/IfSZ.jpg\",\"banner\":\"https://m.primal.net/IfSc.png\",\"website\":\"pippellia.com\",\"displayName\":\"Pip the social graph guy\",\"pubkey\":\"f683e87035f7ad4f44e0b98cfbd9537e16455a92cd38cefc4cb31db7557f5ef2\",\"npub\":\"npub176p7sup477k5738qhxx0hk2n0cty2k5je5uvalzvkvwmw4tltmeqw7vgup\",\"created_at\":1738783677}", + } + + profile = Profile{ + ID: event.ID, + Pubkey: event.PubKey, + Name: "pippellia", + DisplayName: "Pip the social graph guy", + About: "simplifying social graph analysis so you can focus on building great experiences https://vertexlab.io/", + Website: "pippellia.com", + Nip05: "pip@vertexlab.io", + } +) + +func TestSaveProfile(t *testing.T) { + store, err := New(URL) + if err != nil { + t.Fatal(err) + } + defer Remove(URL) + + if err := store.Save(ctx, &event); err != nil { + t.Fatal(err) + } + + var p Profile + row := store.DB.QueryRowContext(ctx, "SELECT * FROM profiles_fts WHERE id = ?", event.ID) + err = row.Scan(&p.ID, &p.Pubkey, &p.Name, &p.DisplayName, &p.About, &p.Website, &p.Nip05) + + if err != nil { + t.Fatalf("failed to query for event ID %s in profiles_fts: %v", event.ID, err) + } + + if !reflect.DeepEqual(p, profile) { + t.Fatalf("expected profile %v, got %v", profile, p) + } +} + +func Remove(URL string) { + os.Remove(URL) + os.Remove(URL + "-shm") + os.Remove(URL + "-wal") +}