From b637e35991ccce69f65d7237526ddba70d1c22dc Mon Sep 17 00:00:00 2001 From: pippellia-btc Date: Mon, 23 Jun 2025 18:55:15 +0200 Subject: [PATCH] removed duplicated code thanks to slicex --- go.mod | 1 + go.sum | 8 ++-- pkg/graph/graph.go | 32 ++----------- pkg/graph/graph_test.go | 10 ++--- pkg/pagerank/pagerank.go | 8 +--- pkg/pipe/engine.go | 28 +----------- pkg/redb/walks.go | 37 ++++----------- pkg/redb/walks_test.go | 85 +++++++++++++---------------------- pkg/walks/walks.go | 10 ++--- tests/random/pagerank_test.go | 8 +--- 10 files changed, 62 insertions(+), 165 deletions(-) diff --git a/go.mod b/go.mod index 9c50950..ff03426 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ toolchain go1.24.3 require ( github.com/joho/godotenv v1.5.1 github.com/nbd-wtf/go-nostr v0.51.12 + github.com/pippellia-btc/slicex v0.2.4 github.com/redis/go-redis/v9 v9.8.0 github.com/vertex-lab/relay v0.4.7 ) diff --git a/go.sum b/go.sum index 0d7971e..2248a83 100644 --- a/go.sum +++ b/go.sum @@ -52,6 +52,10 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/nbd-wtf/go-nostr v0.51.12 h1:MRQcrShiW/cHhnYSVDQ4SIEc7DlYV7U7gg/l4H4gbbE= github.com/nbd-wtf/go-nostr v0.51.12/go.mod h1:IF30/Cm4AS90wd1GjsFJbBqq7oD1txo+2YUFYXqK3Nc= +github.com/pippellia-btc/slicex v0.2.3 h1:QNGp1UtdlAOeTPvYrttS6tFZnFISQRcuhVMMTZqx/B4= +github.com/pippellia-btc/slicex v0.2.3/go.mod h1:fu7VjA9Cdk76wIUlkzWOYiMG8/VEs1fJiUhkKqEopd8= +github.com/pippellia-btc/slicex v0.2.4 h1:zVZ7c0pZu01LL0f+cWPrgBdPJHQ3iMyGjf7ucGoO6RY= +github.com/pippellia-btc/slicex v0.2.4/go.mod h1:fu7VjA9Cdk76wIUlkzWOYiMG8/VEs1fJiUhkKqEopd8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/puzpuzpuz/xsync/v3 v3.5.1 h1:GJYJZwO6IdxN/IKbneznS6yPkVC+c3zyY/j19c++5Fg= @@ -78,10 +82,6 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= -github.com/vertex-lab/relay v0.4.5 h1:6elykMvZFV+y/JYNhaNo3+pxbK66QnrmAIv3/3CN+VU= -github.com/vertex-lab/relay v0.4.5/go.mod h1:K3Utw2y0FhhDigT86A2gc/ZqK6MrF06mw6p0CihD4I4= -github.com/vertex-lab/relay v0.4.6 h1:wNMr440tcIqcBZulIrrPKkaELI8NG3sPwsm3huq5aSo= -github.com/vertex-lab/relay v0.4.6/go.mod h1:K3Utw2y0FhhDigT86A2gc/ZqK6MrF06mw6p0CihD4I4= github.com/vertex-lab/relay v0.4.7 h1:VsOmkJNfMI9iz+UonD1StHemOvgt/33COgqpJA1COEk= github.com/vertex-lab/relay v0.4.7/go.mod h1:K3Utw2y0FhhDigT86A2gc/ZqK6MrF06mw6p0CihD4I4= golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= diff --git a/pkg/graph/graph.go b/pkg/graph/graph.go index ae87efb..9e9ab5a 100644 --- a/pkg/graph/graph.go +++ b/pkg/graph/graph.go @@ -5,9 +5,10 @@ package graph import ( "errors" "math/rand/v2" - "slices" "strconv" "time" + + "github.com/pippellia-btc/slicex" ) const ( @@ -76,34 +77,7 @@ func NewDelta(kind int, node ID, old, new []ID) Delta { Node: node, } - slices.Sort(old) - slices.Sort(new) - i, j := 0, 0 - oldLen, newLen := len(old), len(new) - - for i < oldLen && j < newLen { - switch { - case old[i] < new[j]: - // ID is in old but not in new => remove - delta.Remove = append(delta.Remove, old[i]) - i++ - - case old[i] > new[j]: - // ID is in new but not in old => add - delta.Add = append(delta.Add, new[j]) - j++ - - default: - // ID is in both => keep - delta.Keep = append(delta.Keep, old[i]) - i++ - j++ - } - } - - // add all elements not traversed - delta.Remove = append(delta.Remove, old[i:]...) - delta.Add = append(delta.Add, new[j:]...) + delta.Remove, delta.Keep, delta.Add = slicex.Partition(old, new) return delta } diff --git a/pkg/graph/graph_test.go b/pkg/graph/graph_test.go index dc70de8..15d5acd 100644 --- a/pkg/graph/graph_test.go +++ b/pkg/graph/graph_test.go @@ -15,26 +15,26 @@ func TestNewDelta(t *testing.T) { }{ { name: "nil slices", - expected: Delta{Kind: 3, Node: "0"}, + expected: Delta{Kind: 3, Node: "0", Remove: []ID{}, Keep: []ID{}, Add: []ID{}}, }, { name: "empty slices", - expected: Delta{Kind: 3, Node: "0"}, + expected: Delta{Kind: 3, Node: "0", Remove: []ID{}, Keep: []ID{}, Add: []ID{}}, }, { name: "only removals", old: []ID{"0", "1", "2", "19", "111"}, new: []ID{"2", "19"}, - expected: Delta{Kind: 3, Node: "0", Remove: []ID{"0", "1", "111"}, Keep: []ID{"19", "2"}}, + expected: Delta{Kind: 3, Node: "0", Remove: []ID{"0", "1", "111"}, Keep: []ID{"2", "19"}, Add: []ID{}}, }, { name: "only additions", old: []ID{"0", "1"}, new: []ID{"420", "0", "1", "69"}, - expected: Delta{Kind: 3, Node: "0", Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}}, + expected: Delta{Kind: 3, Node: "0", Remove: []ID{}, Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}}, }, { - name: "both additions", + name: "both", old: []ID{"0", "1", "111"}, new: []ID{"420", "0", "1", "69"}, expected: Delta{Kind: 3, Node: "0", Remove: []ID{"111"}, Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}}, diff --git a/pkg/pagerank/pagerank.go b/pkg/pagerank/pagerank.go index 1c518bf..30ae63d 100644 --- a/pkg/pagerank/pagerank.go +++ b/pkg/pagerank/pagerank.go @@ -8,6 +8,7 @@ import ( "fmt" "math/rand/v2" + "github.com/pippellia-btc/slicex" "github.com/vertex-lab/crawler_v2/pkg/graph" "github.com/vertex-lab/crawler_v2/pkg/walks" ) @@ -278,7 +279,7 @@ func personalizedWalk( continue } - node := randomElement(follows) + node := slicex.RandomElement(follows) if walk.ongoing.Visits(node) { // found a cycle, stop walk.Reset() @@ -307,8 +308,3 @@ func frequencyMap(path []graph.ID) map[graph.ID]float64 { return freqs } - -// returns a random element of a slice. It panics if the slice is empty or nil. -func randomElement[S []E, E any](s S) E { - return s[rand.IntN(len(s))] -} diff --git a/pkg/pipe/engine.go b/pkg/pipe/engine.go index bdbde32..78d0984 100644 --- a/pkg/pipe/engine.go +++ b/pkg/pipe/engine.go @@ -2,14 +2,13 @@ package pipe import ( - "cmp" "context" "errors" "fmt" "log" - "slices" "time" + "github.com/pippellia-btc/slicex" "github.com/vertex-lab/crawler_v2/pkg/graph" "github.com/vertex-lab/crawler_v2/pkg/redb" "github.com/vertex-lab/crawler_v2/pkg/walks" @@ -265,28 +264,5 @@ func ParsePubkeys(event *nostr.Event) []string { pubkeys = append(pubkeys, pubkey) } - return unique(pubkeys) -} - -func logEvent(prefix string, e *nostr.Event, extra any) { - log.Printf("%s: event ID %s, kind %d by %s: %v", prefix, e.ID, e.Kind, e.PubKey, extra) -} - -// Unique returns a slice of unique elements of the input slice. -func unique[E cmp.Ordered](slice []E) []E { - if len(slice) == 0 { - return nil - } - - slices.Sort(slice) - unique := make([]E, 0, len(slice)) - unique = append(unique, slice[0]) - - for i := 1; i < len(slice); i++ { - if slice[i] != slice[i-1] { - unique = append(unique, slice[i]) - } - } - - return unique + return slicex.Unique(pubkeys) } diff --git a/pkg/redb/walks.go b/pkg/redb/walks.go index 1c415b6..439b072 100644 --- a/pkg/redb/walks.go +++ b/pkg/redb/walks.go @@ -1,13 +1,12 @@ package redb import ( - "cmp" "context" "errors" "fmt" - "slices" "strconv" + "github.com/pippellia-btc/slicex" "github.com/vertex-lab/crawler_v2/pkg/graph" "github.com/vertex-lab/crawler_v2/pkg/walks" @@ -25,9 +24,10 @@ const ( ) var ( - ErrWalkNotFound = errors.New("walk not found") - ErrInvalidReplacement = errors.New("invalid walk replacement") - ErrInvalidLimit = errors.New("limit must be a positive integer, or -1 to fetch all walks") + ErrInvalidWalkParameters = errors.New("invalid walk parameters") + ErrWalkNotFound = errors.New("walk not found") + ErrInvalidReplacement = errors.New("invalid walk replacement") + ErrInvalidLimit = errors.New("limit must be a positive integer, or -1 to fetch all walks") ) // init the walk store checking the existence of [KeyRWS]. @@ -59,11 +59,11 @@ func (db RedisDB) init() error { } if alpha != walks.Alpha { - return errors.New("alpha and walks.Alpha are different") + return fmt.Errorf("%w: alpha and walks.Alpha are different", ErrInvalidWalkParameters) } if N != walks.N { - return errors.New("N and walks.N are different") + return fmt.Errorf("%w: N and walks.N are different", ErrInvalidWalkParameters) } case 0: @@ -172,7 +172,7 @@ func (db RedisDB) WalksVisitingAny(ctx context.Context, nodes []graph.ID, limit IDs = append(IDs, cmd.Val()...) } - unique := unique(IDs) + unique := slicex.Unique(IDs) return db.Walks(ctx, toWalks(unique)...) case limit > 0: @@ -198,7 +198,7 @@ func (db RedisDB) WalksVisitingAny(ctx context.Context, nodes []graph.ID, limit IDs = append(IDs, cmd.Val()...) } - unique := unique(IDs) + unique := slicex.Unique(IDs) return db.Walks(ctx, toWalks(unique)...) default: @@ -397,22 +397,3 @@ func (db RedisDB) ScanWalks(ctx context.Context, cursor uint64, limit int) ([]wa return batch, cursor, nil } - -// unique returns a slice of unique elements of the input slice. -func unique[E cmp.Ordered](slice []E) []E { - if len(slice) == 0 { - return nil - } - - slices.Sort(slice) - unique := make([]E, 0, len(slice)) - unique = append(unique, slice[0]) - - for i := 1; i < len(slice); i++ { - if slice[i] != slice[i-1] { - unique = append(unique, slice[i]) - } - } - - return unique -} diff --git a/pkg/redb/walks_test.go b/pkg/redb/walks_test.go index fb54a42..a6ff8fb 100644 --- a/pkg/redb/walks_test.go +++ b/pkg/redb/walks_test.go @@ -12,30 +12,31 @@ import ( "github.com/redis/go-redis/v9" ) -// func TestValidate(t *testing.T) { -// tests := []struct { -// name string -// setup func() (RedisDB, error) -// err error -// }{ -// {name: "empty", setup: Empty, err: ErrValueIsNil}, -// {name: "valid", setup: SomeWalks(0)}, -// } +func TestInit(t *testing.T) { + tests := []struct { + name string + setup func() (RedisDB, error) + err error + }{ + {name: "seed", setup: Empty}, + {name: "invalid", setup: Invalid, err: ErrInvalidWalkParameters}, + {name: "valid", setup: SomeWalks(0)}, + } -// for _, test := range tests { -// t.Run(test.name, func(t *testing.T) { -// db, err := test.setup() -// if err != nil { -// t.Fatalf("setup failed: %v", err) -// } -// defer db.flushAll() + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + db, err := test.setup() + if err != nil { + t.Fatalf("setup failed: %v", err) + } + defer db.flushAll() -// if err = db.validateWalks(); !errors.Is(err, test.err) { -// t.Fatalf("expected error %v, got %v", test.err, err) -// } -// }) -// } -// } + if err = db.init(); !errors.Is(err, test.err) { + t.Fatalf("expected error %v, got %v", test.err, err) + } + }) + } +} func TestWalksVisiting(t *testing.T) { tests := []struct { @@ -353,38 +354,6 @@ func TestValidateReplacement(t *testing.T) { } } -func TestUnique(t *testing.T) { - tests := []struct { - slice []walks.ID - expected []walks.ID - }{ - {slice: nil, expected: nil}, - {slice: []walks.ID{}, expected: nil}, - {slice: []walks.ID{"1", "2", "0"}, expected: []walks.ID{"0", "1", "2"}}, - {slice: []walks.ID{"1", "2", "0", "3", "1", "0"}, expected: []walks.ID{"0", "1", "2", "3"}}, - } - - for _, test := range tests { - unique := unique(test.slice) - if !reflect.DeepEqual(unique, test.expected) { - t.Errorf("expected %v, got %v", test.expected, unique) - } - } -} - -func BenchmarkUnique(b *testing.B) { - size := 1000000 - IDs := make([]walks.ID, size) - for i := 0; i < size; i++ { - IDs[i] = walks.ID(strconv.Itoa(i)) - } - - b.ResetTimer() - for range b.N { - unique(IDs) - } -} - var defaultWalk = walks.Walk{Path: []graph.ID{"0", "1"}} func SomeWalks(n int) func() (RedisDB, error) { @@ -403,3 +372,11 @@ func SomeWalks(n int) func() (RedisDB, error) { return db, nil } } + +func Invalid() (RedisDB, error) { + db := RedisDB{Client: redis.NewClient(&redis.Options{Addr: testAddress})} + if err := db.Client.HSet(ctx, KeyRWS, KeyAlpha, 69, KeyWalksPerNode, 420).Err(); err != nil { + return RedisDB{}, err + } + return db, nil +} diff --git a/pkg/walks/walks.go b/pkg/walks/walks.go index 8f2b706..632fdea 100644 --- a/pkg/walks/walks.go +++ b/pkg/walks/walks.go @@ -9,6 +9,7 @@ import ( "math/rand/v2" "slices" + "github.com/pippellia-btc/slicex" "github.com/vertex-lab/crawler_v2/pkg/graph" ) @@ -143,7 +144,7 @@ func generate(ctx context.Context, walker Walker, start ...graph.ID) ([]graph.ID return nil, nil } - node := randomElement(start) + node := slicex.RandomElement(start) path := make([]graph.ID, 0, expectedLenght(Alpha)) path = append(path, node) @@ -162,7 +163,7 @@ func generate(ctx context.Context, walker Walker, start ...graph.ID) ([]graph.ID break } - node = randomElement(follows) + node = slicex.RandomElement(follows) if slices.Contains(path, node) { // found a cycle, stop break @@ -276,11 +277,6 @@ func expectedUpdates(walks []Walk, delta graph.Delta) int { return int(expected + 0.5) } -// returns a random element of a slice. It panics if the slice is empty or nil. -func randomElement[S []E, E any](s S) E { - return s[rand.IntN(len(s))] -} - // Find the position of the first repetition in a slice. If there are no cycles, -1 is returned func findCycle[S []K, K comparable](s S) int { seen := make(map[K]struct{}) diff --git a/tests/random/pagerank_test.go b/tests/random/pagerank_test.go index 8395fed..a94620f 100644 --- a/tests/random/pagerank_test.go +++ b/tests/random/pagerank_test.go @@ -5,6 +5,7 @@ import ( "math/rand/v2" "testing" + "github.com/pippellia-btc/slicex" "github.com/vertex-lab/crawler_v2/pkg/pagerank" "github.com/vertex-lab/crawler_v2/pkg/walks" ) @@ -88,7 +89,7 @@ func TestPagerankDynamic(t *testing.T) { store := NewWalkStore() // apply a random delta to the graph - delta := randomElement(test.deltas) + delta := slicex.RandomElement(test.deltas) test.walker.Update(ctx, delta) rwalks, err := walks.Generate(ctx, test.walker, test.nodes...) @@ -172,8 +173,3 @@ func TestPersonalized(t *testing.T) { }) } } - -// returns a random element of a slice. It panics if the slice is empty or nil. -func randomElement[S []E, E any](s S) E { - return s[rand.IntN(len(s))] -}