mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-16 23:14:19 +01:00
removed duplicated code thanks to slicex
This commit is contained in:
1
go.mod
1
go.mod
@@ -7,6 +7,7 @@ toolchain go1.24.3
|
||||
require (
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/nbd-wtf/go-nostr v0.51.12
|
||||
github.com/pippellia-btc/slicex v0.2.4
|
||||
github.com/redis/go-redis/v9 v9.8.0
|
||||
github.com/vertex-lab/relay v0.4.7
|
||||
)
|
||||
|
||||
8
go.sum
8
go.sum
@@ -52,6 +52,10 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/nbd-wtf/go-nostr v0.51.12 h1:MRQcrShiW/cHhnYSVDQ4SIEc7DlYV7U7gg/l4H4gbbE=
|
||||
github.com/nbd-wtf/go-nostr v0.51.12/go.mod h1:IF30/Cm4AS90wd1GjsFJbBqq7oD1txo+2YUFYXqK3Nc=
|
||||
github.com/pippellia-btc/slicex v0.2.3 h1:QNGp1UtdlAOeTPvYrttS6tFZnFISQRcuhVMMTZqx/B4=
|
||||
github.com/pippellia-btc/slicex v0.2.3/go.mod h1:fu7VjA9Cdk76wIUlkzWOYiMG8/VEs1fJiUhkKqEopd8=
|
||||
github.com/pippellia-btc/slicex v0.2.4 h1:zVZ7c0pZu01LL0f+cWPrgBdPJHQ3iMyGjf7ucGoO6RY=
|
||||
github.com/pippellia-btc/slicex v0.2.4/go.mod h1:fu7VjA9Cdk76wIUlkzWOYiMG8/VEs1fJiUhkKqEopd8=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/puzpuzpuz/xsync/v3 v3.5.1 h1:GJYJZwO6IdxN/IKbneznS6yPkVC+c3zyY/j19c++5Fg=
|
||||
@@ -78,10 +82,6 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
|
||||
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/vertex-lab/relay v0.4.5 h1:6elykMvZFV+y/JYNhaNo3+pxbK66QnrmAIv3/3CN+VU=
|
||||
github.com/vertex-lab/relay v0.4.5/go.mod h1:K3Utw2y0FhhDigT86A2gc/ZqK6MrF06mw6p0CihD4I4=
|
||||
github.com/vertex-lab/relay v0.4.6 h1:wNMr440tcIqcBZulIrrPKkaELI8NG3sPwsm3huq5aSo=
|
||||
github.com/vertex-lab/relay v0.4.6/go.mod h1:K3Utw2y0FhhDigT86A2gc/ZqK6MrF06mw6p0CihD4I4=
|
||||
github.com/vertex-lab/relay v0.4.7 h1:VsOmkJNfMI9iz+UonD1StHemOvgt/33COgqpJA1COEk=
|
||||
github.com/vertex-lab/relay v0.4.7/go.mod h1:K3Utw2y0FhhDigT86A2gc/ZqK6MrF06mw6p0CihD4I4=
|
||||
golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw=
|
||||
|
||||
@@ -5,9 +5,10 @@ package graph
|
||||
import (
|
||||
"errors"
|
||||
"math/rand/v2"
|
||||
"slices"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/pippellia-btc/slicex"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -76,34 +77,7 @@ func NewDelta(kind int, node ID, old, new []ID) Delta {
|
||||
Node: node,
|
||||
}
|
||||
|
||||
slices.Sort(old)
|
||||
slices.Sort(new)
|
||||
i, j := 0, 0
|
||||
oldLen, newLen := len(old), len(new)
|
||||
|
||||
for i < oldLen && j < newLen {
|
||||
switch {
|
||||
case old[i] < new[j]:
|
||||
// ID is in old but not in new => remove
|
||||
delta.Remove = append(delta.Remove, old[i])
|
||||
i++
|
||||
|
||||
case old[i] > new[j]:
|
||||
// ID is in new but not in old => add
|
||||
delta.Add = append(delta.Add, new[j])
|
||||
j++
|
||||
|
||||
default:
|
||||
// ID is in both => keep
|
||||
delta.Keep = append(delta.Keep, old[i])
|
||||
i++
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
// add all elements not traversed
|
||||
delta.Remove = append(delta.Remove, old[i:]...)
|
||||
delta.Add = append(delta.Add, new[j:]...)
|
||||
delta.Remove, delta.Keep, delta.Add = slicex.Partition(old, new)
|
||||
return delta
|
||||
}
|
||||
|
||||
|
||||
@@ -15,26 +15,26 @@ func TestNewDelta(t *testing.T) {
|
||||
}{
|
||||
{
|
||||
name: "nil slices",
|
||||
expected: Delta{Kind: 3, Node: "0"},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{}, Keep: []ID{}, Add: []ID{}},
|
||||
},
|
||||
{
|
||||
name: "empty slices",
|
||||
expected: Delta{Kind: 3, Node: "0"},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{}, Keep: []ID{}, Add: []ID{}},
|
||||
},
|
||||
{
|
||||
name: "only removals",
|
||||
old: []ID{"0", "1", "2", "19", "111"},
|
||||
new: []ID{"2", "19"},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{"0", "1", "111"}, Keep: []ID{"19", "2"}},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{"0", "1", "111"}, Keep: []ID{"2", "19"}, Add: []ID{}},
|
||||
},
|
||||
{
|
||||
name: "only additions",
|
||||
old: []ID{"0", "1"},
|
||||
new: []ID{"420", "0", "1", "69"},
|
||||
expected: Delta{Kind: 3, Node: "0", Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{}, Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}},
|
||||
},
|
||||
{
|
||||
name: "both additions",
|
||||
name: "both",
|
||||
old: []ID{"0", "1", "111"},
|
||||
new: []ID{"420", "0", "1", "69"},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{"111"}, Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}},
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"fmt"
|
||||
"math/rand/v2"
|
||||
|
||||
"github.com/pippellia-btc/slicex"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/graph"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/walks"
|
||||
)
|
||||
@@ -278,7 +279,7 @@ func personalizedWalk(
|
||||
continue
|
||||
}
|
||||
|
||||
node := randomElement(follows)
|
||||
node := slicex.RandomElement(follows)
|
||||
if walk.ongoing.Visits(node) {
|
||||
// found a cycle, stop
|
||||
walk.Reset()
|
||||
@@ -307,8 +308,3 @@ func frequencyMap(path []graph.ID) map[graph.ID]float64 {
|
||||
|
||||
return freqs
|
||||
}
|
||||
|
||||
// returns a random element of a slice. It panics if the slice is empty or nil.
|
||||
func randomElement[S []E, E any](s S) E {
|
||||
return s[rand.IntN(len(s))]
|
||||
}
|
||||
|
||||
@@ -2,14 +2,13 @@
|
||||
package pipe
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
"github.com/pippellia-btc/slicex"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/graph"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/redb"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/walks"
|
||||
@@ -265,28 +264,5 @@ func ParsePubkeys(event *nostr.Event) []string {
|
||||
pubkeys = append(pubkeys, pubkey)
|
||||
}
|
||||
|
||||
return unique(pubkeys)
|
||||
}
|
||||
|
||||
func logEvent(prefix string, e *nostr.Event, extra any) {
|
||||
log.Printf("%s: event ID %s, kind %d by %s: %v", prefix, e.ID, e.Kind, e.PubKey, extra)
|
||||
}
|
||||
|
||||
// Unique returns a slice of unique elements of the input slice.
|
||||
func unique[E cmp.Ordered](slice []E) []E {
|
||||
if len(slice) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
slices.Sort(slice)
|
||||
unique := make([]E, 0, len(slice))
|
||||
unique = append(unique, slice[0])
|
||||
|
||||
for i := 1; i < len(slice); i++ {
|
||||
if slice[i] != slice[i-1] {
|
||||
unique = append(unique, slice[i])
|
||||
}
|
||||
}
|
||||
|
||||
return unique
|
||||
return slicex.Unique(pubkeys)
|
||||
}
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
package redb
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strconv"
|
||||
|
||||
"github.com/pippellia-btc/slicex"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/graph"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/walks"
|
||||
|
||||
@@ -25,9 +24,10 @@ const (
|
||||
)
|
||||
|
||||
var (
|
||||
ErrWalkNotFound = errors.New("walk not found")
|
||||
ErrInvalidReplacement = errors.New("invalid walk replacement")
|
||||
ErrInvalidLimit = errors.New("limit must be a positive integer, or -1 to fetch all walks")
|
||||
ErrInvalidWalkParameters = errors.New("invalid walk parameters")
|
||||
ErrWalkNotFound = errors.New("walk not found")
|
||||
ErrInvalidReplacement = errors.New("invalid walk replacement")
|
||||
ErrInvalidLimit = errors.New("limit must be a positive integer, or -1 to fetch all walks")
|
||||
)
|
||||
|
||||
// init the walk store checking the existence of [KeyRWS].
|
||||
@@ -59,11 +59,11 @@ func (db RedisDB) init() error {
|
||||
}
|
||||
|
||||
if alpha != walks.Alpha {
|
||||
return errors.New("alpha and walks.Alpha are different")
|
||||
return fmt.Errorf("%w: alpha and walks.Alpha are different", ErrInvalidWalkParameters)
|
||||
}
|
||||
|
||||
if N != walks.N {
|
||||
return errors.New("N and walks.N are different")
|
||||
return fmt.Errorf("%w: N and walks.N are different", ErrInvalidWalkParameters)
|
||||
}
|
||||
|
||||
case 0:
|
||||
@@ -172,7 +172,7 @@ func (db RedisDB) WalksVisitingAny(ctx context.Context, nodes []graph.ID, limit
|
||||
IDs = append(IDs, cmd.Val()...)
|
||||
}
|
||||
|
||||
unique := unique(IDs)
|
||||
unique := slicex.Unique(IDs)
|
||||
return db.Walks(ctx, toWalks(unique)...)
|
||||
|
||||
case limit > 0:
|
||||
@@ -198,7 +198,7 @@ func (db RedisDB) WalksVisitingAny(ctx context.Context, nodes []graph.ID, limit
|
||||
IDs = append(IDs, cmd.Val()...)
|
||||
}
|
||||
|
||||
unique := unique(IDs)
|
||||
unique := slicex.Unique(IDs)
|
||||
return db.Walks(ctx, toWalks(unique)...)
|
||||
|
||||
default:
|
||||
@@ -397,22 +397,3 @@ func (db RedisDB) ScanWalks(ctx context.Context, cursor uint64, limit int) ([]wa
|
||||
|
||||
return batch, cursor, nil
|
||||
}
|
||||
|
||||
// unique returns a slice of unique elements of the input slice.
|
||||
func unique[E cmp.Ordered](slice []E) []E {
|
||||
if len(slice) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
slices.Sort(slice)
|
||||
unique := make([]E, 0, len(slice))
|
||||
unique = append(unique, slice[0])
|
||||
|
||||
for i := 1; i < len(slice); i++ {
|
||||
if slice[i] != slice[i-1] {
|
||||
unique = append(unique, slice[i])
|
||||
}
|
||||
}
|
||||
|
||||
return unique
|
||||
}
|
||||
|
||||
@@ -12,30 +12,31 @@ import (
|
||||
"github.com/redis/go-redis/v9"
|
||||
)
|
||||
|
||||
// func TestValidate(t *testing.T) {
|
||||
// tests := []struct {
|
||||
// name string
|
||||
// setup func() (RedisDB, error)
|
||||
// err error
|
||||
// }{
|
||||
// {name: "empty", setup: Empty, err: ErrValueIsNil},
|
||||
// {name: "valid", setup: SomeWalks(0)},
|
||||
// }
|
||||
func TestInit(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func() (RedisDB, error)
|
||||
err error
|
||||
}{
|
||||
{name: "seed", setup: Empty},
|
||||
{name: "invalid", setup: Invalid, err: ErrInvalidWalkParameters},
|
||||
{name: "valid", setup: SomeWalks(0)},
|
||||
}
|
||||
|
||||
// for _, test := range tests {
|
||||
// t.Run(test.name, func(t *testing.T) {
|
||||
// db, err := test.setup()
|
||||
// if err != nil {
|
||||
// t.Fatalf("setup failed: %v", err)
|
||||
// }
|
||||
// defer db.flushAll()
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
db, err := test.setup()
|
||||
if err != nil {
|
||||
t.Fatalf("setup failed: %v", err)
|
||||
}
|
||||
defer db.flushAll()
|
||||
|
||||
// if err = db.validateWalks(); !errors.Is(err, test.err) {
|
||||
// t.Fatalf("expected error %v, got %v", test.err, err)
|
||||
// }
|
||||
// })
|
||||
// }
|
||||
// }
|
||||
if err = db.init(); !errors.Is(err, test.err) {
|
||||
t.Fatalf("expected error %v, got %v", test.err, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWalksVisiting(t *testing.T) {
|
||||
tests := []struct {
|
||||
@@ -353,38 +354,6 @@ func TestValidateReplacement(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnique(t *testing.T) {
|
||||
tests := []struct {
|
||||
slice []walks.ID
|
||||
expected []walks.ID
|
||||
}{
|
||||
{slice: nil, expected: nil},
|
||||
{slice: []walks.ID{}, expected: nil},
|
||||
{slice: []walks.ID{"1", "2", "0"}, expected: []walks.ID{"0", "1", "2"}},
|
||||
{slice: []walks.ID{"1", "2", "0", "3", "1", "0"}, expected: []walks.ID{"0", "1", "2", "3"}},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
unique := unique(test.slice)
|
||||
if !reflect.DeepEqual(unique, test.expected) {
|
||||
t.Errorf("expected %v, got %v", test.expected, unique)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkUnique(b *testing.B) {
|
||||
size := 1000000
|
||||
IDs := make([]walks.ID, size)
|
||||
for i := 0; i < size; i++ {
|
||||
IDs[i] = walks.ID(strconv.Itoa(i))
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
unique(IDs)
|
||||
}
|
||||
}
|
||||
|
||||
var defaultWalk = walks.Walk{Path: []graph.ID{"0", "1"}}
|
||||
|
||||
func SomeWalks(n int) func() (RedisDB, error) {
|
||||
@@ -403,3 +372,11 @@ func SomeWalks(n int) func() (RedisDB, error) {
|
||||
return db, nil
|
||||
}
|
||||
}
|
||||
|
||||
func Invalid() (RedisDB, error) {
|
||||
db := RedisDB{Client: redis.NewClient(&redis.Options{Addr: testAddress})}
|
||||
if err := db.Client.HSet(ctx, KeyRWS, KeyAlpha, 69, KeyWalksPerNode, 420).Err(); err != nil {
|
||||
return RedisDB{}, err
|
||||
}
|
||||
return db, nil
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"math/rand/v2"
|
||||
"slices"
|
||||
|
||||
"github.com/pippellia-btc/slicex"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/graph"
|
||||
)
|
||||
|
||||
@@ -143,7 +144,7 @@ func generate(ctx context.Context, walker Walker, start ...graph.ID) ([]graph.ID
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
node := randomElement(start)
|
||||
node := slicex.RandomElement(start)
|
||||
path := make([]graph.ID, 0, expectedLenght(Alpha))
|
||||
path = append(path, node)
|
||||
|
||||
@@ -162,7 +163,7 @@ func generate(ctx context.Context, walker Walker, start ...graph.ID) ([]graph.ID
|
||||
break
|
||||
}
|
||||
|
||||
node = randomElement(follows)
|
||||
node = slicex.RandomElement(follows)
|
||||
if slices.Contains(path, node) {
|
||||
// found a cycle, stop
|
||||
break
|
||||
@@ -276,11 +277,6 @@ func expectedUpdates(walks []Walk, delta graph.Delta) int {
|
||||
return int(expected + 0.5)
|
||||
}
|
||||
|
||||
// returns a random element of a slice. It panics if the slice is empty or nil.
|
||||
func randomElement[S []E, E any](s S) E {
|
||||
return s[rand.IntN(len(s))]
|
||||
}
|
||||
|
||||
// Find the position of the first repetition in a slice. If there are no cycles, -1 is returned
|
||||
func findCycle[S []K, K comparable](s S) int {
|
||||
seen := make(map[K]struct{})
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"math/rand/v2"
|
||||
"testing"
|
||||
|
||||
"github.com/pippellia-btc/slicex"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/pagerank"
|
||||
"github.com/vertex-lab/crawler_v2/pkg/walks"
|
||||
)
|
||||
@@ -88,7 +89,7 @@ func TestPagerankDynamic(t *testing.T) {
|
||||
store := NewWalkStore()
|
||||
|
||||
// apply a random delta to the graph
|
||||
delta := randomElement(test.deltas)
|
||||
delta := slicex.RandomElement(test.deltas)
|
||||
test.walker.Update(ctx, delta)
|
||||
|
||||
rwalks, err := walks.Generate(ctx, test.walker, test.nodes...)
|
||||
@@ -172,8 +173,3 @@ func TestPersonalized(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// returns a random element of a slice. It panics if the slice is empty or nil.
|
||||
func randomElement[S []E, E any](s S) E {
|
||||
return s[rand.IntN(len(s))]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user