From 9b02f87b25dfa4d58357ad3b113c9f08b65ae6d4 Mon Sep 17 00:00:00 2001 From: pippellia-btc Date: Fri, 23 May 2025 12:17:58 +0200 Subject: [PATCH] personalized walk --- pkg/graph/graph.go | 2 + pkg/pagerank/pagerank.go | 103 ++++++++++++++++++++++++++++++++++++++- pkg/walks/walks.go | 10 ++++ 3 files changed, 114 insertions(+), 1 deletion(-) diff --git a/pkg/graph/graph.go b/pkg/graph/graph.go index 25a3a0c..1885a6b 100644 --- a/pkg/graph/graph.go +++ b/pkg/graph/graph.go @@ -12,10 +12,12 @@ type Delta struct { Added []ID } +// Old returns the old state of the delta func (d Delta) Old() []ID { return append(d.Common, d.Removed...) } +// New returns the new state of the delta func (d Delta) New() []ID { return append(d.Common, d.Added...) } diff --git a/pkg/pagerank/pagerank.go b/pkg/pagerank/pagerank.go index a3adc59..e869269 100644 --- a/pkg/pagerank/pagerank.go +++ b/pkg/pagerank/pagerank.go @@ -5,6 +5,8 @@ import ( "errors" "fmt" "github/pippellia-btc/crawler/pkg/graph" + "github/pippellia-btc/crawler/pkg/walks" + "math/rand/v2" ) var ErrEmptyWalkStore = errors.New("the walk store is empty") @@ -30,7 +32,7 @@ func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float return nil, fmt.Errorf("Global: failed to get the visits total: %w", err) } - if total == 0 { + if total <= 0 { return nil, ErrEmptyWalkStore } @@ -47,3 +49,102 @@ func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float return pageranks, nil } + +// pWalk is a personalized walk, which is a random walk that resets to a specified node +// and continues until it reaches a specified target length.
+type pWalk struct {
+	start graph.ID // the starting node, which the walk goes back to on every reset
+	node  graph.ID // the current node
+
+	ongoing walks.Walk // the current walk, whose path always begins with start
+	union   []graph.ID // the concatenation of all previous walk paths
+}
+
+// newPersonalizedWalk returns a personalized walk positioned on start, with an
+// ongoing walk made of start alone and an empty union preallocated for target nodes.
+// A negative target is treated as zero instead of panicking, because such a
+// walk is already considered long enough by Reached.
+func newPersonalizedWalk(start graph.ID, target int) *pWalk {
+	return &pWalk{
+		start:   start,
+		node:    start,
+		ongoing: walks.Walk{Path: []graph.ID{start}},
+		union:   make([]graph.ID, 0, max(target, 0)),
+	}
+}
+
+// Reached returns whether the personalized walk is long enough, meaning that
+// the union of the previous walk paths holds at least lenght nodes.
+// The ongoing walk is not counted until Reset is called.
+func (w *pWalk) Reached(lenght int) bool {
+	return len(w.union) >= lenght
+}
+
+// Reset the walk to its base state after appending the ongoing walk to the union.
+// The base state is a fresh ongoing walk made of the starting node alone.
+func (w *pWalk) Reset() {
+	w.union = append(w.union, w.ongoing.Path...)
+	w.ongoing = walks.Walk{Path: []graph.ID{w.start}}
+	w.node = w.start
+}
+
+// WalkPool makes sure a walk is returned only once, avoiding bias in the [Personalized]
+type WalkPool interface {
+	// Next returns a path that starts with the provided node.
+	// The boolean reports whether such a path was available.
+	Next(node graph.ID) ([]graph.ID, bool)
+}
+
+// personalizedWalk simulates a long personalized random walk
+// starting from a node with reset to itself. Whenever possible, walks from the
+// [WalkPool] are used to speed up the computation.
+func personalizedWalk(
+	ctx context.Context,
+	walker walks.Walker,
+	pool WalkPool,
+	start graph.ID,
+	lenght int) ([]graph.ID, error) {
+
+	walk := newPersonalizedWalk(start, lenght)
+
+	for !walk.Reached(lenght) {
+		// the loop can run for a long time without ever calling the walker
+		// (resets and pooled paths), so cancellation is checked at every step
+		if err := ctx.Err(); err != nil {
+			return nil, err
+		}
+
+		if rand.Float64() > walks.Alpha {
+			// random reset to the starting node
+			walk.Reset()
+			continue
+		}
+
+		if path, ok := pool.Next(walk.node); ok {
+			// graft the given path onto the ongoing walk, then close it
+			walk.ongoing.Graft(path)
+			walk.Reset()
+			continue
+		}
+
+		// no path available in the pool, perform one manual step
+		follows, err := walker.Follows(ctx, walk.node)
+		if err != nil {
+			return nil, fmt.Errorf("personalizedWalk: failed to get the follows of %v: %w", walk.node, err)
+		}
+
+		if len(follows) == 0 {
+			// found a dangling node, stop
+			walk.Reset()
+			continue
+		}
+
+		node := randomElement(follows)
+		if walk.ongoing.Visits(node) {
+			// found a cycle, stop
+			walk.Reset()
+			continue
+		}
+
+		walk.node = node
+		walk.ongoing.Append(node)
+	}
+
+	// the union holds at least lenght nodes; it is returned as is, without truncation
+	return walk.union, nil
+}
+
+// randomElement returns a random element of a slice. It panics if the slice is empty or nil.
+func randomElement[S []E, E any](s S) E {
+	return s[rand.IntN(len(s))]
+}
diff --git a/pkg/walks/walks.go b/pkg/walks/walks.go
index faf388f..2436df9 100644
--- a/pkg/walks/walks.go
+++ b/pkg/walks/walks.go
@@ -29,6 +29,11 @@ type Walker interface {
 	Follows(ctx context.Context, node graph.ID) ([]graph.ID, error)
 }
 
+// New returns a new walk with a preallocated empty path
+func New(n int) Walk {
+	return Walk{Path: make([]graph.ID, 0, n)}
+}
+
 // Len returns the lenght of the walk
 func (w Walk) Len() int {
 	return len(w.Path)
@@ -51,6 +56,11 @@ func (w Walk) Copy() Walk {
 	return Walk{ID: w.ID, Path: path}
 }
 
+// Append some nodes to the end of the walk
+func (w *Walk) Append(nodes ...graph.ID) {
+	w.Path = append(w.Path, nodes...)
+}
+
 // Prune the walk at the specified index (excluded).
 // It panics if the index is not within the bounds of the walk
 func (w *Walk) Prune(cut int) {