personalized walk

This commit is contained in:
pippellia-btc
2025-05-23 12:17:58 +02:00
parent d27252275b
commit 9b02f87b25
3 changed files with 114 additions and 1 deletions

View File

@@ -12,10 +12,12 @@ type Delta struct {
Added []ID
}
// Old returns the old state of the delta: the common IDs followed by the removed ones.
// Appending never writes into the backing array of d.Common, so the result of
// Old cannot be clobbered by (or clobber) the result of New or any other append
// on d.Common. When there is nothing to append, the result is a view of d.Common.
func (d Delta) Old() []ID {
	// The full slice expression caps the capacity at the length, which forces
	// append to allocate a new array instead of reusing spare capacity.
	common := d.Common[:len(d.Common):len(d.Common)]
	return append(common, d.Removed...)
}
// New returns the new state of the delta: the common IDs followed by the added ones.
// Appending never writes into the backing array of d.Common, so the result of
// New cannot be clobbered by (or clobber) the result of Old or any other append
// on d.Common. When there is nothing to append, the result is a view of d.Common.
func (d Delta) New() []ID {
	// The full slice expression caps the capacity at the length, which forces
	// append to allocate a new array instead of reusing spare capacity.
	common := d.Common[:len(d.Common):len(d.Common)]
	return append(common, d.Added...)
}

View File

@@ -5,6 +5,8 @@ import (
"errors"
"fmt"
"github/pippellia-btc/crawler/pkg/graph"
"github/pippellia-btc/crawler/pkg/walks"
"math/rand/v2"
)
var ErrEmptyWalkStore = errors.New("the walk store is empty")
@@ -30,7 +32,7 @@ func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float
return nil, fmt.Errorf("Global: failed to get the visits total: %w", err)
}
if total == 0 {
if total <= 0 {
return nil, ErrEmptyWalkStore
}
@@ -47,3 +49,102 @@ func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float
return pageranks, nil
}
// pWalk is a personalized walk, which is a random walk that resets to a specified node
// and continues until it reaches a specified target length.
type pWalk struct {
	start graph.ID // the starting node, which the walk goes back to on every reset
	node graph.ID // the current node
	ongoing walks.Walk // the current walk, not yet part of the union
	union []graph.ID // the concatenation of the paths of all previous walks
}
// newPersonalizedWalk returns a personalized walk positioned at start, whose
// ongoing walk contains only start and whose union is empty.
// The target is used only as a capacity hint for the union. A negative target
// is treated as zero, because make panics when given a negative capacity.
func newPersonalizedWalk(start graph.ID, target int) *pWalk {
	return &pWalk{
		start:   start,
		node:    start,
		ongoing: walks.Walk{Path: []graph.ID{start}},
		union:   make([]graph.ID, 0, max(target, 0)),
	}
}
// Reached reports whether the union of the previous walks already holds
// at least lenght nodes, meaning the personalized walk is long enough.
func (w *pWalk) Reached(lenght int) bool {
	visited := len(w.union)
	return visited >= lenght
}
// Reset closes the ongoing walk: its path is added to the union, after which
// the walk goes back to the starting node with a fresh single-node path.
func (w *pWalk) Reset() {
	finished := w.ongoing.Path
	w.union = append(w.union, finished...)

	w.node = w.start
	w.ongoing = walks.Walk{Path: []graph.ID{w.start}}
}
// WalkPool provides paths that a personalized walk can reuse instead of
// walking the graph step by step. It makes sure a walk is returned only once,
// avoiding bias in the [Personalized] pagerank.
type WalkPool interface {
	// Next returns a path that starts with the provided node.
	// The boolean reports whether a path was returned; when it is false,
	// personalizedWalk ignores the path and performs a manual step instead.
	Next(node graph.ID) ([]graph.ID, bool)
}
// personalizedWalk simulates a long personalized random walk that starts from
// the start node and resets to it. Whenever possible, paths from the [WalkPool]
// are used to speed up the computation; otherwise the walk advances one step at
// a time using the walker.
//
// The ongoing walk is ended (and added to the result) when a random draw exceeds
// [walks.Alpha], when a pooled path is grafted, when a dangling node is found,
// or when the next node would close a cycle.
//
// It returns the concatenation of all the walks performed, which holds at least
// length nodes and can be longer, because walks are always added in full.
// It returns an error if the context is cancelled or if the walker fails.
func personalizedWalk(
	ctx context.Context,
	walker walks.Walker,
	pool WalkPool,
	start graph.ID,
	length int) ([]graph.ID, error) {

	walk := newPersonalizedWalk(start, length)
	for !walk.Reached(length) {
		// The loop can run for a long time without ever calling the walker
		// (e.g. when the pool keeps providing paths), so cancellation must be
		// checked explicitly.
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		if rand.Float64() > walks.Alpha {
			walk.Reset()
			continue
		}

		if path, exists := pool.Next(walk.node); exists {
			// graft the given path, which ends the ongoing walk
			walk.ongoing.Graft(path)
			walk.Reset()
			continue
		}

		// no path available in the pool, perform one manual step
		follows, err := walker.Follows(ctx, walk.node)
		if err != nil {
			return nil, err
		}

		if len(follows) == 0 {
			// found a dangling node, stop
			walk.Reset()
			continue
		}

		node := randomElement(follows)
		if walk.ongoing.Visits(node) {
			// found a cycle, stop
			walk.Reset()
			continue
		}

		walk.node = node
		walk.ongoing.Append(node)
	}

	return walk.union, nil
}
// returns a random element of a slice. It panics if the slice is empty or nil.
func randomElement[S []E, E any](s S) E {
return s[rand.IntN(len(s))]
}

View File

@@ -29,6 +29,11 @@ type Walker interface {
Follows(ctx context.Context, node graph.ID) ([]graph.ID, error)
}
// New creates a walk whose path is empty but has capacity for n nodes.
func New(n int) Walk {
	path := make([]graph.ID, 0, n)
	return Walk{Path: path}
}
// Len returns the length of the walk
func (w Walk) Len() int {
return len(w.Path)
@@ -51,6 +56,11 @@ func (w Walk) Copy() Walk {
return Walk{ID: w.ID, Path: path}
}
// Append adds the given nodes, in order, to the end of the walk's path.
func (w *Walk) Append(nodes ...graph.ID) {
	if len(nodes) == 0 {
		// nothing to add, the path is left untouched
		return
	}
	w.Path = append(w.Path, nodes...)
}
// Prune the walk at the specified index (excluded).
// It panics if the index is not within the bounds of the walk
func (w *Walk) Prune(cut int) {