mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-17 07:24:21 +01:00
personalized walk
This commit is contained in:
@@ -12,10 +12,12 @@ type Delta struct {
|
|||||||
Added []ID
|
Added []ID
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Old returns the old state of the delta
|
||||||
func (d Delta) Old() []ID {
|
func (d Delta) Old() []ID {
|
||||||
return append(d.Common, d.Removed...)
|
return append(d.Common, d.Removed...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// New returns the new state of the delta
|
||||||
func (d Delta) New() []ID {
|
func (d Delta) New() []ID {
|
||||||
return append(d.Common, d.Added...)
|
return append(d.Common, d.Added...)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github/pippellia-btc/crawler/pkg/graph"
|
"github/pippellia-btc/crawler/pkg/graph"
|
||||||
|
"github/pippellia-btc/crawler/pkg/walks"
|
||||||
|
"math/rand/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
var ErrEmptyWalkStore = errors.New("the walk store is empty")
|
var ErrEmptyWalkStore = errors.New("the walk store is empty")
|
||||||
@@ -30,7 +32,7 @@ func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float
|
|||||||
return nil, fmt.Errorf("Global: failed to get the visits total: %w", err)
|
return nil, fmt.Errorf("Global: failed to get the visits total: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if total == 0 {
|
if total <= 0 {
|
||||||
return nil, ErrEmptyWalkStore
|
return nil, ErrEmptyWalkStore
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -47,3 +49,102 @@ func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float
|
|||||||
|
|
||||||
return pageranks, nil
|
return pageranks, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pWalk is a personalized walk, which is a random walk that resets to a specified node
|
||||||
|
// and continues until it reaches a specified target lenght.
|
||||||
|
type pWalk struct {
|
||||||
|
start graph.ID // the starting node
|
||||||
|
node graph.ID // the current node
|
||||||
|
|
||||||
|
ongoing walks.Walk // the current walk
|
||||||
|
union []graph.ID // the sum of all previous walk paths
|
||||||
|
}
|
||||||
|
|
||||||
|
func newPersonalizedWalk(start graph.ID, target int) *pWalk {
|
||||||
|
return &pWalk{
|
||||||
|
start: start,
|
||||||
|
node: start,
|
||||||
|
ongoing: walks.Walk{Path: []graph.ID{start}},
|
||||||
|
union: make([]graph.ID, 0, target),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reached returns whether the personalized walk is long enough
|
||||||
|
func (w *pWalk) Reached(lenght int) bool {
|
||||||
|
return len(w.union) >= lenght
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset the walk to its base state after appending the ongoing walk to the union
|
||||||
|
func (w *pWalk) Reset() {
|
||||||
|
w.union = append(w.union, w.ongoing.Path...)
|
||||||
|
w.ongoing = walks.Walk{Path: []graph.ID{w.start}}
|
||||||
|
w.node = w.start
|
||||||
|
}
|
||||||
|
|
||||||
|
// WalkPool makes sure a walk is returned only once, avoiding bias in the [Personalized]
|
||||||
|
type WalkPool interface {
|
||||||
|
// Next returns a path that starts with the provided node
|
||||||
|
Next(node graph.ID) ([]graph.ID, bool)
|
||||||
|
}
|
||||||
|
|
||||||
|
// The personalizedWalk() function simulates a long personalized random walk
|
||||||
|
// starting from a node with reset to itself. Whenever possible, walks from the
|
||||||
|
// [WalkCache] are used to speed up the computation.
|
||||||
|
func personalizedWalk(
|
||||||
|
ctx context.Context,
|
||||||
|
walker walks.Walker,
|
||||||
|
pool WalkPool,
|
||||||
|
start graph.ID,
|
||||||
|
lenght int) ([]graph.ID, error) {
|
||||||
|
|
||||||
|
var path []graph.ID
|
||||||
|
var exists bool
|
||||||
|
walk := newPersonalizedWalk(start, lenght)
|
||||||
|
|
||||||
|
for {
|
||||||
|
if walk.Reached(lenght) {
|
||||||
|
return walk.union, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if rand.Float64() > walks.Alpha {
|
||||||
|
walk.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
path, exists = pool.Next(walk.node)
|
||||||
|
switch exists {
|
||||||
|
case true:
|
||||||
|
// graft the given path
|
||||||
|
walk.ongoing.Graft(path)
|
||||||
|
walk.Reset()
|
||||||
|
|
||||||
|
case false:
|
||||||
|
// perform one manual step
|
||||||
|
follows, err := walker.Follows(ctx, walk.node)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(follows) == 0 {
|
||||||
|
// found a dandling node, stop
|
||||||
|
walk.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
node := randomElement(follows)
|
||||||
|
if walk.ongoing.Visits(node) {
|
||||||
|
// found a cycle, stop
|
||||||
|
walk.Reset()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
walk.node = node
|
||||||
|
walk.ongoing.Append(node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns a random element of a slice. It panics if the slice is empty or nil.
|
||||||
|
func randomElement[S []E, E any](s S) E {
|
||||||
|
return s[rand.IntN(len(s))]
|
||||||
|
}
|
||||||
|
|||||||
@@ -29,6 +29,11 @@ type Walker interface {
|
|||||||
Follows(ctx context.Context, node graph.ID) ([]graph.ID, error)
|
Follows(ctx context.Context, node graph.ID) ([]graph.ID, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// New returns a new walk with a preallocated empty path
|
||||||
|
func New(n int) Walk {
|
||||||
|
return Walk{Path: make([]graph.ID, 0, n)}
|
||||||
|
}
|
||||||
|
|
||||||
// Len returns the length of the walk
|
// Len returns the length of the walk
|
||||||
func (w Walk) Len() int {
|
func (w Walk) Len() int {
|
||||||
return len(w.Path)
|
return len(w.Path)
|
||||||
@@ -51,6 +56,11 @@ func (w Walk) Copy() Walk {
|
|||||||
return Walk{ID: w.ID, Path: path}
|
return Walk{ID: w.ID, Path: path}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Append some nodes to the end of the walk
|
||||||
|
func (w *Walk) Append(nodes ...graph.ID) {
|
||||||
|
w.Path = append(w.Path, nodes...)
|
||||||
|
}
|
||||||
|
|
||||||
// Prune the walk at the specified index (excluded).
|
// Prune the walk at the specified index (excluded).
|
||||||
// It panics if the index is not within the bounds of the walk
|
// It panics if the index is not within the bounds of the walk
|
||||||
func (w *Walk) Prune(cut int) {
|
func (w *Walk) Prune(cut int) {
|
||||||
|
|||||||
Reference in New Issue
Block a user