rewrite and simplification

This commit is contained in:
pippellia-btc
2025-05-21 16:10:29 +02:00
commit 17f692a5f4
9 changed files with 1014 additions and 0 deletions

49
pkg/pagerank/pagerank.go Normal file
View File

@@ -0,0 +1,49 @@
package pagerank
import (
"context"
"errors"
"fmt"
"github/pippellia-btc/crawler/pkg/graph"
)
// ErrEmptyWalkStore is returned by Global when the visit counter reports
// a total of zero visits, since no pagerank can be computed in that case.
var ErrEmptyWalkStore = errors.New("the walk store is empty")
// VisitCounter reports how many times nodes have been visited by walks.
// It is the only dependency Global needs to compute pagerank scores.
type VisitCounter interface {
// TotalVisits returns the total number of visits, which is the sum of the lengths of all walks.
TotalVisits(ctx context.Context) (int, error)
// Visits returns the number of times each specified node was visited during the walks.
// The returned slice contains counts in the same order as the input nodes.
Visits(ctx context.Context, nodes ...graph.ID) ([]int, error)
}
// Global returns the global pagerank of each of the given nodes, computed as
// the node's visit count divided by the total number of visits reported by count.
// The scores are returned in the same order as the visit counts produced by
// count.Visits; a node with zero visits gets a pagerank of 0.
//
// It returns (nil, nil) when no nodes are given, and ErrEmptyWalkStore when
// the total number of visits is zero. Errors from count are wrapped.
func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float64, error) {
	if len(nodes) == 0 {
		return nil, nil
	}

	total, err := count.TotalVisits(ctx)
	switch {
	case err != nil:
		return nil, fmt.Errorf("Global: failed to get the visits total: %w", err)
	case total == 0:
		// Dividing by zero visits would be meaningless, so report it explicitly.
		return nil, ErrEmptyWalkStore
	}

	visits, err := count.Visits(ctx, nodes...)
	if err != nil {
		return nil, fmt.Errorf("Global: failed to get the nodes visits: %w", err)
	}

	denominator := float64(total)
	ranks := make([]float64, len(visits))
	for i := range visits {
		ranks[i] = float64(visits[i]) / denominator
	}
	return ranks, nil
}

91
pkg/pagerank/utils.go Normal file
View File

@@ -0,0 +1,91 @@
package pagerank
import (
"context"
"github/pippellia-btc/crawler/pkg/graph"
"github/pippellia-btc/crawler/pkg/walks"
"math"
"strconv"
)
// WalkStore is an in-memory collection of walks indexed by their ID.
// Its TotalVisits and Visits methods match the VisitCounter interface.
type WalkStore struct {
// nextID is the number AddWalks converts into the ID of the next walk it adds.
nextID int
// Walks maps each walk ID to the corresponding walk.
Walks map[walks.ID]walks.Walk
}
// NewWalkStore returns an empty WalkStore whose walk map is pre-sized
// for 1000 entries.
func NewWalkStore() *WalkStore {
	store := new(WalkStore)
	store.Walks = make(map[walks.ID]walks.Walk, 1000)
	return store
}
// AddWalks stores a copy of every walk in w under a freshly assigned
// sequential ID (the decimal form of an internal counter), overwriting
// whatever ID the walk carried. The walks in w itself are left untouched.
func (s *WalkStore) AddWalks(w []walks.Walk) {
	for i := range w {
		added := w[i]
		added.ID = walks.ID(strconv.Itoa(s.nextID))
		s.Walks[added.ID] = added
		s.nextID++
	}
}
// ReplaceWalks stores every walk in w under the ID it already carries,
// replacing any walk previously stored with that ID.
func (s *WalkStore) ReplaceWalks(w []walks.Walk) {
	for i := range w {
		s.Walks[w[i].ID] = w[i]
	}
}
// WalksVisiting returns the stored walks for which Visits(node) is true.
// The result follows map iteration order, so its ordering is unspecified.
func (s *WalkStore) WalksVisiting(node graph.ID) []walks.Walk {
	matches := make([]walks.Walk, 0, walks.N)
	for _, candidate := range s.Walks {
		if !candidate.Visits(node) {
			continue
		}
		matches = append(matches, candidate)
	}
	return matches
}
// TotalVisits returns the sum of the lengths of all stored walks.
// The context is not used and the returned error is always nil.
func (s *WalkStore) TotalVisits(ctx context.Context) (int, error) {
	var sum int
	for _, stored := range s.Walks {
		sum += stored.Len()
	}
	return sum, nil
}
// Visits returns, for each of the given nodes, the number of times it appears
// in the paths of the stored walks. Counts are returned in the same order as
// nodes; a node that appears in no walk gets 0, and a node listed more than
// once gets the same count at each of its positions.
//
// It returns (nil, nil) when no nodes are given. The context is not used and
// the returned error is always nil.
func (s *WalkStore) Visits(ctx context.Context, nodes ...graph.ID) ([]int, error) {
	if len(nodes) == 0 {
		return nil, nil
	}

	// Seed the counter with the requested nodes only, so that the map stays
	// bounded by len(nodes) instead of growing to every node ever visited.
	count := make(map[graph.ID]int, len(nodes))
	for _, node := range nodes {
		count[node] = 0
	}

	for _, walk := range s.Walks {
		for _, node := range walk.Path {
			if _, requested := count[node]; requested {
				count[node]++
			}
		}
	}

	visits := make([]int, len(nodes))
	for i, node := range nodes {
		visits[i] = count[node]
	}
	return visits, nil
}
// Distance computes the L1 distance between r1 and r2, i.e. the sum of the
// absolute differences of their elements at matching positions.
// When the two slices differ in length it returns math.MaxFloat64.
func Distance(r1, r2 []float64) float64 {
	if len(r1) != len(r2) {
		return math.MaxFloat64
	}

	sum := 0.0
	for i, rank := range r1 {
		sum += math.Abs(rank - r2[i])
	}
	return sum
}