mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-17 07:24:21 +01:00
rewrite and simplification
This commit is contained in:
49
pkg/pagerank/pagerank.go
Normal file
49
pkg/pagerank/pagerank.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package pagerank
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github/pippellia-btc/crawler/pkg/graph"
|
||||
)
|
||||
|
||||
// ErrEmptyWalkStore is returned by Global when the total number of visits is 0.
var ErrEmptyWalkStore = errors.New("the walk store is empty")
|
||||
|
||||
type VisitCounter interface {
|
||||
// TotalVisits returns the total number of visits, which is the sum of the lengths of all walks.
|
||||
TotalVisits(ctx context.Context) (int, error)
|
||||
|
||||
// Visits returns the number of times each specified node was visited during the walks.
|
||||
// The returned slice contains counts in the same order as the input nodes.
|
||||
Visits(ctx context.Context, nodes ...graph.ID) ([]int, error)
|
||||
}
|
||||
|
||||
// Global computes the global pagerank score for the specified nodes.
|
||||
// If a node is not found, its pagerank is assumed to be 0.
|
||||
func Global(ctx context.Context, count VisitCounter, nodes ...graph.ID) ([]float64, error) {
|
||||
if len(nodes) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
total, err := count.TotalVisits(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Global: failed to get the visits total: %w", err)
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
return nil, ErrEmptyWalkStore
|
||||
}
|
||||
|
||||
visits, err := count.Visits(ctx, nodes...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Global: failed to get the nodes visits: %w", err)
|
||||
}
|
||||
|
||||
pageranks := make([]float64, len(visits))
|
||||
for i, v := range visits {
|
||||
pageranks[i] = float64(v) / float64(total)
|
||||
|
||||
}
|
||||
|
||||
return pageranks, nil
|
||||
}
|
||||
91
pkg/pagerank/utils.go
Normal file
91
pkg/pagerank/utils.go
Normal file
@@ -0,0 +1,91 @@
|
||||
package pagerank
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github/pippellia-btc/crawler/pkg/graph"
|
||||
"github/pippellia-btc/crawler/pkg/walks"
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type WalkStore struct {
|
||||
nextID int
|
||||
Walks map[walks.ID]walks.Walk
|
||||
}
|
||||
|
||||
func NewWalkStore() *WalkStore {
|
||||
return &WalkStore{
|
||||
Walks: make(map[walks.ID]walks.Walk, 1000),
|
||||
}
|
||||
}
|
||||
|
||||
// AddWalks adds walks with sequentials IDs
|
||||
func (s *WalkStore) AddWalks(w []walks.Walk) {
|
||||
for _, walk := range w {
|
||||
ID := walks.ID(strconv.Itoa(s.nextID))
|
||||
s.nextID++
|
||||
|
||||
walk.ID = ID
|
||||
s.Walks[ID] = walk
|
||||
}
|
||||
}
|
||||
|
||||
// ReplaceWalks reassigns the ID --> walk
|
||||
func (s *WalkStore) ReplaceWalks(w []walks.Walk) {
|
||||
for _, walk := range w {
|
||||
s.Walks[walk.ID] = walk
|
||||
}
|
||||
}
|
||||
|
||||
func (s *WalkStore) WalksVisiting(node graph.ID) []walks.Walk {
|
||||
visiting := make([]walks.Walk, 0, walks.N)
|
||||
for _, walk := range s.Walks {
|
||||
if walk.Visits(node) {
|
||||
visiting = append(visiting, walk)
|
||||
}
|
||||
}
|
||||
|
||||
return visiting
|
||||
}
|
||||
|
||||
func (s *WalkStore) TotalVisits(ctx context.Context) (int, error) {
|
||||
total := 0
|
||||
for _, walk := range s.Walks {
|
||||
total += walk.Len()
|
||||
}
|
||||
return total, nil
|
||||
}
|
||||
|
||||
func (s *WalkStore) Visits(ctx context.Context, nodes ...graph.ID) ([]int, error) {
|
||||
if len(nodes) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
count := make(map[graph.ID]int, len(nodes))
|
||||
for _, walk := range s.Walks {
|
||||
for _, node := range walk.Path {
|
||||
count[node]++
|
||||
}
|
||||
}
|
||||
|
||||
visits := make([]int, len(nodes))
|
||||
for i, node := range nodes {
|
||||
visits[i] = count[node]
|
||||
}
|
||||
|
||||
return visits, nil
|
||||
}
|
||||
|
||||
// Distance returns the L1 distance between r1 and r2, which is the sum of the
// absolute differences of the elements at the same index.
// If the two slices have different lengths, it returns math.MaxFloat64.
func Distance(r1, r2 []float64) float64 {
	if len(r1) != len(r2) {
		return math.MaxFloat64
	}

	sum := 0.0
	for i, a := range r1 {
		sum += math.Abs(a - r2[i])
	}
	return sum
}
|
||||
Reference in New Issue
Block a user