mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-17 07:24:21 +01:00
tested personalized pagerank
This commit is contained in:
@@ -93,7 +93,7 @@ func PersonalizedWithTargets(
|
||||
/*
|
||||
Personalized computes the personalized pagerank of node by simulating a
|
||||
long random walk starting at and resetting to itself. This long walk is generated
|
||||
from the random walks stored in the storage layer.
|
||||
using the random walks in the storage layer whenever possible.
|
||||
|
||||
# REFERENCES
|
||||
|
||||
|
||||
@@ -40,15 +40,15 @@ func TestPagerankStatic(t *testing.T) {
|
||||
}
|
||||
store.AddWalks(walks)
|
||||
|
||||
ranks, err := pagerank.Global(ctx, store, test.nodes...)
|
||||
global, err := pagerank.Global(ctx, store, test.nodes...)
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil, pr %v", err)
|
||||
}
|
||||
|
||||
distance := Distance(ranks, test.ranks)
|
||||
distance := Distance(global, test.global)
|
||||
if distance > expectedDistance {
|
||||
t.Errorf("expected distance %f, got %f\n", expectedDistance, distance)
|
||||
t.Errorf("expected ranks %v, got %v", test.ranks, ranks)
|
||||
t.Errorf("expected ranks %v,\n got %v", test.global, global)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -96,7 +96,7 @@ func TestPagerankDynamic(t *testing.T) {
|
||||
}
|
||||
|
||||
store.AddWalks(rwalks)
|
||||
rwalks = store.WalksVisiting(delta.Node)
|
||||
rwalks = store.WalksVisiting(delta.Node, -1)
|
||||
|
||||
// apply the opposite delta, returning to the original state
|
||||
inv := delta.Inverse()
|
||||
@@ -108,15 +108,65 @@ func TestPagerankDynamic(t *testing.T) {
|
||||
}
|
||||
store.ReplaceWalks(toUpdate)
|
||||
|
||||
ranks, err := pagerank.Global(ctx, store, test.nodes...)
|
||||
global, err := pagerank.Global(ctx, store, test.nodes...)
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil, pr %v", err)
|
||||
}
|
||||
|
||||
distance := Distance(ranks, test.ranks)
|
||||
distance := Distance(global, test.global)
|
||||
if distance > expectedDistance {
|
||||
t.Errorf("inverse delta %v; expected distance %f, got %f\n", inv, expectedDistance, distance)
|
||||
t.Errorf("expected ranks %v,\n got %v", test.ranks, ranks)
|
||||
t.Errorf("expected ranks %v,\n got %v", test.global, global)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestPersonalized(t *testing.T) {
|
||||
expectedDistance := 0.01
|
||||
targetLenght := 1000000
|
||||
walks.Alpha = 0.85
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
Setup
|
||||
}{
|
||||
{name: "all dandling nodes", Setup: Dandlings(11)},
|
||||
{name: "long cycle", Setup: Cyclic(50)},
|
||||
{name: "acyclic graph 1", Setup: Acyclic1},
|
||||
{name: "acyclic graph 2", Setup: Acyclic2},
|
||||
{name: "acyclic graph 3", Setup: Acyclic3},
|
||||
{name: "acyclic graph 4", Setup: Acyclic4},
|
||||
{name: "acyclic graph 5", Setup: Acyclic5},
|
||||
{name: "acyclic graph 6", Setup: Acyclic6},
|
||||
{name: "acyclic graph 7", Setup: Acyclic7},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
// the number of walks should only make the algorithm faster,
|
||||
// without changing its precision. To test this we simply randomize it
|
||||
walks.N = rand.IntN(3000)
|
||||
|
||||
ctx := context.Background()
|
||||
loader := NewMockLoader(test.walker)
|
||||
|
||||
rwalks, err := walks.Generate(ctx, test.walker, test.nodes...)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to generate the walks: %v", err)
|
||||
}
|
||||
loader.AddWalks(rwalks)
|
||||
|
||||
personalized, err := pagerank.PersonalizedWithTargets(ctx, loader, "0", test.nodes, targetLenght)
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil, pr %v", err)
|
||||
}
|
||||
|
||||
distance := Distance(personalized, test.personalized)
|
||||
if distance > expectedDistance {
|
||||
t.Errorf("expected distance %f, got %f\n", expectedDistance, distance)
|
||||
t.Errorf("walks per node %d", walks.N)
|
||||
t.Errorf("expected ranks %v,\n got %v", test.personalized, personalized)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"github/pippellia-btc/crawler/pkg/graph"
|
||||
"github/pippellia-btc/crawler/pkg/walks"
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
@@ -37,9 +36,17 @@ func (s *WalkStore) ReplaceWalks(w []walks.Walk) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *WalkStore) WalksVisiting(node graph.ID) []walks.Walk {
|
||||
func (s *WalkStore) WalksVisiting(node graph.ID, limit int) []walks.Walk {
|
||||
if limit == -1 {
|
||||
limit = 1000000
|
||||
}
|
||||
|
||||
visiting := make([]walks.Walk, 0, walks.N)
|
||||
for _, walk := range s.Walks {
|
||||
if len(visiting) >= limit {
|
||||
break
|
||||
}
|
||||
|
||||
if walk.Visits(node) {
|
||||
visiting = append(visiting, walk)
|
||||
}
|
||||
@@ -76,16 +83,58 @@ func (s *WalkStore) Visits(ctx context.Context, nodes ...graph.ID) ([]int, error
|
||||
return visits, nil
|
||||
}
|
||||
|
||||
// Distance returns the L1 distance between two lists of ranks.
|
||||
func Distance(r1, r2 []float64) float64 {
|
||||
if len(r1) != len(r2) {
|
||||
return math.MaxFloat64
|
||||
type mockLoader struct {
|
||||
walker walks.Walker
|
||||
store *WalkStore
|
||||
}
|
||||
|
||||
var dist float64 = 0
|
||||
for i := range r1 {
|
||||
dist += math.Abs(r1[i] - r2[i])
|
||||
func NewMockLoader(walker walks.Walker) *mockLoader {
|
||||
return &mockLoader{
|
||||
walker: walker,
|
||||
store: NewWalkStore(),
|
||||
}
|
||||
}
|
||||
|
||||
return dist
|
||||
func (l *mockLoader) Follows(ctx context.Context, node graph.ID) ([]graph.ID, error) {
|
||||
return l.walker.Follows(ctx, node)
|
||||
}
|
||||
|
||||
func (l *mockLoader) BulkFollows(ctx context.Context, nodes []graph.ID) (map[graph.ID][]graph.ID, error) {
|
||||
followsMap := make(map[graph.ID][]graph.ID, len(nodes))
|
||||
for _, node := range nodes {
|
||||
follows, err := l.walker.Follows(ctx, node)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
followsMap[node] = follows
|
||||
}
|
||||
|
||||
return followsMap, nil
|
||||
}
|
||||
|
||||
func (l *mockLoader) AddWalks(w []walks.Walk) {
|
||||
l.store.AddWalks(w)
|
||||
}
|
||||
|
||||
func (l *mockLoader) WalksVisitingAny(ctx context.Context, nodes []graph.ID, limit int) ([]walks.Walk, error) {
|
||||
if len(nodes) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if limit == -1 {
|
||||
limit = 1000000
|
||||
}
|
||||
|
||||
limitPerNode := limit / len(nodes)
|
||||
if limitPerNode <= 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
visiting := make([]walks.Walk, 0, limit)
|
||||
for _, node := range nodes {
|
||||
visiting = append(visiting, l.store.WalksVisiting(node, limitPerNode)...)
|
||||
}
|
||||
|
||||
return visiting, nil
|
||||
}
|
||||
|
||||
@@ -3,20 +3,40 @@ package random_test
|
||||
import (
|
||||
"github/pippellia-btc/crawler/pkg/graph"
|
||||
"github/pippellia-btc/crawler/pkg/walks"
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type Setup struct {
|
||||
walker *walks.MapWalker
|
||||
nodes []graph.ID
|
||||
ranks []float64
|
||||
deltas []graph.Delta
|
||||
|
||||
nodes []graph.ID
|
||||
global []float64
|
||||
personalized []float64 // according to node "0"
|
||||
}
|
||||
|
||||
// Distance returns the L1 distance between two lists of ranks, i.e. the sum
// of the absolute element-wise differences.
//
// Lists of different lengths cannot be compared element by element, so
// math.MaxFloat64 is returned as a sentinel; any finite threshold check on
// the result will then fail.
func Distance(r1, r2 []float64) float64 {
	if len(r1) != len(r2) {
		return math.MaxFloat64
	}

	var dist float64
	for i, r := range r1 {
		dist += math.Abs(r - r2[i])
	}
	return dist
}
|
||||
|
||||
// Dandlings returns a setup consisting of n dangling nodes
|
||||
func Dandlings(n int) Setup {
|
||||
nodes := make([]graph.ID, n)
|
||||
ranks := make([]float64, n)
|
||||
global := make([]float64, n)
|
||||
|
||||
personalized := make([]float64, n)
|
||||
personalized[0] = 1
|
||||
|
||||
added := make([]graph.ID, 0, n-1)
|
||||
deltas := make([]graph.Delta, 0, n-1)
|
||||
@@ -24,7 +44,7 @@ func Dandlings(n int) Setup {
|
||||
for i := range n {
|
||||
node := graph.ID(strconv.Itoa(i))
|
||||
nodes[i] = node
|
||||
ranks[i] = 1.0 / float64(n)
|
||||
global[i] = 1.0 / float64(n)
|
||||
|
||||
if i > 0 {
|
||||
// all the possible deltas modulo graph isomorphism; 0 --> [1,2, ... k] for 1 <= k <= n
|
||||
@@ -35,9 +55,10 @@ func Dandlings(n int) Setup {
|
||||
|
||||
return Setup{
|
||||
walker: walks.NewWalker(make(map[graph.ID][]graph.ID)),
|
||||
nodes: nodes,
|
||||
ranks: ranks,
|
||||
deltas: deltas,
|
||||
nodes: nodes,
|
||||
global: global,
|
||||
personalized: personalized,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,22 +66,27 @@ func Dandlings(n int) Setup {
|
||||
func Cyclic(n int) Setup {
|
||||
mid := graph.ID(strconv.Itoa(n / 2))
|
||||
nodes := make([]graph.ID, n)
|
||||
ranks := make([]float64, n)
|
||||
global := make([]float64, n)
|
||||
personalized := make([]float64, n)
|
||||
a := walks.Alpha
|
||||
|
||||
for i := range n {
|
||||
nodes[i] = graph.ID(strconv.Itoa(i))
|
||||
ranks[i] = 1.0 / float64(n)
|
||||
global[i] = 1.0 / float64(n)
|
||||
personalized[i] = math.Pow(a, float64(i)) * (1.0 - a) / (1.0 - math.Pow(a, float64(n)))
|
||||
}
|
||||
|
||||
return Setup{
|
||||
walker: walks.NewCyclicWalker(n),
|
||||
nodes: nodes,
|
||||
ranks: ranks,
|
||||
deltas: []graph.Delta{
|
||||
{Node: "0", Removed: []graph.ID{"1"}},
|
||||
{Node: "0", Common: []graph.ID{"1"}, Added: []graph.ID{mid}},
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Added: []graph.ID{mid}},
|
||||
},
|
||||
|
||||
nodes: nodes,
|
||||
global: global,
|
||||
personalized: personalized,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,8 +100,6 @@ var Acyclic1 = Setup{
|
||||
"3": {"1"},
|
||||
"4": {},
|
||||
}),
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4"},
|
||||
ranks: []float64{0.11185, 0.36950, 0.15943, 0.24736, 0.11185},
|
||||
deltas: []graph.Delta{
|
||||
// removals
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
|
||||
@@ -106,6 +130,9 @@ var Acyclic1 = Setup{
|
||||
{Node: "2", Removed: []graph.ID{"3"}, Added: []graph.ID{"4"}},
|
||||
{Node: "2", Removed: []graph.ID{"3"}, Added: []graph.ID{"1", "4"}},
|
||||
},
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4"},
|
||||
global: []float64{0.11185, 0.36950, 0.15943, 0.24736, 0.11185},
|
||||
personalized: []float64{0.39709, 0.29070, 0.16876, 0.14345, 0.0},
|
||||
}
|
||||
|
||||
var Acyclic2 = Setup{
|
||||
@@ -117,8 +144,6 @@ var Acyclic2 = Setup{
|
||||
"4": {"3", "5"},
|
||||
"5": {},
|
||||
}),
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4", "5"},
|
||||
ranks: []float64{0.12987, 0.18506, 0.18506, 0.18506, 0.12987, 0.18506},
|
||||
deltas: []graph.Delta{
|
||||
// removals
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
|
||||
@@ -136,6 +161,9 @@ var Acyclic2 = Setup{
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3", "5"}},
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3", "4", "5"}},
|
||||
},
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4", "5"},
|
||||
global: []float64{0.12987, 0.18506, 0.18506, 0.18506, 0.12987, 0.18506},
|
||||
personalized: []float64{0.54054, 0.22973, 0.22973, 0.0, 0.0, 0.0},
|
||||
}
|
||||
|
||||
var Acyclic3 = Setup{
|
||||
@@ -145,8 +173,6 @@ var Acyclic3 = Setup{
|
||||
"2": {},
|
||||
"3": {"1", "2"},
|
||||
}),
|
||||
nodes: []graph.ID{"0", "1", "2", "3"},
|
||||
ranks: []float64{0.17544, 0.32456, 0.32456, 0.17544},
|
||||
deltas: []graph.Delta{
|
||||
// removals
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
|
||||
@@ -158,6 +184,9 @@ var Acyclic3 = Setup{
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3"}},
|
||||
{Node: "0", Removed: []graph.ID{"1", "2"}, Added: []graph.ID{"3"}},
|
||||
},
|
||||
nodes: []graph.ID{"0", "1", "2", "3"},
|
||||
global: []float64{0.17544, 0.32456, 0.32456, 0.17544},
|
||||
personalized: []float64{0.54054, 0.22973, 0.22973, 0.0},
|
||||
}
|
||||
|
||||
var Acyclic4 = Setup{
|
||||
@@ -167,8 +196,6 @@ var Acyclic4 = Setup{
|
||||
"2": {},
|
||||
"3": {"1"},
|
||||
}),
|
||||
nodes: []graph.ID{"0", "1", "2", "3"},
|
||||
ranks: []float64{0.17544, 0.39912, 0.25, 0.17544},
|
||||
deltas: []graph.Delta{
|
||||
// removals
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
|
||||
@@ -185,6 +212,9 @@ var Acyclic4 = Setup{
|
||||
{Node: "3", Removed: []graph.ID{"1"}, Added: []graph.ID{"0"}},
|
||||
{Node: "3", Removed: []graph.ID{"1"}, Added: []graph.ID{"0", "2"}},
|
||||
},
|
||||
nodes: []graph.ID{"0", "1", "2", "3"},
|
||||
global: []float64{0.17544, 0.39912, 0.25, 0.17544},
|
||||
personalized: []float64{0.54054, 0.22973, 0.22973, 0.0},
|
||||
}
|
||||
|
||||
var Acyclic5 = Setup{
|
||||
@@ -194,8 +224,6 @@ var Acyclic5 = Setup{
|
||||
"2": {},
|
||||
"3": {"2"},
|
||||
}),
|
||||
nodes: []graph.ID{"0", "1", "2", "3"},
|
||||
ranks: []float64{0.21489, 0.11616, 0.37015, 0.29881},
|
||||
deltas: []graph.Delta{
|
||||
// removals
|
||||
{Node: "0", Removed: []graph.ID{"3"}},
|
||||
@@ -212,6 +240,9 @@ var Acyclic5 = Setup{
|
||||
{Node: "1", Removed: []graph.ID{"0"}, Added: []graph.ID{"3"}},
|
||||
{Node: "1", Removed: []graph.ID{"0"}, Added: []graph.ID{"2", "3"}},
|
||||
},
|
||||
nodes: []graph.ID{"0", "1", "2", "3"},
|
||||
global: []float64{0.21489, 0.11616, 0.37015, 0.29881},
|
||||
personalized: []float64{0.38873, 0.0, 0.28085, 0.33042},
|
||||
}
|
||||
|
||||
var Acyclic6 = Setup{
|
||||
@@ -222,8 +253,6 @@ var Acyclic6 = Setup{
|
||||
"3": {"1", "4"},
|
||||
"4": {"2"},
|
||||
}),
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4"},
|
||||
ranks: []float64{0.18820, 0.12128, 0.32417, 0.08511, 0.28125},
|
||||
deltas: []graph.Delta{
|
||||
// removals
|
||||
{Node: "0", Removed: []graph.ID{"4"}},
|
||||
@@ -255,6 +284,9 @@ var Acyclic6 = Setup{
|
||||
{Node: "3", Removed: []graph.ID{"1", "4"}, Added: []graph.ID{"2"}},
|
||||
{Node: "3", Removed: []graph.ID{"1", "4"}, Added: []graph.ID{"0", "2"}},
|
||||
},
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4"},
|
||||
global: []float64{0.18820, 0.12128, 0.32417, 0.08511, 0.28125},
|
||||
personalized: []float64{0.38873, 0.0, 0.28086, 0.0, 0.33042},
|
||||
}
|
||||
|
||||
var Acyclic7 = Setup{
|
||||
@@ -265,8 +297,6 @@ var Acyclic7 = Setup{
|
||||
"3": {},
|
||||
"4": {"0", "1", "2", "3"},
|
||||
}),
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4"},
|
||||
ranks: []float64{0.17622, 0.22615, 0.22615, 0.22615, 0.14534},
|
||||
deltas: []graph.Delta{
|
||||
// removals
|
||||
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2", "3"}},
|
||||
@@ -281,4 +311,7 @@ var Acyclic7 = Setup{
|
||||
{Node: "1", Added: []graph.ID{"2"}},
|
||||
{Node: "1", Added: []graph.ID{"2", "3"}},
|
||||
},
|
||||
nodes: []graph.ID{"0", "1", "2", "3", "4"},
|
||||
global: []float64{0.17622, 0.22615, 0.22615, 0.22615, 0.14534},
|
||||
personalized: []float64{0.54054, 0.15315, 0.15315, 0.15315, 0.0},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user