tested personalized pagerank

This commit is contained in:
pippellia-btc
2025-05-26 12:12:12 +02:00
parent 0076999e74
commit 11c5afd4f7
4 changed files with 180 additions and 48 deletions

View File

@@ -93,7 +93,7 @@ func PersonalizedWithTargets(
/* /*
Personalized computes the personalized pagerank of node by simulating a Personalized computes the personalized pagerank of node by simulating a
long random walk starting at and resetting to itself. This long walk is generated long random walk starting at and resetting to itself. This long walk is generated
from the random walks stored in the storage layer. using the random walks in the storage layer whenever possible.
# REFERENCES # REFERENCES

View File

@@ -40,15 +40,15 @@ func TestPagerankStatic(t *testing.T) {
} }
store.AddWalks(walks) store.AddWalks(walks)
ranks, err := pagerank.Global(ctx, store, test.nodes...) global, err := pagerank.Global(ctx, store, test.nodes...)
if err != nil { if err != nil {
t.Fatalf("expected nil, pr %v", err) t.Fatalf("expected nil, pr %v", err)
} }
distance := Distance(ranks, test.ranks) distance := Distance(global, test.global)
if distance > expectedDistance { if distance > expectedDistance {
t.Errorf("expected distance %f, got %f\n", expectedDistance, distance) t.Errorf("expected distance %f, got %f\n", expectedDistance, distance)
t.Errorf("expected ranks %v, got %v", test.ranks, ranks) t.Errorf("expected ranks %v,\n got %v", test.global, global)
} }
}) })
} }
@@ -96,7 +96,7 @@ func TestPagerankDynamic(t *testing.T) {
} }
store.AddWalks(rwalks) store.AddWalks(rwalks)
rwalks = store.WalksVisiting(delta.Node) rwalks = store.WalksVisiting(delta.Node, -1)
// apply the opposite delta, returning to the original state // apply the opposite delta, returning to the original state
inv := delta.Inverse() inv := delta.Inverse()
@@ -108,15 +108,65 @@ func TestPagerankDynamic(t *testing.T) {
} }
store.ReplaceWalks(toUpdate) store.ReplaceWalks(toUpdate)
ranks, err := pagerank.Global(ctx, store, test.nodes...) global, err := pagerank.Global(ctx, store, test.nodes...)
if err != nil { if err != nil {
t.Fatalf("expected nil, pr %v", err) t.Fatalf("expected nil, pr %v", err)
} }
distance := Distance(ranks, test.ranks) distance := Distance(global, test.global)
if distance > expectedDistance { if distance > expectedDistance {
t.Errorf("inverse delta %v; expected distance %f, got %f\n", inv, expectedDistance, distance) t.Errorf("inverse delta %v; expected distance %f, got %f\n", inv, expectedDistance, distance)
t.Errorf("expected ranks %v,\n got %v", test.ranks, ranks) t.Errorf("expected ranks %v,\n got %v", test.global, global)
}
})
}
}
func TestPersonalized(t *testing.T) {
expectedDistance := 0.01
targetLenght := 1000000
walks.Alpha = 0.85
tests := []struct {
name string
Setup
}{
{name: "all dandling nodes", Setup: Dandlings(11)},
{name: "long cycle", Setup: Cyclic(50)},
{name: "acyclic graph 1", Setup: Acyclic1},
{name: "acyclic graph 2", Setup: Acyclic2},
{name: "acyclic graph 3", Setup: Acyclic3},
{name: "acyclic graph 4", Setup: Acyclic4},
{name: "acyclic graph 5", Setup: Acyclic5},
{name: "acyclic graph 6", Setup: Acyclic6},
{name: "acyclic graph 7", Setup: Acyclic7},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
// the number of walks should only make the algorithm faster,
// without changing its precision. To test this we simply randomize it
walks.N = rand.IntN(3000)
ctx := context.Background()
loader := NewMockLoader(test.walker)
rwalks, err := walks.Generate(ctx, test.walker, test.nodes...)
if err != nil {
t.Fatalf("failed to generate the walks: %v", err)
}
loader.AddWalks(rwalks)
personalized, err := pagerank.PersonalizedWithTargets(ctx, loader, "0", test.nodes, targetLenght)
if err != nil {
t.Fatalf("expected nil, pr %v", err)
}
distance := Distance(personalized, test.personalized)
if distance > expectedDistance {
t.Errorf("expected distance %f, got %f\n", expectedDistance, distance)
t.Errorf("walks per node %d", walks.N)
t.Errorf("expected ranks %v,\n got %v", test.personalized, personalized)
} }
}) })
} }

View File

@@ -4,7 +4,6 @@ import (
"context" "context"
"github/pippellia-btc/crawler/pkg/graph" "github/pippellia-btc/crawler/pkg/graph"
"github/pippellia-btc/crawler/pkg/walks" "github/pippellia-btc/crawler/pkg/walks"
"math"
"strconv" "strconv"
) )
@@ -37,9 +36,17 @@ func (s *WalkStore) ReplaceWalks(w []walks.Walk) {
} }
} }
func (s *WalkStore) WalksVisiting(node graph.ID) []walks.Walk { func (s *WalkStore) WalksVisiting(node graph.ID, limit int) []walks.Walk {
if limit == -1 {
limit = 1000000
}
visiting := make([]walks.Walk, 0, walks.N) visiting := make([]walks.Walk, 0, walks.N)
for _, walk := range s.Walks { for _, walk := range s.Walks {
if len(visiting) >= limit {
break
}
if walk.Visits(node) { if walk.Visits(node) {
visiting = append(visiting, walk) visiting = append(visiting, walk)
} }
@@ -76,16 +83,58 @@ func (s *WalkStore) Visits(ctx context.Context, nodes ...graph.ID) ([]int, error
return visits, nil return visits, nil
} }
// Distance returns the L1 distance between two lists of ranks. type mockLoader struct {
func Distance(r1, r2 []float64) float64 { walker walks.Walker
if len(r1) != len(r2) { store *WalkStore
return math.MaxFloat64 }
}
func NewMockLoader(walker walks.Walker) *mockLoader {
var dist float64 = 0 return &mockLoader{
for i := range r1 { walker: walker,
dist += math.Abs(r1[i] - r2[i]) store: NewWalkStore(),
} }
}
return dist
// Follows forwards the call to the wrapped walker's Follows method and returns
// its result and error unchanged.
func (l *mockLoader) Follows(ctx context.Context, node graph.ID) ([]graph.ID, error) {
return l.walker.Follows(ctx, node)
}
// BulkFollows builds a map from each node in nodes to the result of the wrapped
// walker's Follows for that node, querying the walker one node at a time.
// It stops at the first failing lookup and returns a nil map with that error.
func (l *mockLoader) BulkFollows(ctx context.Context, nodes []graph.ID) (map[graph.ID][]graph.ID, error) {
	result := make(map[graph.ID][]graph.ID, len(nodes))
	for i := range nodes {
		id := nodes[i]
		list, err := l.walker.Follows(ctx, id)
		if err != nil {
			return nil, err
		}
		result[id] = list
	}
	return result, nil
}
// AddWalks passes the walks w to the AddWalks method of the loader's internal store.
func (l *mockLoader) AddWalks(w []walks.Walk) {
l.store.AddWalks(w)
}
// WalksVisitingAny gathers walks from the internal store by calling
// WalksVisiting once per node and concatenating the results. Each node gets an
// equal share of the overall limit (limit / len(nodes), integer division).
// A limit of -1 is replaced by 1000000. It returns nil when nodes is empty or
// when the per-node share is not positive (for example when limit < len(nodes)).
// The returned error is always nil and ctx is not used.
// NOTE(review): the per-node results are appended as-is, so a walk that visits
// several of the requested nodes presumably shows up more than once — confirm
// whether callers expect distinct walks.
func (l *mockLoader) WalksVisitingAny(ctx context.Context, nodes []graph.ID, limit int) ([]walks.Walk, error) {
if len(nodes) == 0 {
return nil, nil
}
if limit == -1 {
limit = 1000000
}
limitPerNode := limit / len(nodes)
if limitPerNode <= 0 {
return nil, nil
}
// NOTE(review): the capacity is the full limit, i.e. 1000000 slots whenever
// the caller passed -1, regardless of how many walks the store holds.
visiting := make([]walks.Walk, 0, limit)
for _, node := range nodes {
visiting = append(visiting, l.store.WalksVisiting(node, limitPerNode)...)
}
return visiting, nil
} }

View File

@@ -3,20 +3,40 @@ package random_test
import ( import (
"github/pippellia-btc/crawler/pkg/graph" "github/pippellia-btc/crawler/pkg/graph"
"github/pippellia-btc/crawler/pkg/walks" "github/pippellia-btc/crawler/pkg/walks"
"math"
"strconv" "strconv"
) )
type Setup struct { type Setup struct {
walker *walks.MapWalker walker *walks.MapWalker
nodes []graph.ID
ranks []float64
deltas []graph.Delta deltas []graph.Delta
nodes []graph.ID
global []float64
personalized []float64 // according to node "0"
}
// Distance returns the L1 distance between two lists of ranks, i.e. the sum of
// the absolute differences between the elements at matching indexes.
// If the two lists have different lengths it returns math.MaxFloat64.
func Distance(r1, r2 []float64) float64 {
if len(r1) != len(r2) {
return math.MaxFloat64
}
var dist float64 = 0
for i := range r1 {
dist += math.Abs(r1[i] - r2[i])
}
return dist
} }
// Dandlings returns a setup consisting of n dangling nodes // Dandlings returns a setup consisting of n dangling nodes
func Dandlings(n int) Setup { func Dandlings(n int) Setup {
nodes := make([]graph.ID, n) nodes := make([]graph.ID, n)
ranks := make([]float64, n) global := make([]float64, n)
personalized := make([]float64, n)
personalized[0] = 1
added := make([]graph.ID, 0, n-1) added := make([]graph.ID, 0, n-1)
deltas := make([]graph.Delta, 0, n-1) deltas := make([]graph.Delta, 0, n-1)
@@ -24,7 +44,7 @@ func Dandlings(n int) Setup {
for i := range n { for i := range n {
node := graph.ID(strconv.Itoa(i)) node := graph.ID(strconv.Itoa(i))
nodes[i] = node nodes[i] = node
ranks[i] = 1.0 / float64(n) global[i] = 1.0 / float64(n)
if i > 0 { if i > 0 {
// all the possible deltas modulo graph isomorphism; 0 --> [1,2, ... k] for 1 <= k <= n // all the possible deltas modulo graph isomorphism; 0 --> [1,2, ... k] for 1 <= k <= n
@@ -34,10 +54,11 @@ func Dandlings(n int) Setup {
} }
return Setup{ return Setup{
walker: walks.NewWalker(make(map[graph.ID][]graph.ID)), walker: walks.NewWalker(make(map[graph.ID][]graph.ID)),
nodes: nodes, deltas: deltas,
ranks: ranks, nodes: nodes,
deltas: deltas, global: global,
personalized: personalized,
} }
} }
@@ -45,22 +66,27 @@ func Dandlings(n int) Setup {
func Cyclic(n int) Setup { func Cyclic(n int) Setup {
mid := graph.ID(strconv.Itoa(n / 2)) mid := graph.ID(strconv.Itoa(n / 2))
nodes := make([]graph.ID, n) nodes := make([]graph.ID, n)
ranks := make([]float64, n) global := make([]float64, n)
personalized := make([]float64, n)
a := walks.Alpha
for i := range n { for i := range n {
nodes[i] = graph.ID(strconv.Itoa(i)) nodes[i] = graph.ID(strconv.Itoa(i))
ranks[i] = 1.0 / float64(n) global[i] = 1.0 / float64(n)
personalized[i] = math.Pow(a, float64(i)) * (1.0 - a) / (1.0 - math.Pow(a, float64(n)))
} }
return Setup{ return Setup{
walker: walks.NewCyclicWalker(n), walker: walks.NewCyclicWalker(n),
nodes: nodes,
ranks: ranks,
deltas: []graph.Delta{ deltas: []graph.Delta{
{Node: "0", Removed: []graph.ID{"1"}}, {Node: "0", Removed: []graph.ID{"1"}},
{Node: "0", Common: []graph.ID{"1"}, Added: []graph.ID{mid}}, {Node: "0", Common: []graph.ID{"1"}, Added: []graph.ID{mid}},
{Node: "0", Removed: []graph.ID{"1"}, Added: []graph.ID{mid}}, {Node: "0", Removed: []graph.ID{"1"}, Added: []graph.ID{mid}},
}, },
nodes: nodes,
global: global,
personalized: personalized,
} }
} }
@@ -74,8 +100,6 @@ var Acyclic1 = Setup{
"3": {"1"}, "3": {"1"},
"4": {}, "4": {},
}), }),
nodes: []graph.ID{"0", "1", "2", "3", "4"},
ranks: []float64{0.11185, 0.36950, 0.15943, 0.24736, 0.11185},
deltas: []graph.Delta{ deltas: []graph.Delta{
// removals // removals
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
@@ -106,6 +130,9 @@ var Acyclic1 = Setup{
{Node: "2", Removed: []graph.ID{"3"}, Added: []graph.ID{"4"}}, {Node: "2", Removed: []graph.ID{"3"}, Added: []graph.ID{"4"}},
{Node: "2", Removed: []graph.ID{"3"}, Added: []graph.ID{"1", "4"}}, {Node: "2", Removed: []graph.ID{"3"}, Added: []graph.ID{"1", "4"}},
}, },
nodes: []graph.ID{"0", "1", "2", "3", "4"},
global: []float64{0.11185, 0.36950, 0.15943, 0.24736, 0.11185},
personalized: []float64{0.39709, 0.29070, 0.16876, 0.14345, 0.0},
} }
var Acyclic2 = Setup{ var Acyclic2 = Setup{
@@ -117,8 +144,6 @@ var Acyclic2 = Setup{
"4": {"3", "5"}, "4": {"3", "5"},
"5": {}, "5": {},
}), }),
nodes: []graph.ID{"0", "1", "2", "3", "4", "5"},
ranks: []float64{0.12987, 0.18506, 0.18506, 0.18506, 0.12987, 0.18506},
deltas: []graph.Delta{ deltas: []graph.Delta{
// removals // removals
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
@@ -136,6 +161,9 @@ var Acyclic2 = Setup{
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3", "5"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3", "5"}},
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3", "4", "5"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3", "4", "5"}},
}, },
nodes: []graph.ID{"0", "1", "2", "3", "4", "5"},
global: []float64{0.12987, 0.18506, 0.18506, 0.18506, 0.12987, 0.18506},
personalized: []float64{0.54054, 0.22973, 0.22973, 0.0, 0.0, 0.0},
} }
var Acyclic3 = Setup{ var Acyclic3 = Setup{
@@ -145,8 +173,6 @@ var Acyclic3 = Setup{
"2": {}, "2": {},
"3": {"1", "2"}, "3": {"1", "2"},
}), }),
nodes: []graph.ID{"0", "1", "2", "3"},
ranks: []float64{0.17544, 0.32456, 0.32456, 0.17544},
deltas: []graph.Delta{ deltas: []graph.Delta{
// removals // removals
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
@@ -158,6 +184,9 @@ var Acyclic3 = Setup{
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}, Added: []graph.ID{"3"}},
{Node: "0", Removed: []graph.ID{"1", "2"}, Added: []graph.ID{"3"}}, {Node: "0", Removed: []graph.ID{"1", "2"}, Added: []graph.ID{"3"}},
}, },
nodes: []graph.ID{"0", "1", "2", "3"},
global: []float64{0.17544, 0.32456, 0.32456, 0.17544},
personalized: []float64{0.54054, 0.22973, 0.22973, 0.0},
} }
var Acyclic4 = Setup{ var Acyclic4 = Setup{
@@ -167,8 +196,6 @@ var Acyclic4 = Setup{
"2": {}, "2": {},
"3": {"1"}, "3": {"1"},
}), }),
nodes: []graph.ID{"0", "1", "2", "3"},
ranks: []float64{0.17544, 0.39912, 0.25, 0.17544},
deltas: []graph.Delta{ deltas: []graph.Delta{
// removals // removals
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2"}},
@@ -185,6 +212,9 @@ var Acyclic4 = Setup{
{Node: "3", Removed: []graph.ID{"1"}, Added: []graph.ID{"0"}}, {Node: "3", Removed: []graph.ID{"1"}, Added: []graph.ID{"0"}},
{Node: "3", Removed: []graph.ID{"1"}, Added: []graph.ID{"0", "2"}}, {Node: "3", Removed: []graph.ID{"1"}, Added: []graph.ID{"0", "2"}},
}, },
nodes: []graph.ID{"0", "1", "2", "3"},
global: []float64{0.17544, 0.39912, 0.25, 0.17544},
personalized: []float64{0.54054, 0.22973, 0.22973, 0.0},
} }
var Acyclic5 = Setup{ var Acyclic5 = Setup{
@@ -194,8 +224,6 @@ var Acyclic5 = Setup{
"2": {}, "2": {},
"3": {"2"}, "3": {"2"},
}), }),
nodes: []graph.ID{"0", "1", "2", "3"},
ranks: []float64{0.21489, 0.11616, 0.37015, 0.29881},
deltas: []graph.Delta{ deltas: []graph.Delta{
// removals // removals
{Node: "0", Removed: []graph.ID{"3"}}, {Node: "0", Removed: []graph.ID{"3"}},
@@ -212,6 +240,9 @@ var Acyclic5 = Setup{
{Node: "1", Removed: []graph.ID{"0"}, Added: []graph.ID{"3"}}, {Node: "1", Removed: []graph.ID{"0"}, Added: []graph.ID{"3"}},
{Node: "1", Removed: []graph.ID{"0"}, Added: []graph.ID{"2", "3"}}, {Node: "1", Removed: []graph.ID{"0"}, Added: []graph.ID{"2", "3"}},
}, },
nodes: []graph.ID{"0", "1", "2", "3"},
global: []float64{0.21489, 0.11616, 0.37015, 0.29881},
personalized: []float64{0.38873, 0.0, 0.28085, 0.33042},
} }
var Acyclic6 = Setup{ var Acyclic6 = Setup{
@@ -222,8 +253,6 @@ var Acyclic6 = Setup{
"3": {"1", "4"}, "3": {"1", "4"},
"4": {"2"}, "4": {"2"},
}), }),
nodes: []graph.ID{"0", "1", "2", "3", "4"},
ranks: []float64{0.18820, 0.12128, 0.32417, 0.08511, 0.28125},
deltas: []graph.Delta{ deltas: []graph.Delta{
// removals // removals
{Node: "0", Removed: []graph.ID{"4"}}, {Node: "0", Removed: []graph.ID{"4"}},
@@ -255,6 +284,9 @@ var Acyclic6 = Setup{
{Node: "3", Removed: []graph.ID{"1", "4"}, Added: []graph.ID{"2"}}, {Node: "3", Removed: []graph.ID{"1", "4"}, Added: []graph.ID{"2"}},
{Node: "3", Removed: []graph.ID{"1", "4"}, Added: []graph.ID{"0", "2"}}, {Node: "3", Removed: []graph.ID{"1", "4"}, Added: []graph.ID{"0", "2"}},
}, },
nodes: []graph.ID{"0", "1", "2", "3", "4"},
global: []float64{0.18820, 0.12128, 0.32417, 0.08511, 0.28125},
personalized: []float64{0.38873, 0.0, 0.28086, 0.0, 0.33042},
} }
var Acyclic7 = Setup{ var Acyclic7 = Setup{
@@ -265,8 +297,6 @@ var Acyclic7 = Setup{
"3": {}, "3": {},
"4": {"0", "1", "2", "3"}, "4": {"0", "1", "2", "3"},
}), }),
nodes: []graph.ID{"0", "1", "2", "3", "4"},
ranks: []float64{0.17622, 0.22615, 0.22615, 0.22615, 0.14534},
deltas: []graph.Delta{ deltas: []graph.Delta{
// removals // removals
{Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2", "3"}}, {Node: "0", Removed: []graph.ID{"1"}, Common: []graph.ID{"2", "3"}},
@@ -281,4 +311,7 @@ var Acyclic7 = Setup{
{Node: "1", Added: []graph.ID{"2"}}, {Node: "1", Added: []graph.ID{"2"}},
{Node: "1", Added: []graph.ID{"2", "3"}}, {Node: "1", Added: []graph.ID{"2", "3"}},
}, },
nodes: []graph.ID{"0", "1", "2", "3", "4"},
global: []float64{0.17622, 0.22615, 0.22615, 0.22615, 0.14534},
personalized: []float64{0.54054, 0.15315, 0.15315, 0.15315, 0.0},
} }