mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-17 07:24:21 +01:00
processor is here
This commit is contained in:
@@ -4,6 +4,9 @@ package graph
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"math/rand/v2"
|
||||
"slices"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -64,6 +67,46 @@ type Delta struct {
|
||||
Add []ID
|
||||
}
|
||||
|
||||
// NewDelta returns a delta by computing the relationships to remove, keep and add.
|
||||
// Time complexity O(n * logn + m * logm), where n and m are the lengths of the slices.
|
||||
// This function is much faster than converting to sets for sizes (n, m) smaller than ~10^6.
|
||||
func NewDelta(kind int, node ID, old, new []ID) Delta {
|
||||
delta := Delta{
|
||||
Kind: kind,
|
||||
Node: node,
|
||||
}
|
||||
|
||||
slices.Sort(old)
|
||||
slices.Sort(new)
|
||||
i, j := 0, 0
|
||||
oldLen, newLen := len(old), len(new)
|
||||
|
||||
for i < oldLen && j < newLen {
|
||||
switch {
|
||||
case old[i] < new[j]:
|
||||
// ID is in old but not in new => remove
|
||||
delta.Remove = append(delta.Remove, old[i])
|
||||
i++
|
||||
|
||||
case old[i] > new[j]:
|
||||
// ID is in new but not in old => add
|
||||
delta.Add = append(delta.Add, new[j])
|
||||
j++
|
||||
|
||||
default:
|
||||
// ID is in both => keep
|
||||
delta.Keep = append(delta.Keep, old[i])
|
||||
i++
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
// add all elements not traversed
|
||||
delta.Remove = append(delta.Remove, old[i:]...)
|
||||
delta.Add = append(delta.Add, new[j:]...)
|
||||
return delta
|
||||
}
|
||||
|
||||
// Size returns the number of relationships changed by delta
|
||||
func (d Delta) Size() int {
|
||||
return len(d.Remove) + len(d.Add)
|
||||
@@ -89,3 +132,13 @@ func (d Delta) Inverse() Delta {
|
||||
Add: d.Remove,
|
||||
}
|
||||
}
|
||||
|
||||
// RandomIDs of the provided size.
|
||||
func RandomIDs(size int) []ID {
|
||||
IDs := make([]ID, size)
|
||||
for i := range size {
|
||||
node := rand.IntN(10000000)
|
||||
IDs[i] = ID(strconv.Itoa(node))
|
||||
}
|
||||
return IDs
|
||||
}
|
||||
|
||||
118
pkg/graph/graph_test.go
Normal file
118
pkg/graph/graph_test.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNewDelta(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
old []ID
|
||||
new []ID
|
||||
expected Delta
|
||||
}{
|
||||
{
|
||||
name: "nil slices",
|
||||
expected: Delta{Kind: 3, Node: "0"},
|
||||
},
|
||||
{
|
||||
name: "empty slices",
|
||||
expected: Delta{Kind: 3, Node: "0"},
|
||||
},
|
||||
{
|
||||
name: "only removals",
|
||||
old: []ID{"0", "1", "2", "19", "111"},
|
||||
new: []ID{"2", "19"},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{"0", "1", "111"}, Keep: []ID{"19", "2"}},
|
||||
},
|
||||
{
|
||||
name: "only additions",
|
||||
old: []ID{"0", "1"},
|
||||
new: []ID{"420", "0", "1", "69"},
|
||||
expected: Delta{Kind: 3, Node: "0", Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}},
|
||||
},
|
||||
{
|
||||
name: "both additions",
|
||||
old: []ID{"0", "1", "111"},
|
||||
new: []ID{"420", "0", "1", "69"},
|
||||
expected: Delta{Kind: 3, Node: "0", Remove: []ID{"111"}, Keep: []ID{"0", "1"}, Add: []ID{"420", "69"}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
delta := NewDelta(3, "0", test.old, test.new)
|
||||
if !reflect.DeepEqual(delta, test.expected) {
|
||||
t.Errorf("expected delta %v, got %v", test.expected, delta)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNewDelta(b *testing.B) {
|
||||
sizes := []int{1000, 10000, 100000}
|
||||
for _, size := range sizes {
|
||||
b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
|
||||
old := RandomIDs(size)
|
||||
new := RandomIDs(size)
|
||||
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
NewDelta(3, "0", old, new)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNewDeltaSets(b *testing.B) {
|
||||
sizes := []int{1000, 10000, 100000}
|
||||
for _, size := range sizes {
|
||||
b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
|
||||
old := RandomIDs(size)
|
||||
new := RandomIDs(size)
|
||||
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
newDeltaSet(3, "0", old, new)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func newDeltaSet(kind int, node ID, old, new []ID) Delta {
|
||||
delta := Delta{
|
||||
Kind: kind,
|
||||
Node: node,
|
||||
}
|
||||
|
||||
oldMap := make(map[ID]struct{}, len(old))
|
||||
newMap := make(map[ID]struct{}, len(new))
|
||||
|
||||
// Fill maps
|
||||
for _, id := range old {
|
||||
oldMap[id] = struct{}{}
|
||||
}
|
||||
for _, id := range new {
|
||||
newMap[id] = struct{}{}
|
||||
}
|
||||
|
||||
// Find removed and kept
|
||||
for _, id := range old {
|
||||
if _, found := newMap[id]; found {
|
||||
delta.Keep = append(delta.Keep, id)
|
||||
} else {
|
||||
delta.Remove = append(delta.Remove, id)
|
||||
}
|
||||
}
|
||||
|
||||
// Find added
|
||||
for _, id := range new {
|
||||
if _, found := oldMap[id]; !found {
|
||||
delta.Add = append(delta.Add, id)
|
||||
}
|
||||
}
|
||||
|
||||
return delta
|
||||
}
|
||||
Reference in New Issue
Block a user