mirror of
https://github.com/aljazceru/crawler_v2.git
synced 2025-12-17 07:24:21 +01:00
using cache for faster existence check in Firehose
This commit is contained in:
@@ -83,7 +83,8 @@ func main() {
|
|||||||
producers.Add(4)
|
producers.Add(4)
|
||||||
go func() {
|
go func() {
|
||||||
defer producers.Done()
|
defer producers.Done()
|
||||||
pipe.Firehose(ctx, config.Firehose, db, pipe.Send(recorderQueue))
|
gate := pipe.NewExistenceGate(db)
|
||||||
|
pipe.Firehose(ctx, config.Firehose, gate, pipe.Send(recorderQueue))
|
||||||
}()
|
}()
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/nbd-wtf/go-nostr"
|
"github.com/nbd-wtf/go-nostr"
|
||||||
|
"github.com/vertex-lab/crawler_v2/pkg/redb"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -100,10 +101,6 @@ func (c FirehoseConfig) Print() {
|
|||||||
fmt.Printf(" Offset: %v\n", c.Offset)
|
fmt.Printf(" Offset: %v\n", c.Offset)
|
||||||
}
|
}
|
||||||
|
|
||||||
type PubkeyChecker interface {
|
|
||||||
Exists(ctx context.Context, pubkey string) (bool, error)
|
|
||||||
}
|
|
||||||
|
|
||||||
type Forward[T any] func(T) error
|
type Forward[T any] func(T) error
|
||||||
|
|
||||||
// Send returns a [Forward] function that will attempt to send values into the given channel.
|
// Send returns a [Forward] function that will attempt to send values into the given channel.
|
||||||
@@ -119,14 +116,50 @@ func Send[T any](ch chan T) Forward[T] {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PubkeyGate decides whether events from a given pubkey should be let through.
type PubkeyGate interface {
	// Allows reports whether the pubkey is permitted.
	Allows(ctx context.Context, pubkey string) bool
}
|
||||||
|
|
||||||
|
// ExistenceGate is a [PubkeyGate] that allows pubkeys if they exists in the database.
|
||||||
|
// The assumption is that keys can't be removed from the database.
|
||||||
|
type ExistenceGate struct {
|
||||||
|
exists map[string]struct{}
|
||||||
|
fallback redb.RedisDB
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewExistenceGate(fallback redb.RedisDB) *ExistenceGate {
|
||||||
|
return &ExistenceGate{
|
||||||
|
exists: make(map[string]struct{}),
|
||||||
|
fallback: fallback,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (g *ExistenceGate) Allows(ctx context.Context, pubkey string) bool {
|
||||||
|
if _, ok := g.exists[pubkey]; ok {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
exists, err := g.fallback.Exists(ctx, pubkey)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("ExistanceGate: %v", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if exists {
|
||||||
|
g.exists[pubkey] = struct{}{}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// Firehose connects to a list of relays and pulls config.Kinds events that are newer than config.Since.
|
// Firehose connects to a list of relays and pulls config.Kinds events that are newer than config.Since.
|
||||||
// It deduplicates events using a simple ring-buffer.
|
// It deduplicates events using a simple ring-buffer.
|
||||||
// It discards events from unknown pubkeys as an anti-spam mechanism.
|
// It applies the [PubkeyGate] to remove events from undesired pubkeys.
|
||||||
// It forwards the rest using the provided [Forward] function.
|
// It forwards the rest using the provided [Forward] function.
|
||||||
func Firehose(
|
func Firehose(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
config FirehoseConfig,
|
config FirehoseConfig,
|
||||||
check PubkeyChecker,
|
gate PubkeyGate,
|
||||||
forward Forward[*nostr.Event],
|
forward Forward[*nostr.Event],
|
||||||
) {
|
) {
|
||||||
log.Println("Firehose: ready")
|
log.Println("Firehose: ready")
|
||||||
@@ -147,14 +180,7 @@ func Firehose(
|
|||||||
}
|
}
|
||||||
seen.Add(event.ID)
|
seen.Add(event.ID)
|
||||||
|
|
||||||
exists, err := check.Exists(ctx, event.PubKey)
|
if !gate.Allows(ctx, event.PubKey) {
|
||||||
if err != nil {
|
|
||||||
log.Printf("Firehose: %v", err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
if !exists {
|
|
||||||
// event from unknown pubkey, skip
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -19,12 +19,12 @@ var (
|
|||||||
pip string = "f683e87035f7ad4f44e0b98cfbd9537e16455a92cd38cefc4cb31db7557f5ef2"
|
pip string = "f683e87035f7ad4f44e0b98cfbd9537e16455a92cd38cefc4cb31db7557f5ef2"
|
||||||
)
|
)
|
||||||
|
|
||||||
type mockChecker struct {
|
type mockGate struct {
|
||||||
pubkey string
|
pubkey string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c mockChecker) Exists(ctx context.Context, pubkey string) (bool, error) {
|
func (g mockGate) Allows(ctx context.Context, pubkey string) bool {
|
||||||
return pubkey == c.pubkey, nil
|
return pubkey == g.pubkey
|
||||||
}
|
}
|
||||||
|
|
||||||
func print(e *nostr.Event) error {
|
func print(e *nostr.Event) error {
|
||||||
@@ -39,9 +39,9 @@ func TestFirehose(t *testing.T) {
|
|||||||
ctx, cancel := context.WithTimeout(ctx, time.Second*20)
|
ctx, cancel := context.WithTimeout(ctx, time.Second*20)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
checker := mockChecker{pubkey: pip}
|
gate := mockGate{pubkey: pip}
|
||||||
config := NewFirehoseConfig()
|
config := NewFirehoseConfig()
|
||||||
Firehose(ctx, config, checker, print)
|
Firehose(ctx, config, gate, print)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFetch(t *testing.T) {
|
func TestFetch(t *testing.T) {
|
||||||
|
|||||||
Reference in New Issue
Block a user