diff --git a/README.md b/README.md new file mode 100644 index 0000000..20458db --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +# Crawler v2 + +This repo is a rewrite of the original and discontinued [crawler](https://github.com/vertex-lab/crawler), under active development. + +## Goals + +The goals of this project are: + +- Continuously crawl the Nostr network (24/7/365), searching for follow lists (`kind:3`) and other relevant events. + +- Quickly assess whether new events should be added to the database based on the author's rank. Approved events are used to build a custom Redis-backed graph database. + +- Generate and maintain random walks for nodes in the graph, updating them as the graph topology evolves. + +- Use these random walks to efficiently compute acyclic Monte Carlo Pageranks (personalized and global). Algorithms are inspired by [this paper](https://snap.stanford.edu/class/cs224w-readings/bahmani10pagerank.pdf). + +## Apps + +`/cmd/crawler/` + +The main entry point, which assumes that the event store and Redis are synchronized. In case they are empty, the graph will be initialized using the `INIT_PUBKEYS` specified in the environment. + +`/cmd/sync/` + +This mode builds the Redis graph database from the event store. In other words, it synchronizes Redis to reflect the events in the event store, starting from the `INIT_PUBKEYS` specified in the environment, and expanding outward. + diff --git a/cmd/crawler/main.go b/cmd/crawler/main.go index df34e7f..f70c815 100644 --- a/cmd/crawler/main.go +++ b/cmd/crawler/main.go @@ -18,6 +18,12 @@ import ( "github.com/vertex-lab/relay/pkg/eventstore" ) +/* +This program assumes synchronization between Redis and the event store, meaning +that the graph in Redis reflects these events. +If that is not the case, go run /cmd/sync/ to synchronize Redis with the event store. 
+*/ + func main() { ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/cmd/sync/main.go b/cmd/sync/main.go index 3c9bdc0..2f36903 100644 --- a/cmd/sync/main.go +++ b/cmd/sync/main.go @@ -20,7 +20,7 @@ import ( /* This program syncronize the Redis database to the events already stored in the event store. -If Redis and the eventstore are already in sync, run the executable at /cmd/crawler/. +If Redis and the eventstore are already in sync, go run /cmd/crawler/. */ func main() { diff --git a/pkg/config/config.go b/pkg/config/config.go index 1270d12..c23d8c3 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -1,3 +1,4 @@ +// Package config loads and validates the variables in the environment into a [Config]. package config import ( diff --git a/pkg/pipe/engine.go b/pkg/pipe/engine.go index 3112f85..31f0e6c 100644 --- a/pkg/pipe/engine.go +++ b/pkg/pipe/engine.go @@ -1,3 +1,4 @@ +// Package pipe defines high-level pipeline functions (e.g. [Firehose], [Engine]). package pipe import (