aperture: internally aggregate session statistics to reduce cardinality

This commit is contained in:
djkazic
2025-05-06 11:11:49 -04:00
parent b05d801d8c
commit 7233c25bce
2 changed files with 131 additions and 20 deletions

View File

@@ -11,7 +11,6 @@ import (
"github.com/btcsuite/btclog/v2" "github.com/btcsuite/btclog/v2"
"github.com/lightninglabs/lightning-node-connect/hashmailrpc" "github.com/lightninglabs/lightning-node-connect/hashmailrpc"
"github.com/lightningnetwork/lnd/tlv" "github.com/lightningnetwork/lnd/tlv"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/time/rate" "golang.org/x/time/rate"
"google.golang.org/grpc/codes" "google.golang.org/grpc/codes"
"google.golang.org/grpc/status" "google.golang.org/grpc/status"
@@ -35,6 +34,11 @@ const (
// DefaultBufSize is the default number of bytes that are read in a // DefaultBufSize is the default number of bytes that are read in a
// single operation. // single operation.
DefaultBufSize = 4096 DefaultBufSize = 4096
// streamTTL is the amount of time that a stream must go without reads
// before it is considered for pruning. Without pruning, memory would grow
// unbounded.
streamTTL = 24 * time.Hour
) )
// streamID is the identifier of a stream. // streamID is the identifier of a stream.
@@ -747,9 +751,7 @@ func (h *hashMailServer) RecvStream(desc *hashmailrpc.CipherBoxDesc,
streamID := newStreamID(desc.StreamId) streamID := newStreamID(desc.StreamId)
if streamID.isOdd() { if streamID.isOdd() {
baseID := streamID.baseID() baseID := streamID.baseID()
mailboxReadCount.With(prometheus.Labels{ streamActivityTracker.Record(fmt.Sprintf("%x", baseID))
streamIDLabel: fmt.Sprintf("%x", baseID),
}).Inc()
} }
err = reader.Send(&hashmailrpc.CipherBox{ err = reader.Send(&hashmailrpc.CipherBox{
@@ -766,6 +768,91 @@ func (h *hashMailServer) RecvStream(desc *hashmailrpc.CipherBoxDesc,
var _ hashmailrpc.HashMailServer = (*hashMailServer)(nil) var _ hashmailrpc.HashMailServer = (*hashMailServer)(nil)
// streamActivity tracks per-session read activity for classifying mailbox
// sessions as active, standby, or in-use. It maintains an in-memory map of
// stream IDs to counters and timestamps. The embedded mutex guards every
// field below.
type streamActivity struct {
	sync.Mutex

	// streams maps a base stream ID, as passed to Record, to its entry.
	streams map[string]*activityEntry

	// windowStart is when the current measurement window began: either
	// construction time or the previous ClassifyAndReset call.
	windowStart time.Time
}

// activityEntry holds the read count and last read time for a single mailbox
// session.
type activityEntry struct {
	// count is the number of reads recorded in the current window.
	count uint64

	// lastUpdate is the time of the most recent recorded read. It is
	// deliberately left untouched when a window is reset so that idle
	// time keeps accumulating and the entry can eventually be pruned.
	lastUpdate time.Time
}

// newStreamActivity creates a new streamActivity tracker used to monitor
// mailbox read activity per stream ID. The first measurement window starts
// at the time of the call.
func newStreamActivity() *streamActivity {
	return &streamActivity{
		streams:     make(map[string]*activityEntry),
		windowStart: time.Now(),
	}
}

// Record logs a read event for the given base stream ID.
// It increments the read count and updates the last activity timestamp,
// creating the entry on the first read of a stream.
func (sa *streamActivity) Record(baseID string) {
	sa.Lock()
	defer sa.Unlock()

	entry, ok := sa.streams[baseID]
	if !ok {
		entry = &activityEntry{}
		sa.streams[baseID] = entry
	}

	entry.count++
	entry.lastUpdate = time.Now()
}

// ClassifyAndReset categorizes each tracked stream based on its read rate
// over the window that has elapsed since the previous call (or since the
// tracker was created) and returns aggregate counts of active, standby, and
// in-use sessions. A stream is classified as:
//   - In-use: if read rate ≥ 0.5 reads/sec
//   - Standby: if 0 < read rate < 0.5 reads/sec
//   - Active: if read rate > 0 (includes standby and in-use)
//
// Every read counter is reset to zero and a new window is started. Streams
// that had no reads in the window and whose last read is older than streamTTL
// are removed from the tracker.
func (sa *streamActivity) ClassifyAndReset() (active, standby, inuse int) {
	// inUseRate is the reads/sec threshold separating standby from in-use.
	const inUseRate = 0.5

	sa.Lock()
	defer sa.Unlock()

	now := time.Now()

	// The rate is measured over the whole window rather than the time
	// since the last read; otherwise a single read that happens to land
	// just before the tick would look like a very high rate.
	window := now.Sub(sa.windowStart).Seconds()
	if sa.windowStart.IsZero() || window <= 0 {
		// Unknown or empty window: prevent divide-by-zero, treat as a
		// 1s interval.
		window = 1
	}
	sa.windowStart = now

	for baseID, e := range sa.streams {
		if e.count == 0 {
			// No reads in this window. Prune once the stream has
			// been idle for longer than the TTL. lastUpdate still
			// holds the time of the last read because it is never
			// overwritten on reset.
			if now.Sub(e.lastUpdate) > streamTTL {
				delete(sa.streams, baseID)
			}

			continue
		}

		// At least one read, so the rate is strictly positive.
		active++
		if float64(e.count)/window >= inUseRate {
			inuse++
		} else {
			standby++
		}

		// Reset the counter for the next window.
		e.count = 0
	}

	return active, standby, inuse
}
// streamStatus keeps track of the occupancy status of a stream's read and // streamStatus keeps track of the occupancy status of a stream's read and
// write sub-streams. It is initialised with callback functions to call on the // write sub-streams. It is initialised with callback functions to call on the
// event of the streams being occupied (either or both of the streams are // event of the streams being occupied (either or both of the streams are

View File

@@ -3,13 +3,12 @@ package aperture
import ( import (
"fmt" "fmt"
"net/http" "net/http"
"time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/client_golang/prometheus/promhttp"
) )
const streamIDLabel = "streamID"
var ( var (
// mailboxCount tracks the current number of active mailboxes. // mailboxCount tracks the current number of active mailboxes.
mailboxCount = prometheus.NewGauge(prometheus.GaugeOpts{ mailboxCount = prometheus.NewGauge(prometheus.GaugeOpts{
@@ -17,21 +16,31 @@ var (
Name: "mailbox_count", Name: "mailbox_count",
}) })
// mailboxReadCount counts each time a mailbox pair is being used. // activeSessions tracks the active session count for mailbox
// A session consists of a bidirectional stream each using a mailbox activeSessions = prometheus.NewGauge(prometheus.GaugeOpts{
// with an ID that overlaps for the first 63 bytes and differ for the Namespace: "hashmail",
// last bit. So in order to obtain accurate data about a specific Name: "mailbox_active_sessions",
// mailbox session, the stream ID that will be recorded is the first Help: "Number of active sessions",
// 16 bytes of the session ID and we will only record the odd stream's })
// reads so that we don't duplicate the data.
mailboxReadCount = prometheus.NewCounterVec( // standbySessions tracks the standby session count for mailbox
prometheus.CounterOpts{ standbySessions = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "hashmail", Namespace: "hashmail",
Name: "mailbox_read_count", Name: "mailbox_standby_sessions",
}, []string{streamIDLabel}, Help: "Number of standby sessions",
) })
// inUseSessions tracks the in-use session count for mailbox
inUseSessions = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "hashmail",
Name: "mailbox_inuse_sessions",
Help: "Number of in-use sessions",
})
) )
// streamActivityTracker is the package-level tracker behind the session
// gauges. RecvStream records reads into it, and the goroutine started by
// StartPrometheusExporter periodically calls ClassifyAndReset on it to
// refresh the active, standby and in-use session metrics.
var streamActivityTracker = newStreamActivity()
// PrometheusConfig is the set of configuration data that specifies if // PrometheusConfig is the set of configuration data that specifies if
// Prometheus metric exporting is activated, and if so the listening address of // Prometheus metric exporting is activated, and if so the listening address of
// the Prometheus server. // the Prometheus server.
@@ -55,7 +64,22 @@ func StartPrometheusExporter(cfg *PrometheusConfig) error {
// Next, we'll register all our metrics. // Next, we'll register all our metrics.
prometheus.MustRegister(mailboxCount) prometheus.MustRegister(mailboxCount)
prometheus.MustRegister(mailboxReadCount) prometheus.MustRegister(activeSessions)
prometheus.MustRegister(standbySessions)
prometheus.MustRegister(inUseSessions)
// Periodically update session classification metrics from internal tracker
go func() {
ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()
for range ticker.C {
active, standby, inuse := streamActivityTracker.ClassifyAndReset()
activeSessions.Set(float64(active))
standbySessions.Set(float64(standby))
inUseSessions.Set(float64(inuse))
}
}()
// Finally, we'll launch the HTTP server that Prometheus will use to // Finally, we'll launch the HTTP server that Prometheus will use to
// scrape our metrics. // scrape our metrics.