mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-23 01:04:25 +01:00
Add initial support for opentracing by using the `jaeger` package.
Since opentracing uses the `context` package, add a `context.Context`
as the first parameter to all the functions that we might want to
trace. Trace "spans" (trace points) are then added by extracting the
trace details from the specified context parameter.
Notes:
- Although the tracer is created in `main()`, the "root span"
(aka the first trace point) is not added until `beforeSubcommands()`.
This is by design and is a compromise: by delaying the creation of the
root span, the spans become much more readable since using the web-based
JaegerUI, you will see traces like this:
```
kata-runtime: kata-runtime create
------------ -------------------
^ ^
| |
Trace name First span name
(which clearly shows the CLI command that was run)
```
Creating the span earlier means it is necessary to expand 'n' spans in
the UI before you get to see the name of the CLI command that was run.
In adding support, this became very tedious, hence my design decision to
defer the creation of the root span until after signal handling has been
setup and after CLI options have been parsed, but still very early in
the code path.
- At this stage, the tracing stops at the `virtcontainers` call
boundary.
- Tracing is "always on" as there doesn't appear to be a way to toggle
it. However, its resolves to a "nop" unless the tracer can talk to a
jaeger agent.
Note that this commit required a bit of rework to `beforeSubcommands()`
to reduce the cyclomatic complexity.
Fixes #557.
Signed-off-by: James O. D. Hunt <james.o.hunt@intel.com>
125 lines
4.3 KiB
Go
125 lines
4.3 KiB
Go
// Copyright (c) 2017 Uber Technologies, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package rpcmetrics
|
|
|
|
import (
|
|
"sync"
|
|
|
|
"github.com/uber/jaeger-lib/metrics"
|
|
)
|
|
|
|
const (
|
|
otherEndpointsPlaceholder = "other"
|
|
endpointNameMetricTag = "endpoint"
|
|
)
|
|
|
|
// Metrics is a collection of metrics for an endpoint describing
|
|
// throughput, success, errors, and performance.
|
|
type Metrics struct {
|
|
// RequestCountSuccess is a counter of the total number of successes.
|
|
RequestCountSuccess metrics.Counter `metric:"requests" tags:"error=false"`
|
|
|
|
// RequestCountFailures is a counter of the number of times any failure has been observed.
|
|
RequestCountFailures metrics.Counter `metric:"requests" tags:"error=true"`
|
|
|
|
// RequestLatencySuccess is a latency histogram of succesful requests.
|
|
RequestLatencySuccess metrics.Timer `metric:"request_latency" tags:"error=false"`
|
|
|
|
// RequestLatencyFailures is a latency histogram of failed requests.
|
|
RequestLatencyFailures metrics.Timer `metric:"request_latency" tags:"error=true"`
|
|
|
|
// HTTPStatusCode2xx is a counter of the total number of requests with HTTP status code 200-299
|
|
HTTPStatusCode2xx metrics.Counter `metric:"http_requests" tags:"status_code=2xx"`
|
|
|
|
// HTTPStatusCode3xx is a counter of the total number of requests with HTTP status code 300-399
|
|
HTTPStatusCode3xx metrics.Counter `metric:"http_requests" tags:"status_code=3xx"`
|
|
|
|
// HTTPStatusCode4xx is a counter of the total number of requests with HTTP status code 400-499
|
|
HTTPStatusCode4xx metrics.Counter `metric:"http_requests" tags:"status_code=4xx"`
|
|
|
|
// HTTPStatusCode5xx is a counter of the total number of requests with HTTP status code 500-599
|
|
HTTPStatusCode5xx metrics.Counter `metric:"http_requests" tags:"status_code=5xx"`
|
|
}
|
|
|
|
func (m *Metrics) recordHTTPStatusCode(statusCode uint16) {
|
|
if statusCode >= 200 && statusCode < 300 {
|
|
m.HTTPStatusCode2xx.Inc(1)
|
|
} else if statusCode >= 300 && statusCode < 400 {
|
|
m.HTTPStatusCode3xx.Inc(1)
|
|
} else if statusCode >= 400 && statusCode < 500 {
|
|
m.HTTPStatusCode4xx.Inc(1)
|
|
} else if statusCode >= 500 && statusCode < 600 {
|
|
m.HTTPStatusCode5xx.Inc(1)
|
|
}
|
|
}
|
|
|
|
// MetricsByEndpoint is a registry/cache of metrics for each unique endpoint name.
|
|
// Only maxNumberOfEndpoints Metrics are stored, all other endpoint names are mapped
|
|
// to a generic endpoint name "other".
|
|
type MetricsByEndpoint struct {
|
|
metricsFactory metrics.Factory
|
|
endpoints *normalizedEndpoints
|
|
metricsByEndpoint map[string]*Metrics
|
|
mux sync.RWMutex
|
|
}
|
|
|
|
func newMetricsByEndpoint(
|
|
metricsFactory metrics.Factory,
|
|
normalizer NameNormalizer,
|
|
maxNumberOfEndpoints int,
|
|
) *MetricsByEndpoint {
|
|
return &MetricsByEndpoint{
|
|
metricsFactory: metricsFactory,
|
|
endpoints: newNormalizedEndpoints(maxNumberOfEndpoints, normalizer),
|
|
metricsByEndpoint: make(map[string]*Metrics, maxNumberOfEndpoints+1), // +1 for "other"
|
|
}
|
|
}
|
|
|
|
func (m *MetricsByEndpoint) get(endpoint string) *Metrics {
|
|
safeName := m.endpoints.normalize(endpoint)
|
|
if safeName == "" {
|
|
safeName = otherEndpointsPlaceholder
|
|
}
|
|
m.mux.RLock()
|
|
met := m.metricsByEndpoint[safeName]
|
|
m.mux.RUnlock()
|
|
if met != nil {
|
|
return met
|
|
}
|
|
|
|
return m.getWithWriteLock(safeName)
|
|
}
|
|
|
|
// split to make easier to test
|
|
func (m *MetricsByEndpoint) getWithWriteLock(safeName string) *Metrics {
|
|
m.mux.Lock()
|
|
defer m.mux.Unlock()
|
|
|
|
// it is possible that the name has been already registered after we released
|
|
// the read lock and before we grabbed the write lock, so check for that.
|
|
if met, ok := m.metricsByEndpoint[safeName]; ok {
|
|
return met
|
|
}
|
|
|
|
// it would be nice to create the struct before locking, since Init() is somewhat
|
|
// expensive, however some metrics backends (e.g. expvar) may not like duplicate metrics.
|
|
met := &Metrics{}
|
|
tags := map[string]string{endpointNameMetricTag: safeName}
|
|
metrics.Init(met, m.metricsFactory, tags)
|
|
|
|
m.metricsByEndpoint[safeName] = met
|
|
return met
|
|
}
|