consensus_core/network/
metrics.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::sync::Arc;
5
6use prometheus::{
7    HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, Registry,
8    register_histogram_vec_with_registry, register_int_counter_vec_with_registry,
9    register_int_gauge_vec_with_registry, register_int_gauge_with_registry,
10};
11
12// Fields for network-agnostic metrics can be added here
13pub(crate) struct NetworkMetrics {
14    pub(crate) inbound: Arc<NetworkRouteMetrics>,
15    pub(crate) outbound: Arc<NetworkRouteMetrics>,
16    #[cfg_attr(msim, allow(dead_code))]
17    pub(crate) tcp_connection_metrics: Arc<TcpConnectionMetrics>,
18}
19
20impl NetworkMetrics {
21    pub(crate) fn new(registry: &Registry) -> Self {
22        Self {
23            inbound: Arc::new(NetworkRouteMetrics::new("", "inbound", registry)),
24            outbound: Arc::new(NetworkRouteMetrics::new("", "outbound", registry)),
25            tcp_connection_metrics: Arc::new(TcpConnectionMetrics::new(registry)),
26        }
27    }
28}
29
30#[cfg_attr(msim, allow(dead_code))]
31pub(crate) struct TcpConnectionMetrics {
32    /// Send buffer size of consensus TCP socket.
33    pub(crate) socket_send_buffer_size: IntGauge,
34    /// Receive buffer size of consensus TCP socket.
35    pub(crate) socket_recv_buffer_size: IntGauge,
36    /// Max send buffer size of TCP socket.
37    pub(crate) socket_send_buffer_max_size: IntGauge,
38    /// Max receive buffer size of TCP socket.
39    pub(crate) socket_recv_buffer_max_size: IntGauge,
40}
41
42impl TcpConnectionMetrics {
43    pub fn new(registry: &Registry) -> Self {
44        Self {
45            socket_send_buffer_size: register_int_gauge_with_registry!(
46                "tcp_socket_send_buffer_size",
47                "Send buffer size of consensus TCP socket.",
48                registry
49            )
50            .unwrap(),
51            socket_recv_buffer_size: register_int_gauge_with_registry!(
52                "tcp_socket_recv_buffer_size",
53                "Receive buffer size of consensus TCP socket.",
54                registry
55            )
56            .unwrap(),
57            socket_send_buffer_max_size: register_int_gauge_with_registry!(
58                "tcp_socket_send_buffer_max_size",
59                "Max send buffer size of TCP socket.",
60                registry
61            )
62            .unwrap(),
63            socket_recv_buffer_max_size: register_int_gauge_with_registry!(
64                "tcp_socket_recv_buffer_max_size",
65                "Max receive buffer size of TCP socket.",
66                registry
67            )
68            .unwrap(),
69        }
70    }
71}
72
73#[derive(Clone)]
74pub struct NetworkRouteMetrics {
75    /// Counter of requests by route
76    pub requests: IntCounterVec,
77    /// Request latency by route
78    pub request_latency: HistogramVec,
79    /// Request size by route
80    pub request_size: HistogramVec,
81    /// Response size by route
82    pub response_size: HistogramVec,
83    /// Counter of requests exceeding the "excessive" size limit
84    pub excessive_size_requests: IntCounterVec,
85    /// Counter of responses exceeding the "excessive" size limit
86    pub excessive_size_responses: IntCounterVec,
87    /// Gauge of the number of inflight requests at any given time by route
88    pub inflight_requests: IntGaugeVec,
89    /// Failed requests by route
90    pub errors: IntCounterVec,
91}
92
/// Histogram bucket upper bounds used for the request latency metric (seconds, per the name).
const LATENCY_SEC_BUCKETS: &[f64] = &[
    // Sub-second buckets.
    0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5,
    // One second and above.
    1.0, 2.5, 5.0, 10.0, 20.0, 30.0, 60.0, 90.0,
];
96
// Arbitrarily chosen buckets for message size, with gradually-lowering exponent to give us
// better resolution at high sizes. The trailing comment on each row is the multiplier between
// consecutive buckets in that row.
const SIZE_BYTE_BUCKETS: &[f64] = &[
    2048., 8192., // *4
    16384., 32768., 65536., 131072., 262144., 524288., 1048576., // *2
    // 1572864 * 1.5 = 2359296 and 2359296 * 1.5 = 3538944; the middle value was previously
    // mistyped as 2359256, which broke the progression.
    1572864., 2359296., 3538944., // *1.5
    4600627., 5980815., 7775060., 10107578., 13139851., 17081807., 22206349., 28868253., 37528729.,
    48787348., 63423553., // *1.3
];
106
107impl NetworkRouteMetrics {
108    pub fn new(node: &'static str, direction: &'static str, registry: &Registry) -> Self {
109        let requests = register_int_counter_vec_with_registry!(
110            format!("{node}_{direction}_requests"),
111            "The number of requests made on the network",
112            &["route"],
113            registry
114        )
115        .unwrap();
116
117        let request_latency = register_histogram_vec_with_registry!(
118            format!("{node}_{direction}_request_latency"),
119            "Latency of a request by route",
120            &["route"],
121            LATENCY_SEC_BUCKETS.to_vec(),
122            registry,
123        )
124        .unwrap();
125
126        let request_size = register_histogram_vec_with_registry!(
127            format!("{node}_{direction}_request_size"),
128            "Size of a request by route",
129            &["route"],
130            SIZE_BYTE_BUCKETS.to_vec(),
131            registry,
132        )
133        .unwrap();
134
135        let response_size = register_histogram_vec_with_registry!(
136            format!("{node}_{direction}_response_size"),
137            "Size of a response by route",
138            &["route"],
139            SIZE_BYTE_BUCKETS.to_vec(),
140            registry,
141        )
142        .unwrap();
143
144        let excessive_size_requests = register_int_counter_vec_with_registry!(
145            format!("{node}_{direction}_excessive_size_requests"),
146            "The number of excessively large request messages sent",
147            &["route"],
148            registry
149        )
150        .unwrap();
151
152        let excessive_size_responses = register_int_counter_vec_with_registry!(
153            format!("{node}_{direction}_excessive_size_responses"),
154            "The number of excessively large response messages seen",
155            &["route"],
156            registry
157        )
158        .unwrap();
159
160        let inflight_requests = register_int_gauge_vec_with_registry!(
161            format!("{node}_{direction}_inflight_requests"),
162            "The number of inflight network requests",
163            &["route"],
164            registry
165        )
166        .unwrap();
167
168        let errors = register_int_counter_vec_with_registry!(
169            format!("{node}_{direction}_request_errors"),
170            "Number of errors by route",
171            &["route", "status"],
172            registry,
173        )
174        .unwrap();
175
176        Self {
177            requests,
178            request_latency,
179            request_size,
180            response_size,
181            excessive_size_requests,
182            excessive_size_responses,
183            inflight_requests,
184            errors,
185        }
186    }
187}