consensus_core/network/
metrics.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::sync::Arc;
5
6use prometheus::{
7    HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, Registry,
8    register_histogram_vec_with_registry, register_int_counter_vec_with_registry,
9    register_int_gauge_vec_with_registry, register_int_gauge_with_registry,
10};
11
12// Fields for network-agnostic metrics can be added here
13pub(crate) struct NetworkMetrics {
14    pub(crate) network_type: IntGaugeVec,
15    pub(crate) inbound: Arc<NetworkRouteMetrics>,
16    pub(crate) outbound: Arc<NetworkRouteMetrics>,
17    #[cfg_attr(msim, allow(dead_code))]
18    pub(crate) tcp_connection_metrics: Arc<TcpConnectionMetrics>,
19}
20
21impl NetworkMetrics {
22    pub(crate) fn new(registry: &Registry) -> Self {
23        Self {
24            network_type: register_int_gauge_vec_with_registry!(
25                "network_type",
26                "Type of the network used: anemo or tonic",
27                &["type"],
28                registry
29            )
30            .unwrap(),
31            inbound: Arc::new(NetworkRouteMetrics::new("", "inbound", registry)),
32            outbound: Arc::new(NetworkRouteMetrics::new("", "outbound", registry)),
33            tcp_connection_metrics: Arc::new(TcpConnectionMetrics::new(registry)),
34        }
35    }
36}
37
38#[cfg_attr(msim, allow(dead_code))]
39pub(crate) struct TcpConnectionMetrics {
40    /// Send buffer size of consensus TCP socket.
41    pub(crate) socket_send_buffer_size: IntGauge,
42    /// Receive buffer size of consensus TCP socket.
43    pub(crate) socket_recv_buffer_size: IntGauge,
44    /// Max send buffer size of TCP socket.
45    pub(crate) socket_send_buffer_max_size: IntGauge,
46    /// Max receive buffer size of TCP socket.
47    pub(crate) socket_recv_buffer_max_size: IntGauge,
48}
49
50impl TcpConnectionMetrics {
51    pub fn new(registry: &Registry) -> Self {
52        Self {
53            socket_send_buffer_size: register_int_gauge_with_registry!(
54                "tcp_socket_send_buffer_size",
55                "Send buffer size of consensus TCP socket.",
56                registry
57            )
58            .unwrap(),
59            socket_recv_buffer_size: register_int_gauge_with_registry!(
60                "tcp_socket_recv_buffer_size",
61                "Receive buffer size of consensus TCP socket.",
62                registry
63            )
64            .unwrap(),
65            socket_send_buffer_max_size: register_int_gauge_with_registry!(
66                "tcp_socket_send_buffer_max_size",
67                "Max send buffer size of TCP socket.",
68                registry
69            )
70            .unwrap(),
71            socket_recv_buffer_max_size: register_int_gauge_with_registry!(
72                "tcp_socket_recv_buffer_max_size",
73                "Max receive buffer size of TCP socket.",
74                registry
75            )
76            .unwrap(),
77        }
78    }
79}
80
81#[derive(Clone)]
82pub struct NetworkRouteMetrics {
83    /// Counter of requests by route
84    pub requests: IntCounterVec,
85    /// Request latency by route
86    pub request_latency: HistogramVec,
87    /// Request size by route
88    pub request_size: HistogramVec,
89    /// Response size by route
90    pub response_size: HistogramVec,
91    /// Counter of requests exceeding the "excessive" size limit
92    pub excessive_size_requests: IntCounterVec,
93    /// Counter of responses exceeding the "excessive" size limit
94    pub excessive_size_responses: IntCounterVec,
95    /// Gauge of the number of inflight requests at any given time by route
96    pub inflight_requests: IntGaugeVec,
97    /// Failed requests by route
98    pub errors: IntCounterVec,
99}
100
/// Upper bounds of the request-latency histogram buckets, grouped below by magnitude.
/// Going by the constant's name, the values are in seconds.
const LATENCY_SEC_BUCKETS: &[f64] = &[
    0.001, 0.005, // milliseconds
    0.01, 0.05, // tens of milliseconds
    0.1, 0.25, 0.5, // fractions of a second
    1.0, 2.5, 5.0, // seconds
    10.0, 20.0, 30.0, 60.0, 90.0, // tens of seconds
];
104
/// Upper bounds of the message-size histogram buckets; going by the constant's name,
/// the values are in bytes.
///
/// The buckets are arbitrarily chosen. The multiplier between consecutive buckets
/// shrinks step by step (x4, x2, x1.5, x1.3) to give better resolution at high sizes.
/// The trailing comment on each row names the multiplier that produced that row's values.
///
/// The third x1.5 entry used to read `2359256.`, a typo for `2359296.`
/// (1572864 x 1.5 = 2359296, and 2359296 x 1.5 = 3538944). Correcting it changes the
/// `le` label of that single bucket on the exported histograms.
const SIZE_BYTE_BUCKETS: &[f64] = &[
    2048., 8192., // *4
    16384., 32768., 65536., 131072., 262144., 524288., 1048576., // *2
    1572864., 2359296., 3538944., // *1.5
    4600627., 5980815., 7775060., 10107578., 13139851., 17081807., 22206349., 28868253., 37528729.,
    48787348., 63423553., // *1.3
];
114
115impl NetworkRouteMetrics {
116    pub fn new(node: &'static str, direction: &'static str, registry: &Registry) -> Self {
117        let requests = register_int_counter_vec_with_registry!(
118            format!("{node}_{direction}_requests"),
119            "The number of requests made on the network",
120            &["route"],
121            registry
122        )
123        .unwrap();
124
125        let request_latency = register_histogram_vec_with_registry!(
126            format!("{node}_{direction}_request_latency"),
127            "Latency of a request by route",
128            &["route"],
129            LATENCY_SEC_BUCKETS.to_vec(),
130            registry,
131        )
132        .unwrap();
133
134        let request_size = register_histogram_vec_with_registry!(
135            format!("{node}_{direction}_request_size"),
136            "Size of a request by route",
137            &["route"],
138            SIZE_BYTE_BUCKETS.to_vec(),
139            registry,
140        )
141        .unwrap();
142
143        let response_size = register_histogram_vec_with_registry!(
144            format!("{node}_{direction}_response_size"),
145            "Size of a response by route",
146            &["route"],
147            SIZE_BYTE_BUCKETS.to_vec(),
148            registry,
149        )
150        .unwrap();
151
152        let excessive_size_requests = register_int_counter_vec_with_registry!(
153            format!("{node}_{direction}_excessive_size_requests"),
154            "The number of excessively large request messages sent",
155            &["route"],
156            registry
157        )
158        .unwrap();
159
160        let excessive_size_responses = register_int_counter_vec_with_registry!(
161            format!("{node}_{direction}_excessive_size_responses"),
162            "The number of excessively large response messages seen",
163            &["route"],
164            registry
165        )
166        .unwrap();
167
168        let inflight_requests = register_int_gauge_vec_with_registry!(
169            format!("{node}_{direction}_inflight_requests"),
170            "The number of inflight network requests",
171            &["route"],
172            registry
173        )
174        .unwrap();
175
176        let errors = register_int_counter_vec_with_registry!(
177            format!("{node}_{direction}_request_errors"),
178            "Number of errors by route",
179            &["route", "status"],
180            registry,
181        )
182        .unwrap();
183
184        Self {
185            requests,
186            request_latency,
187            request_size,
188            response_size,
189            excessive_size_requests,
190            excessive_size_responses,
191            inflight_requests,
192            errors,
193        }
194    }
195}