1use mysten_network::metrics::MetricsCallbackProvider;
5use prometheus::{
6 HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, Registry,
7 register_histogram_vec_with_registry, register_int_counter_vec_with_registry,
8 register_int_gauge_vec_with_registry, register_int_gauge_with_registry,
9};
10
11use std::time::Duration;
12use sui_network::tonic::Code;
13
14pub struct SuiNodeMetrics {
15 pub jwk_requests: IntCounterVec,
16 pub jwk_request_errors: IntCounterVec,
17
18 pub total_jwks: IntCounterVec,
19 pub invalid_jwks: IntCounterVec,
20 pub unique_jwks: IntCounterVec,
21
22 pub current_protocol_version: IntGauge,
23 pub binary_max_protocol_version: IntGauge,
24 pub configured_max_protocol_version: IntGauge,
25}
26
27impl SuiNodeMetrics {
28 pub fn new(registry: &Registry) -> Self {
29 Self {
30 jwk_requests: register_int_counter_vec_with_registry!(
31 "jwk_requests",
32 "Total number of JWK requests",
33 &["provider"],
34 registry,
35 )
36 .unwrap(),
37 jwk_request_errors: register_int_counter_vec_with_registry!(
38 "jwk_request_errors",
39 "Total number of JWK request errors",
40 &["provider"],
41 registry,
42 )
43 .unwrap(),
44 total_jwks: register_int_counter_vec_with_registry!(
45 "total_jwks",
46 "Total number of JWKs",
47 &["provider"],
48 registry,
49 )
50 .unwrap(),
51 invalid_jwks: register_int_counter_vec_with_registry!(
52 "invalid_jwks",
53 "Total number of invalid JWKs",
54 &["provider"],
55 registry,
56 )
57 .unwrap(),
58 unique_jwks: register_int_counter_vec_with_registry!(
59 "unique_jwks",
60 "Total number of unique JWKs",
61 &["provider"],
62 registry,
63 )
64 .unwrap(),
65 current_protocol_version: register_int_gauge_with_registry!(
66 "sui_current_protocol_version",
67 "Current protocol version in this epoch",
68 registry,
69 )
70 .unwrap(),
71 binary_max_protocol_version: register_int_gauge_with_registry!(
72 "sui_binary_max_protocol_version",
73 "Max protocol version supported by this binary",
74 registry,
75 )
76 .unwrap(),
77 configured_max_protocol_version: register_int_gauge_with_registry!(
78 "sui_configured_max_protocol_version",
79 "Max protocol version configured in the node config",
80 registry,
81 )
82 .unwrap(),
83 }
84 }
85}
86
87#[derive(Clone)]
88pub struct GrpcMetrics {
89 inflight_grpc: IntGaugeVec,
90 grpc_requests: IntCounterVec,
91 grpc_request_latency: HistogramVec,
92}
93
/// Bucket upper bounds, in seconds, for the `grpc_request_latency` histogram.
const LATENCY_SEC_BUCKETS: &[f64] = &[
    // Sub-second buckets.
    0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5,
    // One second and above.
    1.0, 2.5, 5.0, 10.0, 20.0, 30.0, 60.0, 90.0,
];
97
98impl GrpcMetrics {
99 pub fn new(registry: &Registry) -> Self {
100 Self {
101 inflight_grpc: register_int_gauge_vec_with_registry!(
102 "inflight_grpc",
103 "Total in-flight GRPC requests per route",
104 &["path"],
105 registry,
106 )
107 .unwrap(),
108 grpc_requests: register_int_counter_vec_with_registry!(
109 "grpc_requests",
110 "Total GRPC requests per route",
111 &["path", "status"],
112 registry,
113 )
114 .unwrap(),
115 grpc_request_latency: register_histogram_vec_with_registry!(
116 "grpc_request_latency",
117 "Latency of GRPC requests per route",
118 &["path"],
119 LATENCY_SEC_BUCKETS.to_vec(),
120 registry,
121 )
122 .unwrap(),
123 }
124 }
125}
126
127impl MetricsCallbackProvider for GrpcMetrics {
128 fn on_request(&self, _path: String) {}
129
130 fn on_response(&self, path: String, latency: Duration, _status: u16, grpc_status_code: Code) {
131 self.grpc_requests
132 .with_label_values(&[path.as_str(), format!("{grpc_status_code:?}").as_str()])
133 .inc();
134 self.grpc_request_latency
135 .with_label_values(&[path.as_str()])
136 .observe(latency.as_secs_f64());
137 }
138
139 fn on_start(&self, path: &str) {
140 self.inflight_grpc.with_label_values(&[path]).inc();
141 }
142
143 fn on_drop(&self, path: &str) {
144 self.inflight_grpc.with_label_values(&[path]).dec();
145 }
146}
147
#[cfg(test)]
mod tests {
    use mysten_metrics::start_prometheus_server;
    use prometheus::{IntCounter, Registry};
    use std::net::{IpAddr, Ipv4Addr, SocketAddr};

    /// Starts the metrics endpoint, adds two prefixed registries, and checks
    /// that removing one drops its metrics from the scrape output while the
    /// other keeps reporting updated values.
    #[tokio::test]
    pub async fn test_metrics_endpoint_with_multiple_registries_add_remove() {
        // Exposition text expected for each counter state.
        let narwhal_at_zero = "# HELP narwhal_counter_1 a sample counter 1\n# TYPE narwhal_counter_1 counter\nnarwhal_counter_1 0";
        let sui_at_zero =
            "# HELP sui_counter_2 a sample counter 2\n# TYPE sui_counter_2 counter\nsui_counter_2 0";
        let sui_at_one =
            "# HELP sui_counter_2 a sample counter 2\n# TYPE sui_counter_2 counter\nsui_counter_2 1";

        let port: u16 = 8081;
        let localhost = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1));
        let registry_service = start_prometheus_server(SocketAddr::new(localhost, port));

        // Yield once so other tasks can run before the endpoint is queried.
        // NOTE(review): presumably this lets the server task start - confirm.
        tokio::task::yield_now().await;

        // First registry: metrics are prefixed with "narwhal".
        let narwhal_registry = Registry::new_custom(Some("narwhal".to_string()), None).unwrap();
        let narwhal_counter = IntCounter::new("counter_1", "a sample counter 1").unwrap();
        narwhal_registry
            .register(Box::new(narwhal_counter))
            .unwrap();

        // Second registry: metrics are prefixed with "sui". A clone of the
        // counter is registered so the test can still increment it later.
        let sui_registry = Registry::new_custom(Some("sui".to_string()), None).unwrap();
        let sui_counter = IntCounter::new("counter_2", "a sample counter 2").unwrap();
        sui_registry
            .register(Box::new(sui_counter.clone()))
            .unwrap();

        let narwhal_registry_id = registry_service.add(narwhal_registry);
        let _sui_registry_id = registry_service.add(sui_registry);

        // Both registries must show up in the first scrape.
        let first_scrape = get_metrics(port).await;
        assert!(first_scrape.contains(sui_at_zero));
        assert!(first_scrape.contains(narwhal_at_zero));

        // Remove the first registry and bump the remaining counter.
        assert!(registry_service.remove(narwhal_registry_id));
        sui_counter.inc();

        // The removed registry is gone; the remaining one reports the new value.
        let second_scrape = get_metrics(port).await;
        assert!(!second_scrape.contains(narwhal_at_zero));
        assert!(second_scrape.contains(sui_at_one));
    }

    /// Fetches `/metrics` from `127.0.0.1:port` and returns the response body.
    ///
    /// Panics if the request or reading the body fails.
    async fn get_metrics(port: u16) -> String {
        let url = format!("http://127.0.0.1:{}/metrics", port);
        let response = reqwest::Client::new().get(url).send().await.unwrap();
        response.text().await.unwrap()
    }
}