sui_types/
metrics.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::sync::atomic::{AtomicU64, Ordering};
5use std::time::{SystemTime, UNIX_EPOCH};
6
7use prometheus::{
8    Histogram, IntCounterVec, IntGauge, register_histogram_with_registry,
9    register_int_counter_vec_with_registry, register_int_gauge_with_registry,
10};
11
12pub struct LimitsMetrics {
13    /// Execution limits metrics
14    pub excessive_estimated_effects_size: IntCounterVec,
15    pub excessive_written_objects_size: IntCounterVec,
16    pub excessive_new_move_object_ids: IntCounterVec,
17    pub excessive_deleted_move_object_ids: IntCounterVec,
18    pub excessive_transferred_move_object_ids: IntCounterVec,
19    pub excessive_object_runtime_cached_objects: IntCounterVec,
20    pub excessive_object_runtime_store_entries: IntCounterVec,
21}
22
23impl LimitsMetrics {
24    pub fn new(registry: &prometheus::Registry) -> LimitsMetrics {
25        Self {
26            excessive_estimated_effects_size: register_int_counter_vec_with_registry!(
27                "excessive_estimated_effects_size",
28                "Number of transactions with estimated effects size exceeding the limit",
29                &["metered", "limit_type"],
30                registry,
31            )
32                .unwrap(),
33            excessive_written_objects_size: register_int_counter_vec_with_registry!(
34                "excessive_written_objects_size",
35                "Number of transactions with written objects size exceeding the limit",
36                &["metered", "limit_type"],
37                registry,
38            )
39                .unwrap(),
40            excessive_new_move_object_ids: register_int_counter_vec_with_registry!(
41                "excessive_new_move_object_ids_size",
42                "Number of transactions with new move object ID count exceeding the limit",
43                &["metered", "limit_type"],
44                registry,
45            )
46                .unwrap(),
47            excessive_deleted_move_object_ids: register_int_counter_vec_with_registry!(
48                "excessive_deleted_move_object_ids_size",
49                "Number of transactions with deleted move object ID count exceeding the limit",
50                &["metered", "limit_type"],
51                registry,
52            )
53                .unwrap(),
54            excessive_transferred_move_object_ids: register_int_counter_vec_with_registry!(
55                "excessive_transferred_move_object_ids_size",
56                "Number of transactions with transferred move object ID count exceeding the limit",
57                &["metered", "limit_type"],
58                registry,
59            )
60                .unwrap(),
61            excessive_object_runtime_cached_objects: register_int_counter_vec_with_registry!(
62                "excessive_object_runtime_cached_objects_size",
63                "Number of transactions with object runtime cached object count exceeding the limit",
64                &["metered", "limit_type"],
65                registry,
66            )
67                .unwrap(),
68            excessive_object_runtime_store_entries: register_int_counter_vec_with_registry!(
69                "excessive_object_runtime_store_entries_size",
70                "Number of transactions with object runtime store entry count exceeding the limit",
71                &["metered", "limit_type"],
72                registry,
73            )
74                .unwrap(),
75        }
76    }
77}
78
79/// Combined execution metrics passed into executor methods.
80pub struct ExecutionMetrics {
81    pub limits_metrics: LimitsMetrics,
82    pub vm_telemetry_metrics: MoveVMTelemetryMetrics,
83}
84
85impl ExecutionMetrics {
86    pub fn new(registry: &prometheus::Registry) -> Self {
87        Self {
88            limits_metrics: LimitsMetrics::new(registry),
89            vm_telemetry_metrics: MoveVMTelemetryMetrics::new(registry),
90        }
91    }
92}
93
94pub struct BytecodeVerifierMetrics {
95    /// Bytecode verifier metrics timeout counter
96    pub verifier_timeout_metrics: IntCounterVec,
97    /// Bytecode verifier runtime latency for each module successfully verified
98    pub verifier_runtime_per_module_success_latency: Histogram,
99    /// Bytecode verifier runtime latency for each programmable transaction block successfully verified
100    pub verifier_runtime_per_ptb_success_latency: Histogram,
101    /// Bytecode verifier runtime latency for each module which timed out
102    pub verifier_runtime_per_module_timeout_latency: Histogram,
103    /// Bytecode verifier runtime latency for each programmable transaction block which timed out
104    pub verifier_runtime_per_ptb_timeout_latency: Histogram,
105}
106
107impl BytecodeVerifierMetrics {
108    /// DEPRECATED in latest metered verifier, which only report overall success or timeout.
109    pub const MOVE_VERIFIER_TAG: &'static str = "move_verifier";
110
111    /// DEPRECATED in latest metered verifier, which only report overall success or timeout.
112    pub const SUI_VERIFIER_TAG: &'static str = "sui_verifier";
113
114    pub const OVERALL_TAG: &'static str = "overall";
115    pub const SUCCESS_TAG: &'static str = "success";
116    pub const TIMEOUT_TAG: &'static str = "failed";
117    const LATENCY_SEC_BUCKETS: &'static [f64] = &[
118        0.000_010, 0.000_025, 0.000_050, 0.000_100, /* sub 100 micros */
119        0.000_250, 0.000_500, 0.001_000, 0.002_500, 0.005_000, 0.010_000, /* sub 10 ms: p99 */
120        0.025_000, 0.050_000, 0.100_000, 0.250_000, 0.500_000, 1.000_000, /* sub 1 s */
121        10.000_000, 20.000_000, 50.000_000, 100.0, /* We should almost never get here */
122    ];
123    pub fn new(registry: &prometheus::Registry) -> Self {
124        Self {
125            verifier_timeout_metrics: register_int_counter_vec_with_registry!(
126                "verifier_timeout_metrics",
127                "Number of timeouts in bytecode verifier",
128                &["verifier_meter", "status"],
129                registry,
130            )
131            .unwrap(),
132            verifier_runtime_per_module_success_latency: register_histogram_with_registry!(
133                "verifier_runtime_per_module_success_latency",
134                "Time spent running bytecode verifier to completion at `run_metered_move_bytecode_verifier_impl`",
135                Self::LATENCY_SEC_BUCKETS.to_vec(),
136                registry
137            )
138            .unwrap(),
139            verifier_runtime_per_ptb_success_latency: register_histogram_with_registry!(
140                "verifier_runtime_per_ptb_success_latency",
141                "Time spent running bytecode verifier to completion over the entire PTB at `transaction_input_checker::check_non_system_packages_to_be_published`",
142                Self::LATENCY_SEC_BUCKETS.to_vec(),
143                registry
144            ).unwrap(),
145            verifier_runtime_per_module_timeout_latency:  register_histogram_with_registry!(
146                "verifier_runtime_per_module_timeout_latency",
147                "Time spent running bytecode verifier to timeout at `run_metered_move_bytecode_verifier_impl`",
148                Self::LATENCY_SEC_BUCKETS.to_vec(),
149                registry
150            )
151            .unwrap(),
152            verifier_runtime_per_ptb_timeout_latency: register_histogram_with_registry!(
153                "verifier_runtime_per_ptb_timeout_latency",
154                "Time spent running bytecode verifier to timeout over the entire PTB at `transaction_input_checker::check_non_system_packages_to_be_published`",
155                Self::LATENCY_SEC_BUCKETS.to_vec(),
156                registry
157            ).unwrap(),
158        }
159    }
160}
161
162/// Prometheus metrics for Move VM runtime telemetry, updated periodically via
163/// time-based sampling.
164pub struct MoveVMTelemetryMetrics {
165    pub move_vm_package_cache_count: IntGauge,
166    pub move_vm_total_arena_size_bytes: IntGauge,
167    pub move_vm_module_count: IntGauge,
168    pub move_vm_function_count: IntGauge,
169    pub move_vm_type_count: IntGauge,
170    pub move_vm_interner_size: IntGauge,
171    pub move_vm_vtable_cache_count: IntGauge,
172    pub move_vm_vtable_cache_hits: IntGauge,
173    pub move_vm_vtable_cache_misses: IntGauge,
174    pub move_vm_load_time_ms: IntGauge,
175    pub move_vm_load_count: IntGauge,
176    pub move_vm_validation_time_ms: IntGauge,
177    pub move_vm_validation_count: IntGauge,
178    pub move_vm_jit_time_ms: IntGauge,
179    pub move_vm_jit_count: IntGauge,
180    pub move_vm_execution_time_ms: IntGauge,
181    pub move_vm_execution_count: IntGauge,
182    pub move_vm_interpreter_time_ms: IntGauge,
183    pub move_vm_interpreter_count: IntGauge,
184    pub move_vm_max_callstack_size: IntGauge,
185    pub move_vm_max_valuestack_size: IntGauge,
186    pub move_vm_total_time_ms: IntGauge,
187    pub move_vm_total_count: IntGauge,
188    last_report_ms: AtomicU64,
189}
190
191impl MoveVMTelemetryMetrics {
192    const REPORT_INTERVAL_MS: u64 = 30_000;
193
194    pub fn new(registry: &prometheus::Registry) -> Self {
195        Self {
196            move_vm_package_cache_count: register_int_gauge_with_registry!(
197                "move_vm_package_cache_count",
198                "Number of packages in the Move VM cache",
199                registry,
200            )
201            .unwrap(),
202            move_vm_total_arena_size_bytes: register_int_gauge_with_registry!(
203                "move_vm_total_arena_size_bytes",
204                "Total arena memory of cached Move VM packages in bytes",
205                registry,
206            )
207            .unwrap(),
208            move_vm_module_count: register_int_gauge_with_registry!(
209                "move_vm_module_count",
210                "Total modules across cached Move VM packages",
211                registry,
212            )
213            .unwrap(),
214            move_vm_function_count: register_int_gauge_with_registry!(
215                "move_vm_function_count",
216                "Total functions across cached Move VM packages",
217                registry,
218            )
219            .unwrap(),
220            move_vm_type_count: register_int_gauge_with_registry!(
221                "move_vm_type_count",
222                "Total types across cached Move VM packages",
223                registry,
224            )
225            .unwrap(),
226            move_vm_interner_size: register_int_gauge_with_registry!(
227                "move_vm_interner_size",
228                "Number of entries in the Move VM string interner",
229                registry,
230            )
231            .unwrap(),
232            move_vm_vtable_cache_count: register_int_gauge_with_registry!(
233                "move_vm_vtable_cache_count",
234                "Number of entries in the Move VM VTable cache",
235                registry,
236            )
237            .unwrap(),
238            move_vm_vtable_cache_hits: register_int_gauge_with_registry!(
239                "move_vm_vtable_cache_hits",
240                "Cumulative VTable cache hits in the Move VM",
241                registry,
242            )
243            .unwrap(),
244            move_vm_vtable_cache_misses: register_int_gauge_with_registry!(
245                "move_vm_vtable_cache_misses",
246                "Cumulative VTable cache misses in the Move VM",
247                registry,
248            )
249            .unwrap(),
250            move_vm_load_time_ms: register_int_gauge_with_registry!(
251                "move_vm_load_time_ms",
252                "Cumulative package load time in the Move VM (ms)",
253                registry,
254            )
255            .unwrap(),
256            move_vm_load_count: register_int_gauge_with_registry!(
257                "move_vm_load_count",
258                "Cumulative number of packages loaded by the Move VM",
259                registry,
260            )
261            .unwrap(),
262            move_vm_validation_time_ms: register_int_gauge_with_registry!(
263                "move_vm_validation_time_ms",
264                "Cumulative validation time in the Move VM (ms)",
265                registry,
266            )
267            .unwrap(),
268            move_vm_validation_count: register_int_gauge_with_registry!(
269                "move_vm_validation_count",
270                "Cumulative number of validations in the Move VM",
271                registry,
272            )
273            .unwrap(),
274            move_vm_jit_time_ms: register_int_gauge_with_registry!(
275                "move_vm_jit_time_ms",
276                "Cumulative JIT compilation time in the Move VM (ms)",
277                registry,
278            )
279            .unwrap(),
280            move_vm_jit_count: register_int_gauge_with_registry!(
281                "move_vm_jit_count",
282                "Cumulative number of JIT compilations in the Move VM",
283                registry,
284            )
285            .unwrap(),
286            move_vm_execution_time_ms: register_int_gauge_with_registry!(
287                "move_vm_execution_time_ms",
288                "Cumulative execution time in the Move VM (ms)",
289                registry,
290            )
291            .unwrap(),
292            move_vm_execution_count: register_int_gauge_with_registry!(
293                "move_vm_execution_count",
294                "Cumulative number of execution calls in the Move VM",
295                registry,
296            )
297            .unwrap(),
298            move_vm_interpreter_time_ms: register_int_gauge_with_registry!(
299                "move_vm_interpreter_time_ms",
300                "Cumulative interpreter time in the Move VM (ms)",
301                registry,
302            )
303            .unwrap(),
304            move_vm_interpreter_count: register_int_gauge_with_registry!(
305                "move_vm_interpreter_count",
306                "Cumulative number of interpreter calls in the Move VM",
307                registry,
308            )
309            .unwrap(),
310            move_vm_max_callstack_size: register_int_gauge_with_registry!(
311                "move_vm_max_callstack_size",
312                "Maximum observed callstack depth in the Move VM",
313                registry,
314            )
315            .unwrap(),
316            move_vm_max_valuestack_size: register_int_gauge_with_registry!(
317                "move_vm_max_valuestack_size",
318                "Maximum observed value stack size in the Move VM",
319                registry,
320            )
321            .unwrap(),
322            move_vm_total_time_ms: register_int_gauge_with_registry!(
323                "move_vm_total_time_ms",
324                "Cumulative total time spent in the Move VM (ms)",
325                registry,
326            )
327            .unwrap(),
328            move_vm_total_count: register_int_gauge_with_registry!(
329                "move_vm_total_count",
330                "Cumulative total number of Move VM interactions",
331                registry,
332            )
333            .unwrap(),
334            last_report_ms: AtomicU64::new(0),
335        }
336    }
337
338    /// Update gauges if the reporting interval has elapsed. The closure is only called
339    /// when an update is due, avoiding the expensive `get_telemetry_report()` cache scan
340    /// on every transaction. The closure receives `&Self` so it can set gauges directly.
341    pub fn try_update(&self, f: impl FnOnce(&Self)) {
342        let now_ms = SystemTime::now()
343            .duration_since(UNIX_EPOCH)
344            .expect("system time before UNIX epoch")
345            .as_millis() as u64;
346        let last = self.last_report_ms.load(Ordering::Relaxed);
347        if now_ms.saturating_sub(last) < Self::REPORT_INTERVAL_MS {
348            return;
349        }
350        self.last_report_ms.store(now_ms, Ordering::Relaxed);
351        f(self);
352    }
353}