sui_core/checkpoints/
metrics.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use mysten_metrics::histogram::Histogram as MystenHistogram;
5use prometheus::{
6    Histogram, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, Registry,
7    register_histogram_with_registry, register_int_counter_vec_with_registry,
8    register_int_counter_with_registry, register_int_gauge_vec_with_registry,
9    register_int_gauge_with_registry,
10};
11use std::convert::TryFrom;
12use std::sync::Arc;
13
14pub struct CheckpointMetrics {
15    pub last_certified_checkpoint: IntGauge,
16    pub last_constructed_checkpoint: IntGauge,
17    pub checkpoint_errors: IntCounter,
18    pub transactions_included_in_checkpoint: IntCounter,
19    pub checkpoint_roots_count: IntCounter,
20    pub checkpoint_participation: IntCounterVec,
21    pub last_received_checkpoint_signatures: IntGaugeVec,
22    pub last_sent_checkpoint_signature: IntGauge,
23    pub last_skipped_checkpoint_signature_submission: IntGauge,
24    pub last_ignored_checkpoint_signature_received: IntGauge,
25    pub highest_accumulated_epoch: IntGauge,
26    pub checkpoint_creation_latency: Histogram,
27    // TODO: delete once users are migrated to non-Mysten histogram.
28    pub checkpoint_creation_latency_ms: MystenHistogram,
29    pub remote_checkpoint_forks: IntCounter,
30    pub split_brain_checkpoint_forks: IntCounter,
31    pub checkpoint_fork_crash_mode: IntGaugeVec,
32    pub transaction_fork_crash_mode: IntGaugeVec,
33    pub last_created_checkpoint_age: Histogram,
34    // TODO: delete once users are migrated to non-Mysten histogram.
35    pub last_created_checkpoint_age_ms: MystenHistogram,
36    pub last_certified_checkpoint_age: Histogram,
37    // TODO: delete once users are migrated to non-Mysten histogram.
38    pub last_certified_checkpoint_age_ms: MystenHistogram,
39    pub accumulator_accounts_created: IntCounter,
40    pub accumulator_accounts_deleted: IntCounter,
41    pub accumulator_accounts_live: IntGauge,
42}
43
44impl CheckpointMetrics {
45    pub fn new(registry: &Registry) -> Arc<Self> {
46        let this = Self {
47            last_certified_checkpoint: register_int_gauge_with_registry!(
48                "last_certified_checkpoint",
49                "Last certified checkpoint",
50                registry
51            )
52            .unwrap(),
53            last_constructed_checkpoint: register_int_gauge_with_registry!(
54                "last_constructed_checkpoint",
55                "Last constructed checkpoint",
56                registry
57            )
58            .unwrap(),
59            last_created_checkpoint_age: register_histogram_with_registry!(
60                "last_created_checkpoint_age",
61                "Age of the last created checkpoint",
62                mysten_metrics::LATENCY_SEC_BUCKETS.to_vec(),
63                registry
64            ).unwrap(),
65            last_created_checkpoint_age_ms: MystenHistogram::new_in_registry(
66                "last_created_checkpoint_age_ms",
67                "Age of the last created checkpoint",
68                registry
69            ),
70            last_certified_checkpoint_age: register_histogram_with_registry!(
71                "last_certified_checkpoint_age",
72                "Age of the last certified checkpoint",
73                mysten_metrics::LATENCY_SEC_BUCKETS.to_vec(),
74                registry
75            ).unwrap(),
76            last_certified_checkpoint_age_ms: MystenHistogram::new_in_registry(
77                "last_certified_checkpoint_age_ms",
78                "Age of the last certified checkpoint",
79                registry
80            ),
81            checkpoint_errors: register_int_counter_with_registry!(
82                "checkpoint_errors",
83                "Checkpoints errors count",
84                registry
85            )
86            .unwrap(),
87            transactions_included_in_checkpoint: register_int_counter_with_registry!(
88                "transactions_included_in_checkpoint",
89                "Transactions included in a checkpoint",
90                registry
91            )
92            .unwrap(),
93            checkpoint_roots_count: register_int_counter_with_registry!(
94                "checkpoint_roots_count",
95                "Number of checkpoint roots received from consensus",
96                registry
97            )
98            .unwrap(),
99            checkpoint_participation: register_int_counter_vec_with_registry!(
100                "checkpoint_participation",
101                "Participation in checkpoint certification by validator",
102                &["signer"],
103                registry
104            )
105            .unwrap(),
106            last_received_checkpoint_signatures: register_int_gauge_vec_with_registry!(
107                "last_received_checkpoint_signatures",
108                "Last received checkpoint signatures by validator",
109                &["signer"],
110                registry
111            )
112            .unwrap(),
113            last_sent_checkpoint_signature: register_int_gauge_with_registry!(
114                "last_sent_checkpoint_signature",
115                "Last checkpoint signature sent by myself",
116                registry
117            )
118            .unwrap(),
119            last_skipped_checkpoint_signature_submission: register_int_gauge_with_registry!(
120                "last_skipped_checkpoint_signature_submission",
121                "Last checkpoint signature that this validator skipped submitting because it was already certfied.",
122                registry
123            )
124            .unwrap(),
125            last_ignored_checkpoint_signature_received: register_int_gauge_with_registry!(
126                "last_ignored_checkpoint_signature_received",
127                "Last received checkpoint signature that this validator ignored because it was already certfied.",
128                registry
129            )
130            .unwrap(),
131            highest_accumulated_epoch: register_int_gauge_with_registry!(
132                "highest_accumulated_epoch",
133                "Highest accumulated epoch",
134                registry
135            )
136            .unwrap(),
137            checkpoint_creation_latency: register_histogram_with_registry!(
138                "checkpoint_creation_latency",
139                "Latency from consensus commit timstamp to local checkpoint creation in milliseconds",
140                mysten_metrics::LATENCY_SEC_BUCKETS.to_vec(),
141                registry,
142            ).unwrap(),
143            checkpoint_creation_latency_ms: MystenHistogram::new_in_registry(
144                "checkpoint_creation_latency_ms",
145                "Latency from consensus commit timstamp to local checkpoint creation in milliseconds",
146                registry,
147            ),
148            remote_checkpoint_forks: register_int_counter_with_registry!(
149                "remote_checkpoint_forks",
150                "Number of remote checkpoints that forked from local checkpoints",
151                registry
152            )
153            .unwrap(),
154            split_brain_checkpoint_forks: register_int_counter_with_registry!(
155                "split_brain_checkpoint_forks",
156                "Number of checkpoints that have resulted in a split brain",
157                registry
158            )
159            .unwrap(),
160            checkpoint_fork_crash_mode: register_int_gauge_vec_with_registry!(
161                "checkpoint_fork_crash_mode",
162                "Indicates node is in crash mode due to checkpoint fork",
163                &["checkpoint_seq", "checkpoint_digest_prefix", "detected_at"],
164                registry
165            )
166            .unwrap(),
167            transaction_fork_crash_mode: register_int_gauge_vec_with_registry!(
168                "transaction_fork_crash_mode",
169                "Indicates node is in crash mode due to transaction fork",
170                &["tx_digest_prefix", "expected_effects_prefix", "actual_effects_prefix", "detected_at"],
171                registry
172            )
173            .unwrap(),
174            accumulator_accounts_created: register_int_counter_with_registry!(
175                "accumulator_accounts_created",
176                "Total number of accumulator account objects created by settlement transactions",
177                registry
178            )
179            .unwrap(),
180            accumulator_accounts_deleted: register_int_counter_with_registry!(
181                "accumulator_accounts_deleted",
182                "Total number of accumulator account objects deleted by settlement transactions",
183                registry
184            )
185            .unwrap(),
186            accumulator_accounts_live: register_int_gauge_with_registry!(
187                "accumulator_accounts_live",
188                "Current number of live accumulator account objects after settlement processing",
189                registry
190            )
191            .unwrap(),
192        };
193        Arc::new(this)
194    }
195
196    pub fn new_for_tests() -> Arc<Self> {
197        Self::new(&Registry::new())
198    }
199
200    pub fn initialize_accumulator_accounts_live(&self, live_accounts: u64) {
201        self.accumulator_accounts_live
202            .set(i64::try_from(live_accounts).expect("accumulator account count exceeds i64"));
203    }
204
205    pub fn report_accumulator_account_changes(&self, created: u64, deleted: u64) {
206        self.accumulator_accounts_created.inc_by(created);
207        self.accumulator_accounts_deleted.inc_by(deleted);
208
209        let created = i64::try_from(created).expect("created accumulator accounts exceeds i64");
210        let deleted = i64::try_from(deleted).expect("deleted accumulator accounts exceeds i64");
211        self.accumulator_accounts_live.add(created - deleted);
212    }
213}