consensus_config/
parameters.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
// Copyright (c) Mysten Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

use std::{path::PathBuf, time::Duration};

use serde::{Deserialize, Serialize};

/// Operational configurations of a consensus authority.
///
/// All fields should tolerate inconsistencies among authorities, without affecting safety of the
/// protocol. Otherwise, they need to be part of Sui protocol config or epoch state on-chain.
///
/// NOTE: fields with default values are specified in the serde default functions. Most operators
/// should not need to specify any field, except db_path.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Parameters {
    /// Path to consensus DB for this epoch. Required when initializing consensus.
    /// This is calculated based on user configuration for base directory.
    /// It is skipped by serde, so it is never read from or written to serialized config.
    #[serde(skip)]
    pub db_path: PathBuf,

    /// Time to wait for parent round leader before sealing a block, from when parent round
    /// has a quorum.
    #[serde(default = "Parameters::default_leader_timeout")]
    pub leader_timeout: Duration,

    /// Minimum delay between rounds, to avoid generating too many rounds when latency is low.
    /// This is especially necessary for tests running locally.
    /// If setting a non-default value, it should be set low enough to avoid reducing
    /// round rate and increasing latency in realistic and distributed configurations.
    #[serde(default = "Parameters::default_min_round_delay")]
    pub min_round_delay: Duration,

    /// Maximum forward time drift (how far in future) allowed for received blocks.
    #[serde(default = "Parameters::default_max_forward_time_drift")]
    pub max_forward_time_drift: Duration,

    /// Max number of blocks to fetch per block sync request.
    /// Block sync requests have very short (~2s) timeouts.
    /// So this value should be limited to allow the requests
    /// to finish on hosts with good network with this timeout.
    /// Usually a host sends 14-16 blocks per sec to a peer, so
    /// sending 32 blocks in 2 seconds should be reasonable.
    #[serde(default = "Parameters::default_max_blocks_per_sync")]
    pub max_blocks_per_sync: usize,

    /// Max number of blocks to fetch per commit sync request.
    #[serde(default = "Parameters::default_max_blocks_per_fetch")]
    pub max_blocks_per_fetch: usize,

    /// Time to wait during node start up until the node has synced the last proposed block via the
    /// network peers. When set to `0` the sync mechanism is disabled. This property is meant to be
    /// used for amnesia recovery.
    #[serde(default = "Parameters::default_sync_last_known_own_block_timeout")]
    pub sync_last_known_own_block_timeout: Duration,

    /// Interval in milliseconds to probe highest received rounds of peers.
    #[serde(default = "Parameters::default_round_prober_interval_ms")]
    pub round_prober_interval_ms: u64,

    /// Timeout in milliseconds for a round prober request.
    #[serde(default = "Parameters::default_round_prober_request_timeout_ms")]
    pub round_prober_request_timeout_ms: u64,

    /// Proposing new block is stopped when the propagation delay is greater than this threshold.
    /// Propagation delay is the difference between the round of the last proposed block and
    /// the highest round from this authority that is received by all validators in a quorum.
    #[serde(default = "Parameters::default_propagation_delay_stop_proposal_threshold")]
    pub propagation_delay_stop_proposal_threshold: u32,

    /// The number of rounds of blocks to be kept in the Dag state cache per authority. The larger
    /// the number, the more blocks are kept in memory, which minimises potential disk access.
    /// Value should be at minimum 50 rounds to ensure node performance, but being too large can be
    /// expensive in memory usage.
    #[serde(default = "Parameters::default_dag_state_cached_rounds")]
    pub dag_state_cached_rounds: u32,

    /// Number of authorities commit syncer fetches in parallel.
    /// Both commits in a range and blocks referenced by the commits are fetched per authority.
    #[serde(default = "Parameters::default_commit_sync_parallel_fetches")]
    pub commit_sync_parallel_fetches: usize,

    /// Number of commits to fetch in a batch, also the maximum number of commits returned per
    /// fetch.
    /// If this value is set too small, fetching becomes inefficient.
    /// If this value is set too large, it can result in load imbalance and stragglers.
    #[serde(default = "Parameters::default_commit_sync_batch_size")]
    pub commit_sync_batch_size: u32,

    /// This affects the maximum number of commit batches being fetched, and those fetched but not
    /// processed as consensus output, before throttling of outgoing commit fetches starts.
    #[serde(default = "Parameters::default_commit_sync_batches_ahead")]
    pub commit_sync_batches_ahead: usize,

    /// Anemo network settings.
    #[serde(default = "AnemoParameters::default")]
    pub anemo: AnemoParameters,

    /// Tonic network settings.
    #[serde(default = "TonicParameters::default")]
    pub tonic: TonicParameters,
}

impl Parameters {
    /// Selects between two candidate defaults: `simtest` when the crate is compiled with
    /// `cfg(msim)`, `regular` for every other build.
    fn simtest_or<T>(simtest: T, regular: T) -> T {
        if cfg!(msim) {
            simtest
        } else {
            regular
        }
    }

    /// Default `leader_timeout`: 200ms.
    pub(crate) fn default_leader_timeout() -> Duration {
        Duration::from_millis(200)
    }

    /// Default `min_round_delay`.
    ///
    /// * 400ms under `cfg(msim)`, or when the environment variable
    ///   `__TEST_ONLY_CONSENSUS_USE_LONG_MIN_ROUND_DELAY` is set to a valid Unicode value.
    ///   Checkpoint building and execution cannot keep up with a high commit rate in simtests,
    ///   leading to long reconfiguration delays: simtest is single threaded, and spending too
    ///   much time in consensus can lead to starvation elsewhere.
    /// * 250ms under `cfg(test)`, to avoid excessive CPU, data and logs in tests.
    /// * 50ms otherwise.
    pub(crate) fn default_min_round_delay() -> Duration {
        // Kept lazy so the environment is only consulted when not building for msim.
        let long_delay_requested =
            || std::env::var("__TEST_ONLY_CONSENSUS_USE_LONG_MIN_ROUND_DELAY").is_ok();

        let millis = if cfg!(msim) || long_delay_requested() {
            400
        } else if cfg!(test) {
            250
        } else {
            50
        };
        Duration::from_millis(millis)
    }

    /// Default `max_forward_time_drift`: 500ms.
    pub(crate) fn default_max_forward_time_drift() -> Duration {
        Duration::from_millis(500)
    }

    /// Default `max_blocks_per_sync`: 32, or 4 under msim to exercise hitting the blocks per
    /// sync limit.
    pub(crate) fn default_max_blocks_per_sync() -> usize {
        Self::simtest_or(4, 32)
    }

    /// Default `max_blocks_per_fetch`: 1000, or 10 under msim to exercise hitting the blocks per
    /// fetch limit.
    pub(crate) fn default_max_blocks_per_fetch() -> usize {
        Self::simtest_or(10, 1000)
    }

    /// Default `sync_last_known_own_block_timeout`: 5 seconds, or 500ms under msim.
    ///
    /// The 5 second value prioritises liveness over the complete de-risking of block
    /// equivocation; in the majority of cases it should be good enough given a healthy network.
    pub(crate) fn default_sync_last_known_own_block_timeout() -> Duration {
        Self::simtest_or(Duration::from_millis(500), Duration::from_secs(5))
    }

    /// Default `round_prober_interval_ms`: 5000, or 1000 under msim.
    pub(crate) fn default_round_prober_interval_ms() -> u64 {
        Self::simtest_or(1000, 5000)
    }

    /// Default `round_prober_request_timeout_ms`: 4000, or 800 under msim.
    pub(crate) fn default_round_prober_request_timeout_ms() -> u64 {
        Self::simtest_or(800, 4000)
    }

    /// Default `propagation_delay_stop_proposal_threshold`: 5 rounds, or 2 under msim.
    ///
    /// Propagation delay is usually 0 round in production.
    pub(crate) fn default_propagation_delay_stop_proposal_threshold() -> u32 {
        Self::simtest_or(2, 5)
    }

    /// Default `dag_state_cached_rounds`: 500, or 5 under msim to exercise reading blocks from
    /// store.
    pub(crate) fn default_dag_state_cached_rounds() -> u32 {
        Self::simtest_or(5, 500)
    }

    /// Default `commit_sync_parallel_fetches`: 8.
    pub(crate) fn default_commit_sync_parallel_fetches() -> usize {
        8
    }

    /// Default `commit_sync_batch_size`: 100, or 5 under msim to exercise commit sync.
    pub(crate) fn default_commit_sync_batch_size() -> u32 {
        Self::simtest_or(5, 100)
    }

    /// Default `commit_sync_batches_ahead`: 32.
    ///
    /// This is a multiple of the default `commit_sync_parallel_fetches` to allow fetching ahead,
    /// while keeping the total number of inflight fetches and unprocessed fetched commits
    /// limited.
    pub(crate) fn default_commit_sync_batches_ahead() -> usize {
        32
    }
}

/// Builds a `Parameters` whose every configurable field takes the value of its serde default
/// function, so a programmatic default matches a config file that specifies no fields.
impl Default for Parameters {
    fn default() -> Self {
        Self {
            // Not configurable through serde; starts out as an empty path.
            db_path: PathBuf::new(),
            leader_timeout: Self::default_leader_timeout(),
            min_round_delay: Self::default_min_round_delay(),
            max_forward_time_drift: Self::default_max_forward_time_drift(),
            max_blocks_per_sync: Self::default_max_blocks_per_sync(),
            max_blocks_per_fetch: Self::default_max_blocks_per_fetch(),
            sync_last_known_own_block_timeout: Self::default_sync_last_known_own_block_timeout(),
            round_prober_interval_ms: Self::default_round_prober_interval_ms(),
            round_prober_request_timeout_ms: Self::default_round_prober_request_timeout_ms(),
            propagation_delay_stop_proposal_threshold:
                Self::default_propagation_delay_stop_proposal_threshold(),
            dag_state_cached_rounds: Self::default_dag_state_cached_rounds(),
            commit_sync_parallel_fetches: Self::default_commit_sync_parallel_fetches(),
            commit_sync_batch_size: Self::default_commit_sync_batch_size(),
            commit_sync_batches_ahead: Self::default_commit_sync_batches_ahead(),
            // Nested network settings use their own `Default` implementations.
            anemo: AnemoParameters::default(),
            tonic: TonicParameters::default(),
        }
    }
}

/// Anemo network settings.
///
/// Every field has a serde default, so an empty configuration deserializes successfully.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AnemoParameters {
    /// Size in bytes above which network messages are considered excessively large. Excessively
    /// large messages will still be handled, but logged and reported in metrics for debugging.
    ///
    /// If unspecified, this will default to 8 MiB.
    #[serde(default = "AnemoParameters::default_excessive_message_size")]
    pub excessive_message_size: usize,
}

impl AnemoParameters {
    /// Default `excessive_message_size`: 8 MiB, expressed in bytes.
    fn default_excessive_message_size() -> usize {
        const MIB: usize = 1 << 20;
        8 * MIB
    }
}

impl Default for AnemoParameters {
    fn default() -> Self {
        Self {
            excessive_message_size: AnemoParameters::default_excessive_message_size(),
        }
    }
}

/// Tonic network settings.
///
/// Every field has a serde default, so an empty configuration deserializes successfully.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TonicParameters {
    /// Keepalive interval and timeouts for both client and server.
    ///
    /// If unspecified, this will default to 5s.
    #[serde(default = "TonicParameters::default_keepalive_interval")]
    pub keepalive_interval: Duration,

    /// Size of various per-connection buffers.
    ///
    /// If unspecified, this will default to 32MiB.
    #[serde(default = "TonicParameters::default_connection_buffer_size")]
    pub connection_buffer_size: usize,

    /// Messages over this size threshold will increment a counter.
    ///
    /// If unspecified, this will default to 16MiB.
    #[serde(default = "TonicParameters::default_excessive_message_size")]
    pub excessive_message_size: usize,

    /// Hard message size limit for both requests and responses.
    /// This value is higher than strictly necessary, to allow overheads.
    /// Message size targets and soft limits are computed based on this value.
    ///
    /// If unspecified, this will default to 64MiB.
    #[serde(default = "TonicParameters::default_message_size_limit")]
    pub message_size_limit: usize,
}

impl TonicParameters {
    /// Number of bytes in one MiB; the size defaults below are expressed in this unit.
    const MIB: usize = 1 << 20;

    /// Default `keepalive_interval`: 5 seconds.
    fn default_keepalive_interval() -> Duration {
        Duration::from_secs(5)
    }

    /// Default `connection_buffer_size`: 32 MiB, in bytes.
    fn default_connection_buffer_size() -> usize {
        32 * Self::MIB
    }

    /// Default `excessive_message_size`: 16 MiB, in bytes.
    fn default_excessive_message_size() -> usize {
        16 * Self::MIB
    }

    /// Default `message_size_limit`: 64 MiB, in bytes.
    fn default_message_size_limit() -> usize {
        64 * Self::MIB
    }
}

impl Default for TonicParameters {
    fn default() -> Self {
        Self {
            keepalive_interval: TonicParameters::default_keepalive_interval(),
            connection_buffer_size: TonicParameters::default_connection_buffer_size(),
            excessive_message_size: TonicParameters::default_excessive_message_size(),
            message_size_limit: TonicParameters::default_message_size_limit(),
        }
    }
}