sui_package_dump/
lib.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::{
5    collections::BTreeMap,
6    fs,
7    path::{Path, PathBuf},
8};
9
10use anyhow::{Context, Result, bail, ensure};
11use client::Client;
12use fastcrypto::encoding::{Base64, Encoding};
13use query::{SuiAddress, UInt53, limits, packages};
14use sui_types::object::Object;
15use tracing::info;
16
17mod client;
18mod query;
19
20/// Ensure all packages created before `before_checkpoint` are written to the `output_dir`ectory,
21/// from the GraphQL service at `rpc_url`.
22///
23/// `output_dir` can be a path to a non-existent directory, an existing empty directory, or an
24/// existing directory written to in the past. If the path is non-existent, the invocation creates
25/// it. If the path exists but is empty, the invocation writes to the directory. If the directory
26/// has been written to in the past, the invocation picks back up where the previous invocation
27/// left off.
28pub async fn dump(
29    rpc_url: String,
30    output_dir: PathBuf,
31    before_checkpoint: Option<u64>,
32) -> Result<()> {
33    ensure_output_directory(&output_dir)?;
34
35    let client = Client::new(rpc_url)?;
36    let after_checkpoint = read_last_checkpoint(&output_dir)?;
37    let limit = max_page_size(&client).await?;
38    let (last_checkpoint, packages) =
39        fetch_packages(&client, limit, after_checkpoint, before_checkpoint).await?;
40
41    for package in &packages {
42        let SuiAddress(address) = &package.address;
43        dump_package(&output_dir, package)
44            .with_context(|| format!("Failed to dump package {address}"))?;
45    }
46
47    if let Some(last_checkpoint) = last_checkpoint {
48        write_last_checkpoint(&output_dir, last_checkpoint)?;
49    }
50
51    Ok(())
52}
53
54/// Ensure the output directory exists, either because it already exists as a writable directory, or
55/// by creating a new directory.
56fn ensure_output_directory(path: impl Into<PathBuf>) -> Result<()> {
57    let path: PathBuf = path.into();
58    if !path.exists() {
59        fs::create_dir_all(&path).context("Making output directory")?;
60        return Ok(());
61    }
62
63    ensure!(
64        path.is_dir(),
65        "Output path is not a directory: {}",
66        path.display()
67    );
68
69    let metadata = fs::metadata(&path).context("Getting metadata for output path")?;
70
71    ensure!(
72        !metadata.permissions().readonly(),
73        "Output directory is not writable: {}",
74        path.display()
75    );
76
77    Ok(())
78}
79
80/// Load the last checkpoint that was loaded by a previous run of the tool, if there is a previous
81/// run.
82fn read_last_checkpoint(output: &Path) -> Result<Option<u64>> {
83    let path = output.join("last-checkpoint");
84    if !path.exists() {
85        return Ok(None);
86    }
87
88    let content = fs::read_to_string(&path).context("Failed to read last checkpoint")?;
89    let checkpoint: u64 =
90        serde_json::from_str(&content).context("Failed to parse last checkpoint")?;
91
92    info!("Resuming download after checkpoint {checkpoint}");
93
94    Ok(Some(checkpoint))
95}
96
97/// Write the max checkpoint that we have seen a package from back to the output directory.
98fn write_last_checkpoint(output: &Path, checkpoint: u64) -> Result<()> {
99    let path = output.join("last-checkpoint");
100    let content =
101        serde_json::to_string(&checkpoint).context("Failed to serialize last checkpoint")?;
102
103    fs::write(path, content).context("Failed to write last checkpoint")?;
104    Ok(())
105}
106
107/// Read the max page size supported by the GraphQL service.
108async fn max_page_size(client: &Client) -> Result<i32> {
109    client
110        .query(limits::build())
111        .await
112        .context("Failed to fetch max page size")?
113        .service_config
114        .max_page_size
115        .context("Max page size not available for packages query")
116}
117
118/// Read all the packages between `after_checkpoint` and `before_checkpoint`, in batches of
119/// `page_size` from the `client` connected to a GraphQL service.
120///
121/// If `after_checkpoint` is not provided, packages are read from genesis. If `before_checkpoint`
122/// is not provided, packages are read until the latest checkpoint.
123///
124/// Returns the latest checkpoint that was read from in this fetch, and a list of all the packages
125/// that were read.
126async fn fetch_packages(
127    client: &Client,
128    page_size: i32,
129    after_checkpoint: Option<u64>,
130    before_checkpoint: Option<u64>,
131) -> Result<(Option<u64>, Vec<packages::MovePackage>)> {
132    let packages::Query {
133        checkpoint: checkpoint_viewed_at,
134        packages,
135    } = client
136        .query(packages::build(
137            page_size,
138            None,
139            after_checkpoint.map(UInt53),
140            before_checkpoint.map(UInt53),
141        ))
142        .await
143        .with_context(|| "Failed to fetch page 1 of packages.")?;
144
145    let packages::MovePackageConnection {
146        mut page_info,
147        mut nodes,
148    } = packages.context("Packages query returned null")?;
149
150    for i in 2.. {
151        if !page_info.has_next_page {
152            break;
153        }
154
155        let packages = client
156            .query(packages::build(
157                page_size,
158                page_info.end_cursor,
159                after_checkpoint.map(UInt53),
160                before_checkpoint.map(UInt53),
161            ))
162            .await
163            .with_context(|| format!("Failed to fetch page {i} of packages."))?
164            .packages
165            .with_context(|| format!("Packages query returned null on page {i}"))?;
166
167        nodes.extend(packages.nodes);
168        page_info = packages.page_info;
169
170        info!(
171            "Fetched page {i} ({} package{} so far).",
172            nodes.len(),
173            if nodes.len() == 1 { "" } else { "s" },
174        );
175    }
176
177    use packages::Checkpoint as C;
178    let last_checkpoint = match (checkpoint_viewed_at, before_checkpoint) {
179        (
180            Some(C {
181                sequence_number: UInt53(v),
182            }),
183            Some(b),
184        ) if b > 0 => Some(v.min(b - 1)),
185        (
186            Some(C {
187                sequence_number: UInt53(c),
188            }),
189            _,
190        )
191        | (_, Some(c)) => Some(c),
192        _ => None,
193    };
194
195    Ok((last_checkpoint, nodes))
196}
197
198/// Write out `pkg` to the `output_dir`ectory, using the package's address and name as the directory
199/// name. The following files are written for each directory:
200///
201/// - `object.bcs` -- the BCS serialized form of the `Object` type containing the package.
202///
203/// - `linkage.json` -- a JSON serialization of the package's linkage table, mapping dependency
204///   original IDs to the version of the dependency being depended on and the ID of the object
205///   on chain that contains that version.
206///
207/// - `origins.json` -- a JSON serialization of the type origin table, mapping type names contained
208///   in this package to the version of the package that first introduced that type.
209///
210/// - `*.mv` -- a BCS serialization of each compiled module in the package.
211fn dump_package(output_dir: &Path, pkg: &packages::MovePackage) -> Result<()> {
212    let Some(query::Base64(bcs)) = &pkg.object_bcs else {
213        bail!("Missing BCS");
214    };
215
216    let bytes = Base64::decode(bcs).context("Failed to decode BCS")?;
217
218    let object = bcs::from_bytes::<Object>(&bytes).context("Failed to deserialize")?;
219    let id = object.id();
220    let Some(package) = object.data.try_as_package() else {
221        bail!("Not a package");
222    };
223
224    let origins: BTreeMap<_, _> = package
225        .type_origin_table()
226        .iter()
227        .map(|o| {
228            (
229                format!("{}::{}", o.module_name, o.datatype_name),
230                o.package.to_string(),
231            )
232        })
233        .collect();
234
235    let package_dir = output_dir.join(format!("{}.{}", id, package.version().value()));
236    fs::create_dir(&package_dir).context("Failed to make output directory")?;
237
238    let linkage_json = serde_json::to_string_pretty(package.linkage_table())
239        .context("Failed to serialize linkage")?;
240    let origins_json =
241        serde_json::to_string_pretty(&origins).context("Failed to serialize type origins")?;
242
243    fs::write(package_dir.join("object.bcs"), bytes).context("Failed to write object BCS")?;
244    fs::write(package_dir.join("linkage.json"), linkage_json).context("Failed to write linkage")?;
245    fs::write(package_dir.join("origins.json"), origins_json)
246        .context("Failed to write type origins")?;
247
248    for (module_name, module_bytes) in package.serialized_module_map() {
249        let module_path = package_dir.join(format!("{module_name}.mv"));
250        fs::write(module_path, module_bytes)
251            .with_context(|| format!("Failed to write module: {module_name}"))?
252    }
253
254    Ok(())
255}