sui_package_dump/lib.rs

// Copyright (c) Mysten Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

use std::{
    collections::BTreeMap,
    fs,
    path::{Path, PathBuf},
};

use anyhow::{Context, Result, bail, ensure};
use client::Client;
use fastcrypto::encoding::{Base64, Encoding};
use query::{SuiAddress, UInt53, limits, packages};
use sui_types::object::Object;
use tracing::info;

mod client;
mod query;

/// Ensure all packages created before `before_checkpoint` have been fetched from the GraphQL
/// service at `rpc_url` and written to the `output_dir`ectory.
///
/// `output_dir` may point to a directory that does not yet exist, an existing empty directory, or
/// a directory written to by a previous run. If the path does not exist, this invocation creates
/// it. If it exists but is empty, this invocation populates it. If it has been written to before,
/// this invocation picks up where the previous one left off.
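///
/// # Examples
///
/// A minimal usage sketch; it assumes a `tokio` runtime, network access to a GraphQL endpoint
/// (the URL below is illustrative), and that this crate is importable as `sui_package_dump`:
///
/// ```no_run
/// # async fn example() {
/// sui_package_dump::dump(
///     "https://sui-mainnet.mystenlabs.com/graphql".to_string(),
///     std::path::PathBuf::from("packages"),
///     None, // dump everything up to the latest checkpoint
/// )
/// .await
/// .expect("failed to dump packages");
/// # }
/// ```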
pub async fn dump(
    rpc_url: String,
    output_dir: PathBuf,
    before_checkpoint: Option<u64>,
) -> Result<()> {
    ensure_output_directory(&output_dir)?;

    let client = Client::new(rpc_url)?;
    let after_checkpoint = read_last_checkpoint(&output_dir)?;
    let limit = max_page_size(&client).await?;
    let (last_checkpoint, packages) =
        fetch_packages(&client, limit, after_checkpoint, before_checkpoint).await?;

    for package in &packages {
        let SuiAddress(address) = &package.address;
        dump_package(&output_dir, package)
            .with_context(|| format!("Failed to dump package {address}"))?;
    }

    if let Some(last_checkpoint) = last_checkpoint {
        write_last_checkpoint(&output_dir, last_checkpoint)?;
    }

    Ok(())
}

/// Ensure the output directory exists, either because it already exists as a writable directory,
/// or by creating a new directory.
fn ensure_output_directory(path: impl Into<PathBuf>) -> Result<()> {
    let path: PathBuf = path.into();
    if !path.exists() {
        fs::create_dir_all(&path).context("Making output directory")?;
        return Ok(());
    }

    ensure!(
        path.is_dir(),
        "Output path is not a directory: {}",
        path.display()
    );

    let metadata = fs::metadata(&path).context("Getting metadata for output path")?;

    ensure!(
        !metadata.permissions().readonly(),
        "Output directory is not writable: {}",
        path.display()
    );

    Ok(())
}

/// Load the last checkpoint that was recorded by a previous run of the tool, if there was a
/// previous run.
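///
/// The checkpoint is stored as a JSON-encoded integer in a `last-checkpoint` file at the root of
/// the output directory.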
fn read_last_checkpoint(output: &Path) -> Result<Option<u64>> {
    let path = output.join("last-checkpoint");
    if !path.exists() {
        return Ok(None);
    }

    let content = fs::read_to_string(&path).context("Failed to read last checkpoint")?;
    let checkpoint: u64 =
        serde_json::from_str(&content).context("Failed to parse last checkpoint")?;

    info!("Resuming download after checkpoint {checkpoint}");

    Ok(Some(checkpoint))
}

/// Write back to the output directory the highest checkpoint from which a package has been seen.
fn write_last_checkpoint(output: &Path, checkpoint: u64) -> Result<()> {
    let path = output.join("last-checkpoint");
    let content =
        serde_json::to_string(&checkpoint).context("Failed to serialize last checkpoint")?;

    fs::write(path, content).context("Failed to write last checkpoint")?;
    Ok(())
}

/// Read the max page size supported by the GraphQL service.
async fn max_page_size(client: &Client) -> Result<i32> {
    Ok(client
        .query(limits::build())
        .await
        .context("Failed to fetch max page size")?
        .service_config
        .max_page_size)
}

/// Read all the packages between `after_checkpoint` and `before_checkpoint`, in batches of
/// `page_size`, from the `client` connected to a GraphQL service.
///
/// If `after_checkpoint` is not provided, packages are read from genesis. If `before_checkpoint`
/// is not provided, packages are read until the latest checkpoint.
///
/// Returns the latest checkpoint that was read from in this fetch, and a list of all the packages
/// that were read.
async fn fetch_packages(
    client: &Client,
    page_size: i32,
    after_checkpoint: Option<u64>,
    before_checkpoint: Option<u64>,
) -> Result<(Option<u64>, Vec<packages::MovePackage>)> {
    let packages::Query {
        checkpoint: checkpoint_viewed_at,
        packages:
            packages::MovePackageConnection {
                mut page_info,
                mut nodes,
            },
    } = client
        .query(packages::build(
            page_size,
            None,
            after_checkpoint.map(UInt53),
            before_checkpoint.map(UInt53),
        ))
        .await
        .with_context(|| "Failed to fetch page 1 of packages.")?;

    // Keep following the connection's end cursor until the service reports no further pages.
    for i in 2.. {
        if !page_info.has_next_page {
            break;
        }

        let packages = client
            .query(packages::build(
                page_size,
                page_info.end_cursor,
                after_checkpoint.map(UInt53),
                before_checkpoint.map(UInt53),
            ))
            .await
            .with_context(|| format!("Failed to fetch page {i} of packages."))?
            .packages;

        nodes.extend(packages.nodes);
        page_info = packages.page_info;

        info!(
            "Fetched page {i} ({} package{} so far).",
            nodes.len(),
            if nodes.len() == 1 { "" } else { "s" },
        );
    }

    // The checkpoint we have now seen packages up to: the checkpoint the query was served at,
    // capped by the exclusive `before_checkpoint` upper bound, if one was supplied.
    use packages::Checkpoint as C;
    let last_checkpoint = match (checkpoint_viewed_at, before_checkpoint) {
        (
            Some(C {
                sequence_number: UInt53(v),
            }),
            Some(b),
        ) if b > 0 => Some(v.min(b - 1)),
        (
            Some(C {
                sequence_number: UInt53(c),
            }),
            _,
        )
        | (_, Some(c)) => Some(c),
        _ => None,
    };

    Ok((last_checkpoint, nodes))
}

/// Write out `pkg` to the `output_dir`ectory, using the package's address and version as the
/// directory name. The following files are written for each directory:
///
/// - `object.bcs` -- the BCS serialized form of the `Object` type containing the package.
///
/// - `linkage.json` -- a JSON serialization of the package's linkage table, mapping dependency
///   original IDs to the version of the dependency being depended on and the ID of the object
///   on chain that contains that version.
///
/// - `origins.json` -- a JSON serialization of the type origin table, mapping type names contained
///   in this package to the ID of the package version that first introduced that type.
///
/// - `*.mv` -- the serialized bytecode of each compiled module in the package.
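///
/// For example, each package ends up in its own directory under `output_dir`, laid out roughly as
/// follows (module file names vary by package):
///
/// ```text
/// <output_dir>/
///     <package-address>.<version>/
///         object.bcs
///         linkage.json
///         origins.json
///         <module>.mv   -- one file per module
/// ```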
fn dump_package(output_dir: &Path, pkg: &packages::MovePackage) -> Result<()> {
    let Some(query::Base64(bcs)) = &pkg.bcs else {
        bail!("Missing BCS");
    };

    let bytes = Base64::decode(bcs).context("Failed to decode BCS")?;

    let object = bcs::from_bytes::<Object>(&bytes).context("Failed to deserialize")?;
    let id = object.id();
    let Some(package) = object.data.try_as_package() else {
        bail!("Not a package");
    };

    // Record, for each type defined in this package, the ID of the package version that first
    // introduced it.
    let origins: BTreeMap<_, _> = package
        .type_origin_table()
        .iter()
        .map(|o| {
            (
                format!("{}::{}", o.module_name, o.datatype_name),
                o.package.to_string(),
            )
        })
        .collect();

    let package_dir = output_dir.join(format!("{}.{}", id, package.version().value()));
    fs::create_dir(&package_dir).context("Failed to make output directory")?;

    let linkage_json = serde_json::to_string_pretty(package.linkage_table())
        .context("Failed to serialize linkage")?;
    let origins_json =
        serde_json::to_string_pretty(&origins).context("Failed to serialize type origins")?;

    fs::write(package_dir.join("object.bcs"), bytes).context("Failed to write object BCS")?;
    fs::write(package_dir.join("linkage.json"), linkage_json).context("Failed to write linkage")?;
    fs::write(package_dir.join("origins.json"), origins_json)
        .context("Failed to write type origins")?;

    // Write each module's bytecode out as its own `.mv` file.
    for (module_name, module_bytes) in package.serialized_module_map() {
        let module_path = package_dir.join(format!("{module_name}.mv"));
        fs::write(module_path, module_bytes)
            .with_context(|| format!("Failed to write module: {module_name}"))?;
    }

    Ok(())
}