sui_analytics_indexer/
schema.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Row schema abstraction for columnar data serialization.
5//!
6//! This module provides a generic abstraction for serializing table rows to columnar
7//! formats like CSV and Parquet. The `RowSchema` trait and `ColumnValue` enum work
8//! together to enable format-agnostic row serialization.
9
10use std::borrow::Cow;
11use std::sync::Arc;
12
13use sui_types::dynamic_field::DynamicFieldType;
14use thiserror::Error;
15
16use crate::tables::InputObjectKind;
17use crate::tables::ObjectStatus;
18use crate::tables::OwnerType;
19
20/// Error type for column access operations.
21#[derive(Debug, Error)]
22pub enum ColumnError {
23    #[error("invalid column index {0}")]
24    InvalidIndex(usize),
25}
26
/// Represents a single column value in a row.
///
/// This enum provides a type-safe way to represent different column value types
/// that can be serialized to columnar formats. Uses `Cow<str>` to enable zero-copy
/// borrowing for string fields.
///
/// The common traits `Debug`, `Clone`, `PartialEq` and `Eq` are derived so values
/// can be logged, duplicated and compared in tests. Equality on the string
/// variants compares the text only: a borrowed and an owned `Cow` holding the
/// same string are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnValue<'a> {
    /// An unsigned 64-bit integer column.
    U64(u64),
    /// A string column, either borrowed from the row or owned.
    Str(Cow<'a, str>),
    /// A boolean column.
    Bool(bool),
    /// A signed 64-bit integer column.
    I64(i64),
    /// An unsigned 64-bit integer column whose value may be absent.
    OptionU64(Option<u64>),
    /// A string column whose value may be absent.
    OptionStr(Option<Cow<'a, str>>),
}
40
41// Copy types - dereference from reference
42impl<'a> From<&'a u64> for ColumnValue<'a> {
43    fn from(value: &'a u64) -> Self {
44        Self::U64(*value)
45    }
46}
47
48impl<'a> From<&'a i64> for ColumnValue<'a> {
49    fn from(value: &'a i64) -> Self {
50        Self::I64(*value)
51    }
52}
53
54impl<'a> From<&'a bool> for ColumnValue<'a> {
55    fn from(value: &'a bool) -> Self {
56        Self::Bool(*value)
57    }
58}
59
60impl<'a> From<&'a Option<u64>> for ColumnValue<'a> {
61    fn from(value: &'a Option<u64>) -> Self {
62        Self::OptionU64(*value)
63    }
64}
65
66// String types - zero-copy borrow
67impl<'a> From<&'a String> for ColumnValue<'a> {
68    fn from(value: &'a String) -> Self {
69        Self::Str(Cow::Borrowed(value.as_str()))
70    }
71}
72
73impl<'a> From<&'a Option<String>> for ColumnValue<'a> {
74    fn from(value: &'a Option<String>) -> Self {
75        Self::OptionStr(value.as_ref().map(|s| Cow::Borrowed(s.as_str())))
76    }
77}
78
79// Enum types - must allocate since they use Display::to_string()
80impl<'a> From<&'a OwnerType> for ColumnValue<'a> {
81    fn from(value: &'a OwnerType) -> Self {
82        Self::Str(Cow::Owned(value.to_string()))
83    }
84}
85
86impl<'a> From<&'a Option<OwnerType>> for ColumnValue<'a> {
87    fn from(value: &'a Option<OwnerType>) -> Self {
88        Self::OptionStr(value.as_ref().map(|v| Cow::Owned(v.to_string())))
89    }
90}
91
92impl<'a> From<&'a ObjectStatus> for ColumnValue<'a> {
93    fn from(value: &'a ObjectStatus) -> Self {
94        Self::Str(Cow::Owned(value.to_string()))
95    }
96}
97
98impl<'a> From<&'a Option<ObjectStatus>> for ColumnValue<'a> {
99    fn from(value: &'a Option<ObjectStatus>) -> Self {
100        Self::OptionStr(value.as_ref().map(|v| Cow::Owned(v.to_string())))
101    }
102}
103
104impl<'a> From<&'a Option<InputObjectKind>> for ColumnValue<'a> {
105    fn from(value: &'a Option<InputObjectKind>) -> Self {
106        Self::OptionStr(value.as_ref().map(|v| Cow::Owned(v.to_string())))
107    }
108}
109
110impl<'a> From<&'a DynamicFieldType> for ColumnValue<'a> {
111    fn from(value: &'a DynamicFieldType) -> Self {
112        Self::Str(Cow::Owned(value.to_string()))
113    }
114}
115
116impl<'a> From<&'a Option<DynamicFieldType>> for ColumnValue<'a> {
117    fn from(value: &'a Option<DynamicFieldType>) -> Self {
118        Self::OptionStr(value.as_ref().map(|v| Cow::Owned(v.to_string())))
119    }
120}
121
/// Trait for types that can describe their columnar schema and provide column values.
///
/// This trait enables generic serialization of row types to columnar formats.
/// Implementations are typically generated by the `SerializeRow` derive macro.
///
/// The trait is object-safe: `schema()` has a `Self: Sized` bound which excludes it
/// from the vtable, while `column_count()` and `get_column()` can be called on trait objects.
pub trait RowSchema {
    /// Returns the column names for this row type.
    ///
    /// This is an associated function with no `self` receiver, so the names are
    /// obtained from the type itself rather than from a particular row value.
    ///
    /// This method requires `Self: Sized` and cannot be called on trait objects.
    /// Use `column_count()` for the number of columns when working with `dyn RowSchema`.
    fn schema() -> &'static [&'static str]
    where
        Self: Sized;

    /// Returns the number of columns in the schema.
    ///
    /// This is an object-safe alternative to `schema().len()` for use with trait objects.
    fn column_count(&self) -> usize;

    /// Returns the value at the given column index.
    ///
    /// The returned [`ColumnValue`] is tied to the borrow of `self`, so it may hold
    /// string data borrowed from the row instead of a copy.
    ///
    /// # Errors
    ///
    /// Returns an error if the index is out of bounds.
    fn get_column(&self, idx: usize) -> Result<ColumnValue<'_>, ColumnError>;
}
148
149/// Blanket implementation for `Arc<T>` - delegates to inner type.
150/// This allows processors to return `Arc<RowType>` while still satisfying `RowSchema`.
151impl<T: RowSchema> RowSchema for Arc<T> {
152    fn schema() -> &'static [&'static str]
153    where
154        Self: Sized,
155    {
156        T::schema()
157    }
158
159    fn column_count(&self) -> usize {
160        (**self).column_count()
161    }
162
163    fn get_column(&self, idx: usize) -> Result<ColumnValue<'_>, ColumnError> {
164        (**self).get_column(idx)
165    }
166}