Skip to main content

highlandcows_isam/
secondary_index.rs

1/// Secondary index support for `Isam<K, V>`.
2///
3/// A secondary index maps a derived key (`SK`) to the set of primary keys
4/// (`K`) whose values produce that secondary key.  One secondary key can map
5/// to many primary keys (non-unique index).
6///
7/// # Files on disk
8///
9/// Each named secondary index uses two files alongside the primary store:
10///
11/// | File | Contents |
12/// |------|----------|
13/// | `<base>_<name>.sidb` | Append-only store of serialised `Vec<K>` buckets |
14/// | `<base>_<name>.sidx` | B-tree (`SK → RecordRef`) pointing into `.sidb` |
15use std::marker::PhantomData;
16use std::path::{Path, PathBuf};
17
18use serde::de::DeserializeOwned;
19use serde::Serialize;
20
21use crate::error::IsamResult;
22use crate::index::BTree;
23use crate::store::DataStore;
24
25// ── DeriveKey ─────────────────────────────────────────────────────────────── //
26
27/// Describes how to derive a secondary index key from a record value.
28///
29/// Implement this trait on a marker struct, one per secondary index.
30/// For composite indices in the future, set `Key` to a tuple type —
31/// no change to this trait is required.
32///
33/// # Example
34/// ```
35/// use serde::{Serialize, Deserialize};
36/// use highlandcows_isam::DeriveKey;
37///
38/// #[derive(Serialize, Deserialize, Clone)]
39/// struct User { name: String, city: String }
40///
41/// struct CityIndex;
42/// impl DeriveKey<User> for CityIndex {
43///     type Key = String;
44///     fn derive(value: &User) -> String { value.city.clone() }
45/// }
46/// ```
47pub trait DeriveKey<V>: Send + Sync + 'static {
48    /// The type of the derived secondary key.
49    type Key: Serialize + DeserializeOwned + Ord + Clone + Send;
50
51    /// Derive the secondary key from a value.
52    fn derive(value: &V) -> Self::Key;
53}
54
55// ── AnySecondaryIndex ─────────────────────────────────────────────────────── //
56
57/// Type-erased secondary index interface stored inside `IsamStorage`.
58///
59/// All methods receive the primary key and deserialized value so the concrete
60/// implementation can extract `SK` without the storage layer knowing about it.
61pub(crate) trait AnySecondaryIndex<K, V>: Send {
62    // ── Forward operations (called during CRUD) ───────────────────────── //
63
64    fn on_insert(&mut self, key: &K, value: &V) -> IsamResult<()>;
65    fn on_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()>;
66    fn on_delete(&mut self, key: &K, value: &V) -> IsamResult<()>;
67
68    // ── Inverse operations (called during rollback) ───────────────────── //
69
70    fn undo_insert(&mut self, key: &K, value: &V) -> IsamResult<()>;
71    fn undo_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()>;
72    fn undo_delete(&mut self, key: &K, value: &V) -> IsamResult<()>;
73
74    /// Return all primary keys whose secondary key serialises to `sk_bytes`.
75    fn lookup_primary_keys(&mut self, sk_bytes: &[u8]) -> IsamResult<Vec<K>>;
76
77    fn fsync(&mut self) -> IsamResult<()>;
78    fn name(&self) -> &str;
79
80    /// Return the fully-qualified type name of the `DeriveKey` extractor.
81    ///
82    /// Uses [`std::any::type_name`] — suitable for display, not for persistent
83    /// storage (the value can change between compiler versions or refactors).
84    fn extractor_type_name(&self) -> &'static str;
85
86    /// Return the index schema version stored in the `.sidx` metadata.
87    fn stored_schema_version(&self) -> u32;
88
89    /// Write `version` into the `.sidx` metadata as the index schema version.
90    fn persist_schema_version(&mut self, version: u32) -> IsamResult<()>;
91
92    /// Discard all index data and recreate fresh, empty index files in place.
93    ///
94    /// Used by `migrate_index` to clear the index before repopulating it.
95    fn reset(&mut self) -> IsamResult<()>;
96}
97
98// ── SecondaryIndexImpl ────────────────────────────────────────────────────── //
99
100/// Concrete secondary index backed by a `DataStore` + `BTree<SK>` pair.
101///
102/// The data store holds serialised `Vec<K>` buckets (one per distinct SK value).
103/// The B-tree maps each SK to the `RecordRef` of its current bucket in the store.
104pub(crate) struct SecondaryIndexImpl<K, V, E>
105where
106    E: DeriveKey<V>,
107{
108    name: String,
109    base: PathBuf,
110    store: DataStore,
111    btree: BTree<E::Key>,
112    _phantom: PhantomData<(K, V)>,
113}
114
115impl<K, V, E> SecondaryIndexImpl<K, V, E>
116where
117    K: Serialize + DeserializeOwned + Ord + Clone + Send,
118    V: Send,
119    E: DeriveKey<V>,
120{
121    /// Create new secondary index files for `name` alongside `base`.
122    pub(crate) fn create(name: &str, base: &Path) -> IsamResult<Self> {
123        Ok(Self {
124            name: name.to_owned(),
125            base: base.to_path_buf(),
126            store: DataStore::create(&sidb_path(base, name))?,
127            btree: BTree::create(&sidx_path(base, name))?,
128            _phantom: PhantomData,
129        })
130    }
131
132    /// Open existing secondary index files for `name` alongside `base`.
133    pub(crate) fn open(name: &str, base: &Path) -> IsamResult<Self> {
134        Ok(Self {
135            name: name.to_owned(),
136            base: base.to_path_buf(),
137            store: DataStore::open(&sidb_path(base, name))?,
138            btree: BTree::open(&sidx_path(base, name))?,
139            _phantom: PhantomData,
140        })
141    }
142
143    /// Open existing files if present, otherwise create new ones.
144    pub(crate) fn create_or_open(name: &str, base: &Path) -> IsamResult<Self> {
145        if sidb_path(base, name).exists() {
146            Self::open(name, base)
147        } else {
148            Self::create(name, base)
149        }
150    }
151
152    // ── Private helpers ───────────────────────────────────────────────── //
153
154    /// Read the current primary-key bucket for `sk`, or an empty vec.
155    fn read_pks(&mut self, sk: &E::Key) -> IsamResult<Vec<K>> {
156        match self.btree.search(sk)? {
157            None => Ok(Vec::new()),
158            Some(rec) => self.store.read_value(rec),
159        }
160    }
161
162    /// Write (append + update index) a primary-key bucket for `sk`.
163    fn write_pks(&mut self, sk: &E::Key, pks: &[K]) -> IsamResult<()> {
164        let exists = self.btree.search(sk)?.is_some();
165        let rec = self.store.append(sk, &pks)?;
166        if exists {
167            self.btree.update(sk, rec)?;
168        } else {
169            self.btree.insert(sk, rec)?;
170        }
171        Ok(())
172    }
173
174    /// Add `pk` to the bucket for `sk` (no-op if already present).
175    fn add_pk(&mut self, sk: &E::Key, pk: &K) -> IsamResult<()> {
176        let mut pks = self.read_pks(sk)?;
177        if !pks.contains(pk) {
178            pks.push(pk.clone());
179            self.write_pks(sk, &pks)?;
180        }
181        Ok(())
182    }
183
184    /// Remove `pk` from the bucket for `sk`.  Deletes the bucket when empty.
185    fn remove_pk(&mut self, sk: &E::Key, pk: &K) -> IsamResult<()> {
186        let mut pks = self.read_pks(sk)?;
187        pks.retain(|k| k != pk);
188        if pks.is_empty() {
189            // Ignore KeyNotFound — bucket may already be absent.
190            let _ = self.btree.delete(sk);
191        } else {
192            self.write_pks(sk, &pks)?;
193        }
194        Ok(())
195    }
196}
197
198impl<K, V, E> AnySecondaryIndex<K, V> for SecondaryIndexImpl<K, V, E>
199where
200    K: Serialize + DeserializeOwned + Ord + Clone + Send,
201    V: Send,
202    E: DeriveKey<V>,
203{
204    fn on_insert(&mut self, key: &K, value: &V) -> IsamResult<()> {
205        let sk = E::derive(value);
206        self.add_pk(&sk, key)
207    }
208
209    fn on_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()> {
210        let old_sk = E::derive(old_value);
211        let new_sk = E::derive(new_value);
212        if old_sk != new_sk {
213            self.remove_pk(&old_sk, key)?;
214            self.add_pk(&new_sk, key)?;
215        }
216        Ok(())
217    }
218
219    fn on_delete(&mut self, key: &K, value: &V) -> IsamResult<()> {
220        let sk = E::derive(value);
221        self.remove_pk(&sk, key)
222    }
223
224    // Undo operations are exact inverses of the forward operations.
225
226    fn undo_insert(&mut self, key: &K, value: &V) -> IsamResult<()> {
227        self.on_delete(key, value)
228    }
229
230    fn undo_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()> {
231        // Swap old/new to reverse the direction.
232        self.on_update(key, new_value, old_value)
233    }
234
235    fn undo_delete(&mut self, key: &K, value: &V) -> IsamResult<()> {
236        self.on_insert(key, value)
237    }
238
239    fn lookup_primary_keys(&mut self, sk_bytes: &[u8]) -> IsamResult<Vec<K>> {
240        let sk: E::Key = bincode::deserialize(sk_bytes)?;
241        self.read_pks(&sk)
242    }
243
244    fn fsync(&mut self) -> IsamResult<()> {
245        self.store.fsync()?;
246        self.btree.fsync()
247    }
248
249    fn name(&self) -> &str {
250        &self.name
251    }
252
253    fn extractor_type_name(&self) -> &'static str {
254        std::any::type_name::<E>()
255    }
256
257    fn stored_schema_version(&self) -> u32 {
258        self.btree.index_schema_version()
259    }
260
261    fn persist_schema_version(&mut self, version: u32) -> IsamResult<()> {
262        self.btree.set_index_schema_version(version)
263    }
264
265    fn reset(&mut self) -> IsamResult<()> {
266        self.store = DataStore::create(&sidb_path(&self.base, &self.name))?;
267        self.btree = BTree::create(&sidx_path(&self.base, &self.name))?;
268        Ok(())
269    }
270}
271
272// ── Path helpers ──────────────────────────────────────────────────────────── //
273
274pub(crate) fn sidb_path(base: &Path, name: &str) -> PathBuf {
275    let parent = base.parent().unwrap_or(Path::new(""));
276    let stem = base.file_stem().unwrap_or_default().to_string_lossy();
277    parent.join(format!("{stem}_{name}.sidb"))
278}
279
280pub(crate) fn sidx_path(base: &Path, name: &str) -> PathBuf {
281    let parent = base.parent().unwrap_or(Path::new(""));
282    let stem = base.file_stem().unwrap_or_default().to_string_lossy();
283    parent.join(format!("{stem}_{name}.sidx"))
284}