highlandcows_isam/
secondary_index.rs

1/// Secondary index support for `Isam<K, V>`.
2///
3/// A secondary index maps a derived key (`SK`) to the set of primary keys
4/// (`K`) whose values produce that secondary key.  One secondary key can map
5/// to many primary keys (non-unique index).
6///
7/// # Files on disk
8///
9/// Each named secondary index uses two files alongside the primary store:
10///
11/// | File | Contents |
12/// |------|----------|
13/// | `<base>_<name>.sidb` | Append-only store of serialised `Vec<K>` buckets |
14/// | `<base>_<name>.sidx` | B-tree (`SK → RecordRef`) pointing into `.sidb` |
15use std::marker::PhantomData;
16use std::path::{Path, PathBuf};
17
18use serde::de::DeserializeOwned;
19use serde::Serialize;
20
21use crate::error::IsamResult;
22use crate::index::BTree;
23use crate::store::DataStore;
24
25// ── DeriveKey ─────────────────────────────────────────────────────────────── //
26
27/// Describes how to derive a secondary index key from a record value.
28///
29/// Implement this trait on a marker struct, one per secondary index.
30/// For composite indices in the future, set `Key` to a tuple type —
31/// no change to this trait is required.
32///
33/// # Example
34/// ```
35/// use serde::{Serialize, Deserialize};
36/// use highlandcows_isam::DeriveKey;
37///
38/// #[derive(Serialize, Deserialize, Clone)]
39/// struct User { name: String, city: String }
40///
41/// struct CityIndex;
42/// impl DeriveKey<User> for CityIndex {
43///     type Key = String;
44///     fn derive(value: &User) -> String { value.city.clone() }
45/// }
46/// ```
47pub trait DeriveKey<V>: Send + Sync + 'static {
48    /// The type of the derived secondary key.
49    type Key: Serialize + DeserializeOwned + Ord + Clone + Send;
50
51    /// Derive the secondary key from a value.
52    fn derive(value: &V) -> Self::Key;
53}
54
55// ── AnySecondaryIndex ─────────────────────────────────────────────────────── //
56
57/// Type-erased secondary index interface stored inside `IsamStorage`.
58///
59/// All methods receive the primary key and deserialized value so the concrete
60/// implementation can extract `SK` without the storage layer knowing about it.
61pub(crate) trait AnySecondaryIndex<K, V>: Send {
62    // ── Forward operations (called during CRUD) ───────────────────────── //
63
64    fn on_insert(&mut self, key: &K, value: &V) -> IsamResult<()>;
65    fn on_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()>;
66    fn on_delete(&mut self, key: &K, value: &V) -> IsamResult<()>;
67
68    // ── Inverse operations (called during rollback) ───────────────────── //
69
70    fn undo_insert(&mut self, key: &K, value: &V) -> IsamResult<()>;
71    fn undo_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()>;
72    fn undo_delete(&mut self, key: &K, value: &V) -> IsamResult<()>;
73
74    /// Return all primary keys whose secondary key serialises to `sk_bytes`.
75    fn lookup_primary_keys(&mut self, sk_bytes: &[u8]) -> IsamResult<Vec<K>>;
76
77    fn fsync(&mut self) -> IsamResult<()>;
78    fn name(&self) -> &str;
79}
80
81// ── SecondaryIndexImpl ────────────────────────────────────────────────────── //
82
83/// Concrete secondary index backed by a `DataStore` + `BTree<SK>` pair.
84///
85/// The data store holds serialised `Vec<K>` buckets (one per distinct SK value).
86/// The B-tree maps each SK to the `RecordRef` of its current bucket in the store.
87pub(crate) struct SecondaryIndexImpl<K, V, E>
88where
89    E: DeriveKey<V>,
90{
91    name: String,
92    store: DataStore,
93    btree: BTree<E::Key>,
94    _phantom: PhantomData<(K, V)>,
95}
96
97impl<K, V, E> SecondaryIndexImpl<K, V, E>
98where
99    K: Serialize + DeserializeOwned + Ord + Clone + Send,
100    V: Send,
101    E: DeriveKey<V>,
102{
103    /// Create new secondary index files for `name` alongside `base`.
104    pub(crate) fn create(name: &str, base: &Path) -> IsamResult<Self> {
105        Ok(Self {
106            name: name.to_owned(),
107            store: DataStore::create(&sidb_path(base, name))?,
108            btree: BTree::create(&sidx_path(base, name))?,
109            _phantom: PhantomData,
110        })
111    }
112
113    /// Open existing secondary index files for `name` alongside `base`.
114    pub(crate) fn open(name: &str, base: &Path) -> IsamResult<Self> {
115        Ok(Self {
116            name: name.to_owned(),
117            store: DataStore::open(&sidb_path(base, name))?,
118            btree: BTree::open(&sidx_path(base, name))?,
119            _phantom: PhantomData,
120        })
121    }
122
123    /// Open existing files if present, otherwise create new ones.
124    pub(crate) fn create_or_open(name: &str, base: &Path) -> IsamResult<Self> {
125        if sidb_path(base, name).exists() {
126            Self::open(name, base)
127        } else {
128            Self::create(name, base)
129        }
130    }
131
132    // ── Private helpers ───────────────────────────────────────────────── //
133
134    /// Read the current primary-key bucket for `sk`, or an empty vec.
135    fn read_pks(&mut self, sk: &E::Key) -> IsamResult<Vec<K>> {
136        match self.btree.search(sk)? {
137            None => Ok(Vec::new()),
138            Some(rec) => self.store.read_value(rec),
139        }
140    }
141
142    /// Write (append + update index) a primary-key bucket for `sk`.
143    fn write_pks(&mut self, sk: &E::Key, pks: &[K]) -> IsamResult<()> {
144        let exists = self.btree.search(sk)?.is_some();
145        let rec = self.store.append(sk, &pks)?;
146        if exists {
147            self.btree.update(sk, rec)?;
148        } else {
149            self.btree.insert(sk, rec)?;
150        }
151        Ok(())
152    }
153
154    /// Add `pk` to the bucket for `sk` (no-op if already present).
155    fn add_pk(&mut self, sk: &E::Key, pk: &K) -> IsamResult<()> {
156        let mut pks = self.read_pks(sk)?;
157        if !pks.contains(pk) {
158            pks.push(pk.clone());
159            self.write_pks(sk, &pks)?;
160        }
161        Ok(())
162    }
163
164    /// Remove `pk` from the bucket for `sk`.  Deletes the bucket when empty.
165    fn remove_pk(&mut self, sk: &E::Key, pk: &K) -> IsamResult<()> {
166        let mut pks = self.read_pks(sk)?;
167        pks.retain(|k| k != pk);
168        if pks.is_empty() {
169            // Ignore KeyNotFound — bucket may already be absent.
170            let _ = self.btree.delete(sk);
171        } else {
172            self.write_pks(sk, &pks)?;
173        }
174        Ok(())
175    }
176}
177
178impl<K, V, E> AnySecondaryIndex<K, V> for SecondaryIndexImpl<K, V, E>
179where
180    K: Serialize + DeserializeOwned + Ord + Clone + Send,
181    V: Send,
182    E: DeriveKey<V>,
183{
184    fn on_insert(&mut self, key: &K, value: &V) -> IsamResult<()> {
185        let sk = E::derive(value);
186        self.add_pk(&sk, key)
187    }
188
189    fn on_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()> {
190        let old_sk = E::derive(old_value);
191        let new_sk = E::derive(new_value);
192        if old_sk != new_sk {
193            self.remove_pk(&old_sk, key)?;
194            self.add_pk(&new_sk, key)?;
195        }
196        Ok(())
197    }
198
199    fn on_delete(&mut self, key: &K, value: &V) -> IsamResult<()> {
200        let sk = E::derive(value);
201        self.remove_pk(&sk, key)
202    }
203
204    // Undo operations are exact inverses of the forward operations.
205
206    fn undo_insert(&mut self, key: &K, value: &V) -> IsamResult<()> {
207        self.on_delete(key, value)
208    }
209
210    fn undo_update(&mut self, key: &K, old_value: &V, new_value: &V) -> IsamResult<()> {
211        // Swap old/new to reverse the direction.
212        self.on_update(key, new_value, old_value)
213    }
214
215    fn undo_delete(&mut self, key: &K, value: &V) -> IsamResult<()> {
216        self.on_insert(key, value)
217    }
218
219    fn lookup_primary_keys(&mut self, sk_bytes: &[u8]) -> IsamResult<Vec<K>> {
220        let sk: E::Key = bincode::deserialize(sk_bytes)?;
221        self.read_pks(&sk)
222    }
223
224    fn fsync(&mut self) -> IsamResult<()> {
225        self.store.fsync()?;
226        self.btree.fsync()
227    }
228
229    fn name(&self) -> &str {
230        &self.name
231    }
232}
233
234// ── Path helpers ──────────────────────────────────────────────────────────── //
235
236pub(crate) fn sidb_path(base: &Path, name: &str) -> PathBuf {
237    let parent = base.parent().unwrap_or(Path::new(""));
238    let stem = base.file_stem().unwrap_or_default().to_string_lossy();
239    parent.join(format!("{stem}_{name}.sidb"))
240}
241
242pub(crate) fn sidx_path(base: &Path, name: &str) -> PathBuf {
243    let parent = base.parent().unwrap_or(Path::new(""));
244    let stem = base.file_stem().unwrap_or_default().to_string_lossy();
245    parent.join(format!("{stem}_{name}.sidx"))
246}