From 1a060f20bf183f6654c9f129d6280112764db92f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Tue, 23 Jun 2026 16:26:58 -0300 Subject: [PATCH 01/17] feat(storage): diff-layer state storage with an in-memory LRU cache Store every non-genesis state as a parent-linked StateDiff (StateDiffs, never pruned) plus a full snapshot (States) written only at 1024-slot anchors (and the bootstrap). Neither table is ever pruned, so the full state history is preserved cheaply. - get_state returns an anchor snapshot directly, else reconstructs by walking base_root back to the nearest anchor and replaying the appended historical_block_hashes tails; config/validators come from the snapshot and latest_block_header from BlockHeaders. - Reconstructed and freshly imported states are memoized in an in-memory LRU (STATE_CACHE_CAPACITY), keyed by block root. States are immutable per root, so the cache never needs invalidation; it keeps recent reads (e.g. a child block's parent state) hot without reconstruction. - DiffBase captures the parent (root, hbh_len, slot) before it is consumed into the post-state. StateDiff/DiffBase live in the storage crate. - No snapshot eviction and no StateAnchors table: anchors are simply the snapshots in States, so the prune-states scan is gone entirely. 
--- CLAUDE.md | 10 +- Cargo.lock | 2 + Cargo.toml | 1 + crates/blockchain/src/store.rs | 15 +- crates/storage/Cargo.toml | 3 + crates/storage/src/api/tables.rs | 13 +- crates/storage/src/backend/rocksdb.rs | 12 +- crates/storage/src/lib.rs | 2 + crates/storage/src/state_diff.rs | 267 +++++++++++++ crates/storage/src/store.rs | 529 ++++++++++++++------------ 10 files changed, 586 insertions(+), 268 deletions(-) create mode 100644 crates/storage/src/state_diff.rs diff --git a/CLAUDE.md b/CLAUDE.md index 5b10c6dd..6714ada2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -366,17 +366,23 @@ cargo test -p ethlambda-blockchain --test forkchoice_spectests -- --test-threads finalized boundary, signatures are pruned (`prune_old_block_signatures`) while headers and bodies are kept forever. `get_signed_block` returns `None` for a pruned finalized block +- States are stored as parent-linked diffs (`StateDiffs`, never pruned) plus + full-state snapshots (`States`) written only at 1024-slot anchors (and the + bootstrap). Neither is ever pruned. 
`get_state` returns an anchor snapshot or + reconstructs by walking diffs back to the nearest anchor; results are memoized + in an in-memory LRU (`STATE_CACHE_CAPACITY`) so recent reads stay hot - `LiveChain` table provides fast `(slot||root) → parent_root` index for fork choice - Storage uses trait-based API: `StorageBackend` → `StorageReadView` (reads) + `StorageWriteBatch` (atomic writes) -### Storage Tables (10) +### Storage Tables (11) | Table | Key → Value | Purpose | |-------|-------------|---------| | `BlockHeaders` | H256 → BlockHeader | Block headers by root | | `BlockBodies` | H256 → BlockBody | Block bodies (empty for genesis) | | `BlockSignatures` | H256 → BlockSignatures | Signatures (absent for genesis) | -| `States` | H256 → State | Beacon states by root | +| `States` | H256 → State | Full-state snapshots at anchors only | +| `StateDiffs` | H256 → StateDiff | Parent-linked state diff per non-genesis state | | `LatestKnownAttestations` | u64 → AttestationData | Fork-choice-active attestations | | `LatestNewAttestations` | u64 → AttestationData | Pending (pre-promotion) attestations | | `GossipSignatures` | SignatureKey → ValidatorSignature | Individual validator signatures | diff --git a/Cargo.lock b/Cargo.lock index 764d89b0..5c0ed65f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2170,6 +2170,8 @@ dependencies = [ "leansig", "libssz", "libssz-derive", + "libssz-types", + "lru", "rand 0.10.1", "rocksdb", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 2b20d590..100b1a4a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ vergen-git2 = { version = "9", features = ["rustc"] } rayon = "1.11" rand = "0.10" +lru = "0.16" rocksdb = "0.24" libc = "0.2" reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } diff --git a/crates/blockchain/src/store.rs b/crates/blockchain/src/store.rs index 8f7807d2..1b45e220 100644 --- a/crates/blockchain/src/store.rs +++ b/crates/blockchain/src/store.rs @@ -1,7 +1,7 @@ use 
std::collections::HashSet; use ethlambda_state_transition::{is_proposer, slot_is_justifiable_after}; -use ethlambda_storage::{ForkCheckpoints, Store}; +use ethlambda_storage::{DiffBase, ForkCheckpoints, Store}; use ethlambda_types::{ ShortRoot, attestation::{ @@ -556,6 +556,10 @@ fn on_block_core( let block = signed_block.message.clone(); + // Capture the diff base before the parent is consumed into the post-state + // (avoids cloning the multi-MB historical_block_hashes list). + let diff_base = DiffBase::from_state(block.parent_root, &parent_state); + // Execute state transition function to compute post-block state let state_transition_start = std::time::Instant::now(); let mut post_state = parent_state; @@ -576,9 +580,9 @@ fn on_block_core( store.update_checkpoints(ForkCheckpoints::new(store.head(), Some(justified), None)); } - // Store signed block and state + // Store signed block and state (parent-linked diff; snapshot only at anchors) store.insert_signed_block(block_root, signed_block.clone()); - store.insert_state(block_root, post_state); + store.insert_state_with_diff(block_root, diff_base, post_state); for att in block.body.attestations.iter() { // Count each participating validator as a valid attestation. @@ -1251,7 +1255,10 @@ mod tests { let head_justified = Checkpoint { root: a, slot: 1 }; let mut head_state = State::from_genesis(1000, vec![]); head_state.latest_justified = head_justified; - store.insert_state(b, head_state); + // Persist `b`'s post-state via the diff API (parented on `a`). Base and target + // share a slot, so no snapshot is written; `get_state(b)` is served from the cache. + let diff_base = DiffBase::from_state(a, &head_state); + store.insert_state_with_diff(b, diff_base, head_state); // Store's global justified latched onto a higher, off-head checkpoint, // as it would after a minority fork justified a slot the head never saw.
diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index f5b2ca58..7381a53e 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -18,6 +18,9 @@ thiserror.workspace = true libssz.workspace = true libssz-derive.workspace = true +libssz-types.workspace = true + +lru.workspace = true [dev-dependencies] tempfile = "3" diff --git a/crates/storage/src/api/tables.rs b/crates/storage/src/api/tables.rs index 5884f1f9..dcda1cbf 100644 --- a/crates/storage/src/api/tables.rs +++ b/crates/storage/src/api/tables.rs @@ -11,7 +11,16 @@ pub enum Table { /// All other blocks must have an entry in this table. BlockSignatures, /// State storage: H256 -> State + /// + /// Holds full-state snapshots only: the bootstrap anchor plus one anchor per + /// 1024-slot window. Never pruned. Non-anchor states live in `StateDiffs` and + /// are reconstructed on demand (memoized by an in-memory cache). States, + /// State diffs: H256 -> StateDiff + /// + /// Parent-linked diff written for every non-genesis state. Never pruned, so + /// it preserves full state history. See `get_state` for reconstruction. + StateDiffs, /// Metadata: string keys -> various scalar values Metadata, /// Live chain index: (slot || root) -> parent_root @@ -23,11 +32,12 @@ pub enum Table { } /// All table variants. 
-pub const ALL_TABLES: [Table; 6] = [ +pub const ALL_TABLES: [Table; 7] = [ Table::BlockHeaders, Table::BlockBodies, Table::BlockSignatures, Table::States, + Table::StateDiffs, Table::Metadata, Table::LiveChain, ]; @@ -40,6 +50,7 @@ impl Table { Table::BlockBodies => "block_bodies", Table::BlockSignatures => "block_signatures", Table::States => "states", + Table::StateDiffs => "state_diffs", Table::Metadata => "metadata", Table::LiveChain => "live_chain", } diff --git a/crates/storage/src/backend/rocksdb.rs b/crates/storage/src/backend/rocksdb.rs index e278c8fe..58f6119f 100644 --- a/crates/storage/src/backend/rocksdb.rs +++ b/crates/storage/src/backend/rocksdb.rs @@ -11,15 +11,11 @@ use std::path::Path; use std::sync::Arc; /// Returns the column family name for a table. +/// +/// Delegates to [`Table::name`] so the CF name and the metrics label share a +/// single source of truth (and a new table only needs one mapping). fn cf_name(table: Table) -> &'static str { - match table { - Table::BlockHeaders => "block_headers", - Table::BlockBodies => "block_bodies", - Table::BlockSignatures => "block_signatures", - Table::States => "states", - Table::Metadata => "metadata", - Table::LiveChain => "live_chain", - } + table.name() } /// RocksDB storage backend. diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index de5f20df..13b6410d 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -1,7 +1,9 @@ mod api; pub mod backend; mod error; +mod state_diff; mod store; pub use api::{ALL_TABLES, StorageBackend, StorageReadView, StorageWriteBatch, Table}; +pub use state_diff::DiffBase; pub use store::{ForkCheckpoints, GetForkchoiceStoreError, MAX_RESUMABLE_DB_STATE_AGE, Store}; diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs new file mode 100644 index 00000000..3714b516 --- /dev/null +++ b/crates/storage/src/state_diff.rs @@ -0,0 +1,267 @@ +//! Parent-linked state diffs for diff-layer state storage. +//! +//! 
A [`StateDiff`] captures the change from a base state (the parent block's +//! post-state) to a target state, storing only what cannot be recovered from a +//! snapshot plus the parent relationship. +//! +//! Field handling: +//! - `config`, `validators`: never change; omitted (taken from the snapshot). +//! - `latest_block_header`: omitted; reconstructed from the `BlockHeaders` table. +//! - `historical_block_hashes`: pure-append in the STF, so only the appended +//! tail (`hbh_appended`) is stored. +//! - everything else: stored verbatim (the justification fields are bounded by +//! the non-finalized window, so they stay small under healthy finality). + +use ethlambda_types::{ + block::BlockHeader, + checkpoint::Checkpoint, + primitives::H256, + state::{ + HISTORICAL_ROOTS_LIMIT, JustificationRoots, JustificationValidators, JustifiedSlots, State, + }, +}; +use libssz_derive::{SszDecode, SszEncode}; +use libssz_types::SszList; + +/// Appended tail of `historical_block_hashes`, bounded by the same limit as the +/// full list. +pub type HistoricalBlockHashesTail = SszList; + +/// Describes the parent state a new state's diff is built against. +/// +/// Captured by the caller before the parent is consumed into the post-state, so +/// the store can build the diff and decide anchoring without re-reading it. +/// Construct via [`DiffBase::from_state`]; fields are crate-internal. +pub struct DiffBase { + /// Block root of the parent state (the diff's `base_root`). + pub(crate) root: H256, + /// Parent state's `historical_block_hashes` length. + pub(crate) hbh_len: usize, + /// Parent state's slot (used for the anchor-boundary check). + pub(crate) slot: u64, +} + +impl DiffBase { + /// Build the diff base from the parent state and its block root. + /// + /// `root` is the parent block root (the child's `parent_root`), passed in + /// since the caller already has it; `hbh_len` and `slot` are read from + /// `state`. Call this before the parent is consumed into the child. 
+ pub fn from_state(root: H256, state: &State) -> Self { + Self { + root, + hbh_len: state.historical_block_hashes.len(), + slot: state.slot, + } + } +} + +/// The change from a base (parent) state to a target state. +/// +/// Reconstruct the target with [`StateDiff`] applied against the nearest +/// ancestor snapshot; see the storage layer's `get_state` for the walk. +#[derive(Debug, Clone, PartialEq, Eq, SszEncode, SszDecode)] +pub struct StateDiff { + /// Block root of the base state this diff is relative to (`block.parent_root`). + pub base_root: H256, + /// Target state's slot. + pub slot: u64, + /// Target state's latest justified checkpoint. + pub latest_justified: Checkpoint, + /// Target state's latest finalized checkpoint. + pub latest_finalized: Checkpoint, + /// Target state's `justified_slots` (stored in full). + pub justified_slots: JustifiedSlots, + /// Target state's `justifications_roots` (stored in full). + pub justifications_roots: JustificationRoots, + /// Target state's `justifications_validators` (stored in full). + pub justifications_validators: JustificationValidators, + /// Elements appended to `historical_block_hashes` relative to the base. + pub hbh_appended: HistoricalBlockHashesTail, +} + +impl StateDiff { + /// Build a diff from a consumed target state against a base identified by its + /// `historical_block_hashes` length. + /// + /// Takes `target` by value so the multi-MB justification fields are moved + /// into the diff rather than cloned. On the block-import path the base state + /// has already been consumed into `target`, so only its length is retained; + /// `base_hbh_len` is that length. + /// + /// # Assumptions about how the base is modified into the target + /// + /// The diff stores only part of `target` and is lossless *only* because the + /// state transition changes the base (parent) state in a restricted way. 
+ /// `reconstruct` depends on each of these; a future STF that broke one would + /// make reconstructed states silently wrong, not just fail: + /// + /// - **`config` and `validators` are unchanged from base to target.** They + /// are not stored in the diff; reconstruction takes them from the nearest + /// ancestor snapshot. (The lean STF never mutates either: `validators` is + /// fixed at genesis and `config` is static.) + /// - **`historical_block_hashes` only grows by appending.** The base's list + /// is a prefix of the target's, so only the appended tail + /// (`target[base_hbh_len..]`) is stored and the earlier entries are never + /// reordered or rewritten. (`process_slots` pushes the parent root and + /// zero-fills skipped slots, leaving the existing prefix intact.) This is + /// why `base_hbh_len` alone is enough to identify the base's contribution. + /// - **`latest_block_header` is not stored here.** It is read back from the + /// `BlockHeaders` table during reconstruction; the persisted post-state + /// caches the real `state_root` there, so the two are byte-identical. + /// + /// All remaining fields (`slot`, both checkpoints, and the three + /// justification fields) are captured verbatim, so the diff makes no + /// assumption about how those change. + /// + /// # Panics + /// + /// Panics if `target.historical_block_hashes` is shorter than `base_hbh_len`, + /// i.e. the append-only assumption above was violated. + pub fn from_base(base_root: H256, base_hbh_len: usize, target: State) -> Self { + let State { + slot, + latest_justified, + latest_finalized, + historical_block_hashes, + justified_slots, + justifications_roots, + justifications_validators, + .. 
+ } = target; + + let hbh = historical_block_hashes.into_inner(); + assert!( + hbh.len() >= base_hbh_len, + "target historical_block_hashes shorter than base: {} < {base_hbh_len}", + hbh.len() + ); + let hbh_appended = HistoricalBlockHashesTail::try_from(hbh[base_hbh_len..].to_vec()) + .expect("appended tail cannot exceed HISTORICAL_ROOTS_LIMIT"); + + Self { + base_root, + slot, + latest_justified, + latest_finalized, + justified_slots, + justifications_roots, + justifications_validators, + hbh_appended, + } + } +} + +/// Rebuild a state from a base snapshot and the diffs leading to the target. +/// +/// `diffs` are ordered from the snapshot's child up to the target (inclusive, +/// non-empty). `latest_block_header` is the target's header (kept in the +/// `BlockHeaders` table rather than the diff). `config`/`validators` come from +/// `snapshot` (they never change), `historical_block_hashes` is replayed from +/// the appended tails, and the remaining fields come from the last diff. +/// +/// # Panics +/// +/// Panics if `diffs` is empty. 
+pub(crate) fn reconstruct( + snapshot: State, + diffs: &[StateDiff], + latest_block_header: BlockHeader, +) -> State { + let target = diffs + .last() + .expect("reconstruct requires at least one diff"); + + let mut hbh: Vec = snapshot.historical_block_hashes.to_vec(); + for diff in diffs { + hbh.extend_from_slice(&diff.hbh_appended); + } + let historical_block_hashes = hbh + .try_into() + .expect("reconstructed historical_block_hashes within limit"); + + State { + config: snapshot.config, + slot: target.slot, + latest_block_header, + latest_justified: target.latest_justified, + latest_finalized: target.latest_finalized, + historical_block_hashes, + justified_slots: target.justified_slots.clone(), + validators: snapshot.validators, + justifications_roots: target.justifications_roots.clone(), + justifications_validators: target.justifications_validators.clone(), + } +} + +#[cfg(test)] +mod tests { + use ethlambda_types::state::{State, Validator}; + use libssz::{SszDecode, SszEncode}; + + use super::*; + + fn h256(byte: u8) -> H256 { + H256::from([byte; 32]) + } + + /// A minimal genesis-like base state with two validators. + fn base_state() -> State { + let validators = vec![ + Validator { + attestation_pubkey: [1u8; 52], + proposal_pubkey: [2u8; 52], + index: 0, + }, + Validator { + attestation_pubkey: [3u8; 52], + proposal_pubkey: [4u8; 52], + index: 1, + }, + ]; + State::from_genesis(1_000, validators) + } + + #[test] + fn from_base_captures_appended_tail_and_absolute_fields() { + let base = base_state(); + let base_len = base.historical_block_hashes.len(); + + let mut target = base.clone(); + target.slot = 5; + let expected_justified = Checkpoint { + root: h256(7), + slot: 4, + }; + target.latest_justified = expected_justified; + // Append three roots (one real parent + two zero-filled empty slots). 
+ let mut hbh: Vec = base.historical_block_hashes.to_vec(); + hbh.extend([h256(9), H256::ZERO, H256::ZERO]); + target.historical_block_hashes = hbh.try_into().unwrap(); + + let diff = StateDiff::from_base(h256(1), base_len, target); + + assert_eq!(diff.base_root, h256(1)); + assert_eq!(diff.slot, 5); + assert_eq!(diff.latest_justified, expected_justified); + assert_eq!(diff.hbh_appended.len(), 3); + assert_eq!(diff.hbh_appended[0], h256(9)); + assert_eq!(diff.hbh_appended[1], H256::ZERO); + } + + #[test] + fn ssz_roundtrips() { + let base = base_state(); + let base_len = base.historical_block_hashes.len(); + let mut target = base.clone(); + target.slot = 2; + let mut hbh: Vec = base.historical_block_hashes.to_vec(); + hbh.push(h256(9)); + target.historical_block_hashes = hbh.try_into().unwrap(); + + let diff = StateDiff::from_base(h256(1), base_len, target); + let bytes = diff.to_ssz(); + let decoded = StateDiff::from_ssz_bytes(&bytes).expect("decodes"); + assert_eq!(diff, decoded); + } +} diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 44e61d6a..984505c4 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -1,6 +1,9 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; +use std::num::NonZeroUsize; use std::sync::{Arc, LazyLock, Mutex}; +use lru::LruCache; + use crate::api::{StorageBackend, StorageWriteBatch, Table}; use crate::error::Error; @@ -15,6 +18,8 @@ use ethlambda_types::{ state::{ChainConfig, State, anchor_pair_is_consistent}, }; use libssz::{SszDecode, SszEncode}; + +use crate::state_diff::{DiffBase, StateDiff}; use thiserror::Error; use tracing::{info, warn}; @@ -84,8 +89,21 @@ const KEY_LATEST_JUSTIFIED: &[u8] = b"latest_justified"; /// Key for "latest_finalized" field of the Store. Its value has type [`Checkpoint`] and it's SSZ-encoded. const KEY_LATEST_FINALIZED: &[u8] = b"latest_finalized"; -/// ~3.3 hours of state history at 4-second slots (12000 / 4 = 3000). 
-const STATES_TO_KEEP: usize = 3_000; +/// Persist a full-state snapshot whenever a block's slot crosses a multiple of +/// this value (relative to its parent's slot). +/// +/// Snapshots are the only entries written to `States` (plus the bootstrap +/// anchor); they are never pruned and bound state-reconstruction diff walks to +/// at most this many steps. ~68 minutes at 4-second slots. +const SNAPSHOT_ANCHOR_INTERVAL: u64 = 1_024; + +/// Number of reconstructed/imported states memoized in memory. +/// +/// States are content-addressed by block root and immutable, so the cache never +/// needs invalidation; it only bounds how many recent states stay hot for reads +/// (e.g. a block's `parent_state` right after import). A miss falls back to a +/// snapshot read or a diff-chain reconstruction. +const STATE_CACHE_CAPACITY: usize = 256; /// Keep block signatures for at least this many slots below the tip, even once /// finalized. Signatures older than this window are pruned only when the window @@ -496,6 +514,15 @@ pub struct Store { known_payloads: Arc>, /// In-memory gossip signatures, consumed at interval 2 aggregation. gossip_signatures: Arc>, + /// LRU memoization of states by block root, shared across `Store` clones. + /// Avoids reconstructing recent states from diffs on every read. + state_cache: Arc>>, +} + +/// Build an empty state cache sized to [`STATE_CACHE_CAPACITY`]. +fn new_state_cache() -> Arc>> { + let capacity = NonZeroUsize::new(STATE_CACHE_CAPACITY).expect("cache capacity is non-zero"); + Arc::new(Mutex::new(LruCache::new(capacity))) } impl Store { @@ -568,6 +595,7 @@ impl Store { gossip_signatures: Arc::new(Mutex::new(GossipSignatureBuffer::new( GOSSIP_SIGNATURE_CAP, ))), + state_cache: new_state_cache(), }) } @@ -638,7 +666,9 @@ impl Store { .expect("put block body"); } - // State + // State snapshot. 
The anchor has no parent in the store, so it is + // the base of every diff chain: store it as a full snapshot in + // `States` (never pruned) so reconstruction always terminates here. let state_entries = vec![(anchor_block_root.to_ssz(), anchor_state.to_ssz())]; batch .put_batch(Table::States, state_entries) @@ -665,6 +695,7 @@ impl Store { gossip_signatures: Arc::new(Mutex::new(GossipSignatureBuffer::new( GOSSIP_SIGNATURE_CAP, ))), + state_cache: new_state_cache(), }) } @@ -788,29 +819,24 @@ impl Store { } } - /// Prune old states and blocks to keep storage bounded. + /// Bound storage by pruning signatures of old finalized blocks. + /// + /// State snapshots, state diffs, block headers, and block bodies are retained + /// for the full history; only signatures of finalized blocks older than the + /// recent-signature window are removed. /// /// This is separated from `update_checkpoints` so callers can defer heavy /// pruning until after a batch of blocks has been fully processed. Running - /// this mid-cascade would delete states that pending children still need, + /// this mid-cascade used to delete states that pending children still needed, /// causing infinite re-processing loops when fallback pruning is active.
pub fn prune_old_data(&mut self) { - let protected_roots = [ - self.latest_finalized().root, - self.latest_justified().root, - self.head(), - ]; let finalized_slot = self.latest_finalized().slot; let tip_slot = self .get_block_header(&self.head()) .map_or(finalized_slot, |header| header.slot); - let pruned_states = self.prune_old_states(&protected_roots); let pruned_signatures = self.prune_old_block_signatures(finalized_slot, tip_slot); - if pruned_states > 0 || pruned_signatures > 0 { - info!( - pruned_states, - pruned_signatures, "Pruned old states and block signatures" - ); + if pruned_signatures > 0 { + info!(pruned_signatures, "Pruned old finalized block signatures"); } } @@ -914,55 +940,6 @@ impl Store { pruned_new + pruned_known } - /// Prune old states beyond the retention window. - /// - /// Keeps the most recent `STATES_TO_KEEP` states (by slot), plus any - /// states whose roots appear in `protected_roots` (finalized, justified). - /// - /// Returns the number of states pruned. - pub fn prune_old_states(&mut self, protected_roots: &[H256]) -> usize { - let view = self.backend.begin_read().expect("read view"); - - // Collect (root_bytes, slot) from BlockHeaders to determine state age. 
- let mut entries: Vec<(Vec, u64)> = view - .prefix_iterator(Table::BlockHeaders, &[]) - .expect("iterator") - .filter_map(|res| res.ok()) - .map(|(key, value)| { - let header = BlockHeader::from_ssz_bytes(&value).expect("valid header"); - (key.to_vec(), header.slot) - }) - .collect(); - drop(view); - - if entries.len() <= STATES_TO_KEEP { - return 0; - } - - // Sort by slot descending (newest first) - entries.sort_unstable_by(|a, b| b.1.cmp(&a.1)); - - let protected: HashSet> = protected_roots.iter().map(|r| r.to_ssz()).collect(); - - // Skip the retention window, collect remaining keys for deletion - let keys_to_delete: Vec> = entries - .into_iter() - .skip(STATES_TO_KEEP) - .filter(|(key, _)| !protected.contains(key)) - .map(|(key, _)| key) - .collect(); - - let count = keys_to_delete.len(); - if count > 0 { - let mut batch = self.backend.begin_write().expect("write batch"); - batch - .delete_batch(Table::States, keys_to_delete) - .expect("delete old states"); - batch.commit().expect("commit"); - } - count - } - /// Prune signatures of old finalized blocks, keeping a recent window. /// /// Signatures within [`SIGNATURE_PRUNING_RANGE`] slots of `tip_slot` are @@ -1013,10 +990,7 @@ impl Store { /// Get the block header by root. pub fn get_block_header(&self, root: &H256) -> Option { - let view = self.backend.begin_read().expect("read view"); - view.get(Table::BlockHeaders, &root.to_ssz()) - .expect("get") - .map(|bytes| BlockHeader::from_ssz_bytes(&bytes).expect("valid header")) + self.get_ssz(Table::BlockHeaders, root) } // ============ Signed Blocks ============ @@ -1085,12 +1059,13 @@ impl Store { /// or if the signature row is missing for any block other than the /// slot-0 anchor. /// - /// Signatures are absent for genesis-style anchor blocks (no proposer - /// ever signed them). 
To keep BlocksByRoot symmetric with the - /// fork-choice view for peers, synthesize an empty proof for the slot-0 - /// case only; for any other slot the missing-signature state is treated - /// as storage corruption and surfaces as `None` rather than as a - /// fabricated block. + /// Signatures are absent in two cases: genesis-style anchor blocks (no + /// proposer ever signed them), and finalized blocks whose signatures were + /// pruned by [`prune_old_block_signatures`](Self::prune_old_block_signatures). + /// To keep BlocksByRoot symmetric with the fork-choice view for peers, + /// synthesize an empty proof for the slot-0 anchor only; for any other slot + /// a missing signature surfaces as `None` (a pruned finalized block can no + /// longer be served with its proof) rather than as a fabricated block. pub fn get_signed_block(&self, root: &H256) -> Option { let view = self.backend.begin_read().expect("read view"); let key = root.to_ssz(); @@ -1111,9 +1086,9 @@ impl Store { Some(proof_bytes) => { MultiMessageAggregate::from_ssz_bytes(&proof_bytes).expect("valid block proof") } - // Synthesis only covers the genesis-style anchor (slot 0). Any other - // missing-proof case is a storage corruption that should surface - // as `None` rather than fabricating a block with an empty proof. + // Synthesis only covers the genesis-style anchor (slot 0). For any + // other slot a missing proof (pruned finalized block, or genuine + // corruption) surfaces as `None` rather than a fabricated block. None if header.slot == 0 => MultiMessageAggregate::default(), None => return None, }; @@ -1129,26 +1104,107 @@ impl Store { // ============ States ============ /// Returns the state for the given block root. + /// + /// Fast path: a full snapshot in `States`. Otherwise the state is + /// reconstructed by walking parent-linked `StateDiffs` back to the nearest + /// ancestor snapshot and replaying forward. 
Returns `None` if the diff chain + /// is broken or the target block header is unavailable. pub fn get_state(&self, root: &H256) -> Option { + // Memoized hot states first (states are immutable per root). + if let Some(state) = self.state_cache.lock().unwrap().get(root) { + return Some(state.clone()); + } + // Anchor snapshot in `States`, otherwise reconstruct from the diff chain. + let state = self + .get_ssz::(Table::States, root) + .or_else(|| self.reconstruct_state(root))?; + self.state_cache.lock().unwrap().put(*root, state.clone()); + Some(state) + } + + /// Read and SSZ-decode a value keyed by block root from `table`. + fn get_ssz(&self, table: Table, root: &H256) -> Option { let view = self.backend.begin_read().expect("read view"); - view.get(Table::States, &root.to_ssz()) + view.get(table, &root.to_ssz()) .expect("get") - .map(|bytes| State::from_ssz_bytes(&bytes).expect("valid state")) + .map(|bytes| T::from_ssz_bytes(&bytes).expect("valid encoding")) } - /// Returns whether a state exists for the given block root. + /// Reconstruct a state from diffs and the nearest ancestor snapshot. + /// + /// Walks `base_root` pointers back until a snapshot is found, fetches the + /// target's block header, and delegates the assembly to + /// [`state_diff::reconstruct`](crate::state_diff::reconstruct). + fn reconstruct_state(&self, root: &H256) -> Option { + // Walk back collecting diffs until we reach a snapshot. + let mut diffs: Vec = Vec::new(); + let mut cursor = *root; + let snapshot = loop { + if let Some(snapshot) = self.get_ssz::(Table::States, &cursor) { + break snapshot; + } + let diff = self.get_ssz::(Table::StateDiffs, &cursor)?; + cursor = diff.base_root; + diffs.push(diff); + }; + + // `diffs` runs target -> snapshot child; reverse to snapshot child -> target. + diffs.reverse(); + + // The latest block header lives in BlockHeaders; the stored state caches + // the real state_root there, so it equals the header byte-for-byte. 
+ let latest_block_header = self.get_block_header(root)?; + + Some(crate::state_diff::reconstruct( + snapshot, + &diffs, + latest_block_header, + )) + } + + /// Returns whether a state is available for the given block root. + /// + /// True if a snapshot or a diff entry exists for `root` (the diff chain is not walked). pub fn has_state(&self, root: &H256) -> bool { let view = self.backend.begin_read().expect("read view"); - view.get(Table::States, &root.to_ssz()) - .expect("get") - .is_some() + let key = root.to_ssz(); + view.get(Table::States, &key).expect("get").is_some() + || view.get(Table::StateDiffs, &key).expect("get").is_some() } - /// Stores a state indexed by block root. - pub fn insert_state(&mut self, root: H256, state: State) { + /// Persist a post-block state as a parent-linked diff, snapshotting at anchors. + /// + /// Every non-genesis state gets a `StateDiffs` entry (never pruned, so the + /// full state history is preserved). A full snapshot is written to `States` + /// only when the block crosses a [`SNAPSHOT_ANCHOR_INTERVAL`] boundary; these + /// anchors are never pruned and bound the reconstruction walk. The state is + /// also inserted into the in-memory cache so the immediate next read (e.g. as + /// a child block's parent state) is hot without reconstruction. + /// + /// `base` describes the parent state the diff is built against (see + /// [`DiffBase`]); its fields are captured before the parent is consumed into + /// `state`. + pub fn insert_state_with_diff(&mut self, root: H256, base: DiffBase, state: State) { + let slot = state.slot; + let is_anchor = slot / SNAPSHOT_ANCHOR_INTERVAL > base.slot / SNAPSHOT_ANCHOR_INTERVAL; + + // Snapshot only at anchors; serialize before `state` is consumed. + let snapshot_bytes = is_anchor.then(|| state.to_ssz()); + // Memoize the post-state for fast reads, then move it into the diff so + // its multi-MB justification fields are not cloned again.
+ self.state_cache.lock().unwrap().put(root, state.clone()); + let diff_bytes = StateDiff::from_base(base.root, base.hbh_len, state).to_ssz(); + + let key = root.to_ssz(); let mut batch = self.backend.begin_write().expect("write batch"); - let entries = vec![(root.to_ssz(), state.to_ssz())]; - batch.put_batch(Table::States, entries).expect("put state"); + batch + .put_batch(Table::StateDiffs, vec![(key.clone(), diff_bytes)]) + .expect("put state diff"); + if let Some(snapshot_bytes) = snapshot_bytes { + batch + .put_batch(Table::States, vec![(key, snapshot_bytes)]) + .expect("put state snapshot"); + } batch.commit().expect("commit"); } @@ -1448,13 +1504,12 @@ mod tests { batch.commit().expect("commit"); } - /// Insert a dummy state for a given root. - fn insert_state(backend: &dyn StorageBackend, root: H256) { + /// Insert a real full-state snapshot for a given root (seeds a diff-chain base). + fn insert_snapshot(backend: &dyn StorageBackend, root: H256, state: &State) { let mut batch = backend.begin_write().expect("write batch"); - let key = root.to_ssz(); batch - .put_batch(Table::States, vec![(key, vec![0u8; 4])]) - .expect("put state"); + .put_batch(Table::States, vec![(root.to_ssz(), state.to_ssz())]) + .expect("put snapshot"); batch.commit().expect("commit"); } @@ -1499,6 +1554,7 @@ mod tests { gossip_signatures: Arc::new(Mutex::new(GossipSignatureBuffer::new( GOSSIP_SIGNATURE_CAP, ))), + state_cache: new_state_cache(), } } @@ -1512,6 +1568,7 @@ mod tests { gossip_signatures: Arc::new(Mutex::new(GossipSignatureBuffer::new( GOSSIP_SIGNATURE_CAP, ))), + state_cache: new_state_cache(), } } } @@ -1582,193 +1639,148 @@ mod tests { assert_eq!(count_entries(backend.as_ref(), Table::BlockSignatures), 10); } - // ============ State Pruning Tests ============ + // ============ State Diff Reconstruction Tests ============ - #[test] - fn prune_old_states_within_retention() { - let backend = Arc::new(InMemoryBackend::new()); - let mut store = 
Store::test_store_with_backend(backend.clone()); + use ethlambda_types::state::Validator; - // Insert STATES_TO_KEEP headers + states - for i in 0..STATES_TO_KEEP as u64 { - insert_header(backend.as_ref(), root(i), i); - insert_state(backend.as_ref(), root(i)); + /// The header `insert_header` writes for a given slot. + fn header_at(slot: u64) -> BlockHeader { + BlockHeader { + slot, + proposer_index: 0, + parent_root: H256::ZERO, + state_root: H256::ZERO, + body_root: H256::ZERO, } - assert_eq!( - count_entries(backend.as_ref(), Table::States), - STATES_TO_KEEP - ); - - let pruned = store.prune_old_states(&[]); - assert_eq!(pruned, 0); } - #[test] - fn prune_old_states_exceeding_retention() { - let backend = Arc::new(InMemoryBackend::new()); - let mut store = Store::test_store_with_backend(backend.clone()); - - let total = STATES_TO_KEEP + 5; - for i in 0..total as u64 { - insert_header(backend.as_ref(), root(i), i); - insert_state(backend.as_ref(), root(i)); - } - assert_eq!(count_entries(backend.as_ref(), Table::States), total); - - let pruned = store.prune_old_states(&[]); - assert_eq!(pruned, 5); - assert_eq!( - count_entries(backend.as_ref(), Table::States), - STATES_TO_KEEP - ); - - // Oldest states should be gone - for i in 0..5u64 { - assert!(!has_key(backend.as_ref(), Table::States, &root(i))); - } - // Newest states should remain - for i in 5..total as u64 { - assert!(has_key(backend.as_ref(), Table::States, &root(i))); - } + /// A real `State` at `slot` with the given historical_block_hashes and a + /// `latest_block_header` matching what `insert_header` stores. 
+ fn sample_state(slot: u64, hbh: Vec<H256>) -> State { + let validators = vec![Validator { + attestation_pubkey: [7u8; 52], + proposal_pubkey: [9u8; 52], + index: 0, + }]; + let mut state = State::from_genesis(1_000, validators); + state.slot = slot; + state.latest_block_header = header_at(slot); + state.historical_block_hashes = hbh.try_into().unwrap(); + state } #[test] - fn prune_old_states_preserves_protected() { + fn get_state_reconstructs_from_diff() { let backend = Arc::new(InMemoryBackend::new()); let mut store = Store::test_store_with_backend(backend.clone()); - let total = STATES_TO_KEEP + 5; - for i in 0..total as u64 { - insert_header(backend.as_ref(), root(i), i); - insert_state(backend.as_ref(), root(i)); - } + // Genesis snapshot at slot 0. + let s0 = sample_state(0, vec![]); + let r0 = root(0); + insert_header(backend.as_ref(), r0, 0); + insert_snapshot(backend.as_ref(), r0, &s0); + + // Child at slot 1: appends one historical root, sets a checkpoint. + let r1 = root(1); + let mut s1 = sample_state(1, vec![root(42)]); + s1.latest_justified = Checkpoint { + root: root(7), + slot: 0, + }; + insert_header(backend.as_ref(), r1, 1); + let base = DiffBase { + root: r0, + hbh_len: s0.historical_block_hashes.len(), + slot: s0.slot, + }; + store.insert_state_with_diff(r1, base, s1.clone()); - let finalized_root = root(0); - let justified_root = root(2); - let pruned = store.prune_old_states(&[finalized_root, justified_root]); + // Not an anchor, so no snapshot was written; only the diff. + assert!(!has_key(backend.as_ref(), Table::States, &r1)); - // 5 would be pruned, but 2 are protected - assert_eq!(pruned, 3); - assert!(has_key(backend.as_ref(), Table::States, &finalized_root)); - assert!(has_key(backend.as_ref(), Table::States, &justified_root)); - } - - // ============ Periodic Pruning Tests ============ + // Hot path: the just-imported state is memoized in the cache. 
+ assert_eq!(store.get_state(&r1).unwrap().to_ssz(), s1.to_ssz()); - /// Set up finalized and justified checkpoints in metadata. - fn set_checkpoints(backend: &dyn StorageBackend, finalized: Checkpoint, justified: Checkpoint) { - let mut batch = backend.begin_write().expect("write batch"); - batch - .put_batch( - Table::Metadata, - vec![ - (KEY_LATEST_FINALIZED.to_vec(), finalized.to_ssz()), - (KEY_LATEST_JUSTIFIED.to_vec(), justified.to_ssz()), - ], - ) - .expect("put checkpoints"); - batch.commit().expect("commit"); + // A cold store (empty cache, shared backend) reconstructs from the diff, + // byte-identically. + let cold = Store::test_store_with_backend(backend.clone()); + let reconstructed = cold.get_state(&r1).expect("reconstructs from diff"); + assert_eq!(reconstructed.to_ssz(), s1.to_ssz()); } #[test] - fn fallback_pruning_removes_old_states_and_blocks() { + fn get_state_reconstructs_across_multiple_diffs() { let backend = Arc::new(InMemoryBackend::new()); let mut store = Store::test_store_with_backend(backend.clone()); - // Use roots that are within the retention window as finalized/justified - let finalized_root = root(0); - let justified_root = root(1); - set_checkpoints( - backend.as_ref(), - Checkpoint { - slot: 0, - root: finalized_root, - }, - Checkpoint { - slot: 1, - root: justified_root, - }, - ); - - // Insert more than STATES_TO_KEEP headers + states. - let total_states = STATES_TO_KEEP + 5; - for i in 0..total_states as u64 { - insert_header(backend.as_ref(), root(i), i); - insert_state(backend.as_ref(), root(i)); - } - - assert_eq!(count_entries(backend.as_ref(), Table::States), total_states); - assert_eq!( - count_entries(backend.as_ref(), Table::BlockHeaders), - total_states - ); - - // Use the last inserted root as head. Calling update_checkpoints with - // head_only triggers the fallback path (finalization doesn't advance). 
- let head_root = root(total_states as u64 - 1); - store.update_checkpoints(ForkCheckpoints::head_only(head_root)); - - // update_checkpoints no longer prunes states/blocks inline — the caller - // must invoke prune_old_data() separately (after a block cascade completes). - assert_eq!(count_entries(backend.as_ref(), Table::States), total_states); - - store.prune_old_data(); - - // 3005 headers total. Top 3000 by slot are kept in the retention window, - // leaving 5 candidates. 2 are protected (finalized + justified), - // so 3 are pruned → 3005 - 3 = 3002 states remaining. - assert_eq!( - count_entries(backend.as_ref(), Table::States), - STATES_TO_KEEP + 2 - ); - // Finalized and justified states must survive - assert!(has_key(backend.as_ref(), Table::States, &finalized_root)); - assert!(has_key(backend.as_ref(), Table::States, &justified_root)); + // Snapshot s0, then two chained diffs s1 -> s2. + let s0 = sample_state(0, vec![]); + let r0 = root(0); + insert_header(backend.as_ref(), r0, 0); + insert_snapshot(backend.as_ref(), r0, &s0); + + let r1 = root(1); + let s1 = sample_state(1, vec![root(42)]); + insert_header(backend.as_ref(), r1, 1); + let base = DiffBase { + root: r0, + hbh_len: s0.historical_block_hashes.len(), + slot: s0.slot, + }; + store.insert_state_with_diff(r1, base, s1.clone()); + + let r2 = root(2); + let s2 = sample_state(2, vec![root(42), root(43)]); + insert_header(backend.as_ref(), r2, 2); + let base = DiffBase { + root: r1, + hbh_len: s1.historical_block_hashes.len(), + slot: s1.slot, + }; + store.insert_state_with_diff(r2, base, s2.clone()); - // Headers and bodies are never pruned, so all are retained. - assert_eq!( - count_entries(backend.as_ref(), Table::BlockHeaders), - total_states - ); + // Neither child is an anchor, so a cold store reconstructs s2 by walking + // the diff chain back to the s0 snapshot. 
+ assert!(!has_key(backend.as_ref(), Table::States, &r1)); + assert!(!has_key(backend.as_ref(), Table::States, &r2)); + let cold = Store::test_store_with_backend(backend.clone()); + let reconstructed = cold.get_state(&r2).expect("reconstructs across diffs"); + assert_eq!(reconstructed.to_ssz(), s2.to_ssz()); } #[test] - fn fallback_pruning_no_op_within_retention() { + fn insert_state_with_diff_snapshots_only_on_boundary_crossing() { let backend = Arc::new(InMemoryBackend::new()); let mut store = Store::test_store_with_backend(backend.clone()); - set_checkpoints( - backend.as_ref(), - Checkpoint { - slot: 0, - root: root(0), - }, - Checkpoint { - slot: 0, - root: root(0), - }, - ); - - // Insert exactly STATES_TO_KEEP entries (no excess) - for i in 0..STATES_TO_KEEP as u64 { - insert_header(backend.as_ref(), root(i), i); - insert_state(backend.as_ref(), root(i)); - } - - // Use the last inserted root as head - let head_root = root(STATES_TO_KEEP as u64 - 1); - store.update_checkpoints(ForkCheckpoints::head_only(head_root)); - store.prune_old_data(); - - // Nothing should be pruned (within retention window) - assert_eq!( - count_entries(backend.as_ref(), Table::States), - STATES_TO_KEEP - ); - assert_eq!( - count_entries(backend.as_ref(), Table::BlockHeaders), - STATES_TO_KEEP - ); + let s0 = sample_state(SNAPSHOT_ANCHOR_INTERVAL - 1, vec![]); + let r0 = root(0); + insert_header(backend.as_ref(), r0, s0.slot); + insert_snapshot(backend.as_ref(), r0, &s0); + + // Crossing the interval boundary records an anchor. + let r1 = root(1); + let s1 = sample_state(SNAPSHOT_ANCHOR_INTERVAL, vec![root(42)]); + insert_header(backend.as_ref(), r1, s1.slot); + let base = DiffBase { + root: r0, + hbh_len: s0.historical_block_hashes.len(), + slot: s0.slot, + }; + store.insert_state_with_diff(r1, base, s1.clone()); + assert!(has_key(backend.as_ref(), Table::States, &r1)); + + // A non-crossing child does not. 
+ let r2 = root(2); + let s2 = sample_state(SNAPSHOT_ANCHOR_INTERVAL + 1, vec![root(42), root(43)]); + insert_header(backend.as_ref(), r2, s2.slot); + let base = DiffBase { + root: r1, + hbh_len: s1.historical_block_hashes.len(), + slot: s1.slot, + }; + store.insert_state_with_diff(r2, base, s2.clone()); + assert!(!has_key(backend.as_ref(), Table::States, &r2)); } // ============ PayloadBuffer Tests ============ @@ -2561,6 +2573,17 @@ mod tests { assert!(store.get_signed_block(&root).is_none()); } + /// The bootstrap anchor is stored as a full snapshot in `States`, the base of + /// every diff chain that reconstruction terminates at. + #[test] + fn from_anchor_state_stores_bootstrap_snapshot() { + let backend: Arc<dyn StorageBackend> = Arc::new(InMemoryBackend::new()); + let store = Store::from_anchor_state(backend.clone(), State::from_genesis(0, vec![])); + + let anchor_root = store.head(); + assert!(has_key(backend.as_ref(), Table::States, &anchor_root)); + } + // ============ from_db_state Tests ============ #[test] From 0ea9936eb68b5e0bc77fada182bd6f261a7fcc82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Wed, 24 Jun 2026 16:13:07 -0300 Subject: [PATCH 02/17] tune(storage): set STATE_CACHE_CAPACITY to 32 --- crates/storage/src/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 984505c4..e2175b75 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -103,7 +103,7 @@ const SNAPSHOT_ANCHOR_INTERVAL: u64 = 1_024; /// needs invalidation; it only bounds how many recent states stay hot for reads /// (e.g. a block's `parent_state` right after import). A miss falls back to a /// snapshot read or a diff-chain reconstruction. -const STATE_CACHE_CAPACITY: usize = 256; +const STATE_CACHE_CAPACITY: usize = 32; /// Keep block signatures for at least this many slots below the tip, even once /// finalized. 
Signatures older than this window are pruned only when the window From 191ff9de653e0f3b38900c0384dd21db708cd8d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Wed, 24 Jun 2026 16:21:33 -0300 Subject: [PATCH 03/17] test: build diff against genesis anchor, not the target state The migrated test built its DiffBase from the target post-state itself (DiffBase::from_state(a, &head_state)), so base.slot/base.hbh_len came from the target rather than the parent. That made the anchor-boundary check always false (no snapshot written, contradicting the comment) and left the diff self-referential, passing only via the cache memoization. Diff against the genesis anchor already present in the store instead, so the base correctly describes the parent state. --- crates/blockchain/src/store.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/blockchain/src/store.rs b/crates/blockchain/src/store.rs index 1b45e220..2d144dfb 100644 --- a/crates/blockchain/src/store.rs +++ b/crates/blockchain/src/store.rs @@ -1255,9 +1255,12 @@ mod tests { let head_justified = Checkpoint { root: a, slot: 1 }; let mut head_state = State::from_genesis(1000, vec![]); head_state.latest_justified = head_justified; - // Persist `b`'s post-state via the diff API (parented on `a`); a full - // snapshot is written, so `get_state(b)` returns it directly. - let diff_base = DiffBase::from_state(a, &head_state); + // Persist `b`'s post-state via the diff API, diffed against the genesis + // anchor that already lives in the store. The base must describe the + // parent (genesis) state, not the target; `get_state(b)` then resolves + // via the cache or by replaying this diff onto the genesis snapshot. 
+ let genesis_state = store.get_state(&genesis).expect("genesis state"); + let diff_base = DiffBase::from_state(genesis, &genesis_state); store.insert_state_with_diff(b, diff_base, head_state); // Store's global justified latched onto a higher, off-head checkpoint, From 98e2630ffbe9691105add5e548bc102ac53b4219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Wed, 24 Jun 2026 17:22:46 -0300 Subject: [PATCH 04/17] test: construct DiffBase via from_state in storage tests The reconstruction and boundary tests built DiffBase struct literals directly from the crate-internal fields instead of the public DiffBase::from_state constructor. Use the constructor so the tests exercise the real construction path (and would catch a regression in from_state itself). --- crates/storage/src/store.rs | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index ee6fcb63..32fdecc4 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -1727,11 +1727,7 @@ mod tests { slot: 0, }; insert_header(backend.as_ref(), r1, 1); - let base = DiffBase { - root: r0, - hbh_len: s0.historical_block_hashes.len(), - slot: s0.slot, - }; + let base = DiffBase::from_state(r0, &s0); store.insert_state_with_diff(r1, base, s1.clone()); // Not an anchor, so no snapshot was written; only the diff. 
@@ -1761,21 +1757,13 @@ mod tests { let r1 = root(1); let s1 = sample_state(1, vec![root(42)]); insert_header(backend.as_ref(), r1, 1); - let base = DiffBase { - root: r0, - hbh_len: s0.historical_block_hashes.len(), - slot: s0.slot, - }; + let base = DiffBase::from_state(r0, &s0); store.insert_state_with_diff(r1, base, s1.clone()); let r2 = root(2); let s2 = sample_state(2, vec![root(42), root(43)]); insert_header(backend.as_ref(), r2, 2); - let base = DiffBase { - root: r1, - hbh_len: s1.historical_block_hashes.len(), - slot: s1.slot, - }; + let base = DiffBase::from_state(r1, &s1); store.insert_state_with_diff(r2, base, s2.clone()); // Neither child is an anchor, so a cold store reconstructs s2 by walking @@ -1801,11 +1789,7 @@ mod tests { let r1 = root(1); let s1 = sample_state(SNAPSHOT_ANCHOR_INTERVAL, vec![root(42)]); insert_header(backend.as_ref(), r1, s1.slot); - let base = DiffBase { - root: r0, - hbh_len: s0.historical_block_hashes.len(), - slot: s0.slot, - }; + let base = DiffBase::from_state(r0, &s0); store.insert_state_with_diff(r1, base, s1.clone()); assert!(has_key(backend.as_ref(), Table::States, &r1)); @@ -1813,11 +1797,7 @@ mod tests { let r2 = root(2); let s2 = sample_state(SNAPSHOT_ANCHOR_INTERVAL + 1, vec![root(42), root(43)]); insert_header(backend.as_ref(), r2, s2.slot); - let base = DiffBase { - root: r1, - hbh_len: s1.historical_block_hashes.len(), - slot: s1.slot, - }; + let base = DiffBase::from_state(r1, &s1); store.insert_state_with_diff(r2, base, s2.clone()); assert!(!has_key(backend.as_ref(), Table::States, &r2)); } From 948796529f212c812ae09ab8fd8bfe6c8c7d60f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Wed, 24 Jun 2026 17:26:59 -0300 Subject: [PATCH 05/17] docs: correct storage tables list to match the 7-variant Table enum The table dropped the 4 stale rows (LatestKnown/LatestNewAttestations, GossipSignatures, AggregatedPayloads) that no longer exist as 
persisted tables: attestations and gossip signatures are now in-memory Store buffers. Also fixes the BlockSignatures key (slot||root, not H256). --- CLAUDE.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 6714ada2..73dfcc3f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -374,22 +374,25 @@ cargo test -p ethlambda-blockchain --test forkchoice_spectests -- --test-threads - `LiveChain` table provides fast `(slot||root) → parent_root` index for fork choice - Storage uses trait-based API: `StorageBackend` → `StorageReadView` (reads) + `StorageWriteBatch` (atomic writes) -### Storage Tables (11) +### Storage Tables (7) + +These are the variants of the `Table` enum (`crates/storage/src/api/tables.rs`). | Table | Key → Value | Purpose | |-------|-------------|---------| | `BlockHeaders` | H256 → BlockHeader | Block headers by root | | `BlockBodies` | H256 → BlockBody | Block bodies (empty for genesis) | -| `BlockSignatures` | H256 → BlockSignatures | Signatures (absent for genesis) | -| `States` | H256 → State | Full-state snapshots at anchors only | -| `StateDiffs` | H256 → StateDiff | Parent-linked state diff per non-genesis state | -| `LatestKnownAttestations` | u64 → AttestationData | Fork-choice-active attestations | -| `LatestNewAttestations` | u64 → AttestationData | Pending (pre-promotion) attestations | -| `GossipSignatures` | SignatureKey → ValidatorSignature | Individual validator signatures | -| `AggregatedPayloads` | SignatureKey → Vec\ | Aggregated proofs | +| `BlockSignatures` | (slot\|\|root) → BlockSignatures | Type-2 proof blob; keyed slot\|\|root so pruning scans in slot order and stops early; absent for genesis, pruned below finalized | +| `States` | H256 → State | Full-state snapshots; bootstrap + 1024-slot anchors only; never pruned | +| `StateDiffs` | H256 → StateDiff | Parent-linked state diff per non-genesis state; never pruned | | `Metadata` | string → various | Store state (head, 
config, checkpoints) | | `LiveChain` | (slot\|\|root) → parent\_root | Fast fork choice traversal index | +Attestations and gossip signatures are **not** persisted tables; they live in +in-memory `Store` buffers (`new_payloads`, `known_payloads`, `gossip_signatures`) +and are consumed during the tick pipeline (promotion at intervals 0/4, +aggregation at interval 2). + ### State Root Computation - Always computed via `tree_hash_root()` after full state transition - Must match proposer's pre-computed `block.state_root` From c8fec91c7c606924136aeaf662db764a92974d55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 11:32:58 -0300 Subject: [PATCH 06/17] test: remove ssz_roundtrip test --- crates/storage/src/state_diff.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index 3714b516..fe3464a9 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -248,20 +248,4 @@ mod tests { assert_eq!(diff.hbh_appended[0], h256(9)); assert_eq!(diff.hbh_appended[1], H256::ZERO); } - - #[test] - fn ssz_roundtrips() { - let base = base_state(); - let base_len = base.historical_block_hashes.len(); - let mut target = base.clone(); - target.slot = 2; - let mut hbh: Vec<H256> = base.historical_block_hashes.to_vec(); - hbh.push(h256(9)); - target.historical_block_hashes = hbh.try_into().unwrap(); - - let diff = StateDiff::from_base(h256(1), base_len, target); - let bytes = diff.to_ssz(); - let decoded = StateDiff::from_ssz_bytes(&bytes).expect("decodes"); - assert_eq!(diff, decoded); - } } From bdbd436bd56d5bd056bf350b772e9a79e3aa10a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 11:49:47 -0300 Subject: [PATCH 07/17] test(storage): add unit tests for state reconstruct reconstruct had only indirect 
coverage via the store integration tests. Add direct unit tests asserting the assembly contract: structural fields (config/validators) come from the snapshot, absolute fields come from the last diff, latest_block_header is passed through verbatim, and historical_block_hashes replays the snapshot tail plus each diff's appended tail in order. Covers both the multi-diff chain and single-diff cases. Also drop the now-unused libssz SszDecode/SszEncode import left behind when the ssz_roundtrip test was removed; it tripped -D warnings. --- crates/storage/src/state_diff.rs | 109 ++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index fe3464a9..3a906e39 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -197,7 +197,6 @@ pub(crate) fn reconstruct( #[cfg(test)] mod tests { use ethlambda_types::state::{State, Validator}; - use libssz::{SszDecode, SszEncode}; use super::*; @@ -248,4 +247,112 @@ mod tests { assert_eq!(diff.hbh_appended[0], h256(9)); assert_eq!(diff.hbh_appended[1], H256::ZERO); } + + /// A block header distinct from any snapshot/diff field, so the test can + /// assert it is passed through `reconstruct` verbatim. + fn header_at(slot: u64) -> BlockHeader { + BlockHeader { + slot, + proposer_index: 7, + parent_root: h256(51), + state_root: h256(99), + body_root: h256(88), + } + } + + /// Build a diff against `base_root` that appends `appended` to + /// `historical_block_hashes`. Absolute fields default; tests override the + /// ones they assert on. 
+ fn diff_at(base_root: H256, slot: u64, appended: Vec<H256>) -> StateDiff { + StateDiff { + base_root, + slot, + latest_justified: Checkpoint::default(), + latest_finalized: Checkpoint::default(), + justified_slots: JustifiedSlots::new(), + justifications_roots: JustificationRoots::default(), + justifications_validators: JustificationValidators::new(), + hbh_appended: HistoricalBlockHashesTail::try_from(appended).unwrap(), + } + } + + #[test] + fn reconstruct_merges_snapshot_with_diff_chain() { + // Snapshot: distinctive config + validators, plus one pre-existing root. + let mut snapshot = base_state(); + snapshot.slot = 100; + snapshot.historical_block_hashes = vec![h256(1)].try_into().unwrap(); + + // Intermediate diff (snapshot's child): appends one root, default fields. + let intermediate = diff_at(h256(50), 101, vec![h256(2)]); + + // Target diff (last): appends two roots and carries the absolute fields + // the reconstructed state must adopt, all different from the intermediate. + let mut target = diff_at(h256(51), 102, vec![h256(3), h256(4)]); + target.latest_justified = Checkpoint { + root: h256(7), + slot: 101, + }; + target.latest_finalized = Checkpoint { + root: h256(8), + slot: 100, + }; + target.justified_slots = JustifiedSlots::try_from(vec![true, false, true]).unwrap(); + target.justifications_roots = JustificationRoots::try_from(vec![h256(9)]).unwrap(); + target.justifications_validators = JustificationValidators::try_from(vec![true]).unwrap(); + + let header = header_at(102); + let state = reconstruct(snapshot, &[intermediate, target.clone()], header.clone()); + + // Structural fields come from the snapshot (diffs never carry them). + assert_eq!(state.config.genesis_time, 1_000); + assert_eq!(state.validators.len(), 2); + assert_eq!(state.validators[0].attestation_pubkey, [1u8; 52]); + assert_eq!(state.validators[1].index, 1); + + // latest_block_header is the argument, passed through verbatim. 
+ assert_eq!(state.latest_block_header, header); + + // Absolute fields come from the LAST diff, not the intermediate one. + assert_eq!(state.slot, 102); + assert_eq!(state.latest_justified, target.latest_justified); + assert_eq!(state.latest_finalized, target.latest_finalized); + assert_eq!(state.justified_slots, target.justified_slots); + assert_eq!(state.justifications_roots, target.justifications_roots); + assert_eq!( + state.justifications_validators, + target.justifications_validators + ); + + // historical_block_hashes = snapshot tail ++ each diff's appended tail, + // replayed in order. + assert_eq!( + state.historical_block_hashes.to_vec(), + vec![h256(1), h256(2), h256(3), h256(4)], + ); + } + + #[test] + fn reconstruct_with_single_diff_uses_it_as_target() { + let mut snapshot = base_state(); + snapshot.slot = 7; + snapshot.historical_block_hashes = vec![h256(1)].try_into().unwrap(); + + let mut diff = diff_at(h256(50), 8, vec![h256(2)]); + diff.latest_justified = Checkpoint { + root: h256(7), + slot: 7, + }; + + let header = header_at(8); + let state = reconstruct(snapshot, &[diff.clone()], header.clone()); + + assert_eq!(state.slot, 8); + assert_eq!(state.latest_justified, diff.latest_justified); + assert_eq!(state.latest_block_header, header); + assert_eq!( + state.historical_block_hashes.to_vec(), + vec![h256(1), h256(2)], + ); + } } From 92b7c7f6271c627802a31b6985a4b2c8e4c20aa2 Mon Sep 17 00:00:00 2001 From: Pablo Deymonnaz Date: Thu, 25 Jun 2026 13:54:08 -0300 Subject: [PATCH 08/17] Apply suggestion from @greptile-apps[bot] Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- crates/storage/src/store.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 2be5ca69..72db9fd1 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -850,11 +850,11 @@ impl Store { Ok(()) } - /// Bound storage by evicting 
old state snapshots and finalized signatures. + /// Prune finalized block signatures to keep signature storage bounded. /// - /// State diffs, block headers, and block bodies are retained for the full - /// history; only full-state snapshots outside the hot window (diffs remain) - /// and signatures of finalized blocks are removed. + /// State diffs, block headers, block bodies, and full-state snapshots are + /// all retained for the full history and are never pruned. Only signatures + /// of finalized blocks older than the pruning window are removed. /// /// This is separated from `update_checkpoints` so callers can defer heavy /// pruning until after a batch of blocks has been fully processed. Running From 3bb11c3952915f14bbc703fd58d5e9f7cd033d41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 14:10:10 -0300 Subject: [PATCH 09/17] refactor(storage): drop DiffBase, derive diff base from the post-state insert_state required callers to first capture a DiffBase (parent root, hbh length, slot) before the parent state was consumed into the post-state, so the store could build the diff without re-reading the parent. That leaked diff-construction details into every caller. The post-state's latest_block_header.parent_root already identifies the parent (the STF sets it in process_block_header), so insert_state now takes just (root, state): it derives the parent root, fetches the parent state via get_state (normally a cache hit), and builds the diff from both states. This restores the public signature and the block-import call site to their pre-diff-layer form. 
--- crates/blockchain/src/store.rs | 21 ++--- crates/storage/src/lib.rs | 1 - crates/storage/src/state_diff.rs | 53 +++--------- crates/storage/src/store.rs | 134 ++++++++++++++----------------- 4 files changed, 82 insertions(+), 127 deletions(-) diff --git a/crates/blockchain/src/store.rs b/crates/blockchain/src/store.rs index 9b5fbb4c..36bbfb99 100644 --- a/crates/blockchain/src/store.rs +++ b/crates/blockchain/src/store.rs @@ -1,7 +1,7 @@ use std::collections::HashSet; use ethlambda_state_transition::{is_proposer, slot_is_justifiable_after}; -use ethlambda_storage::{DiffBase, ForkCheckpoints, Store}; +use ethlambda_storage::{ForkCheckpoints, Store}; use ethlambda_types::{ ShortRoot, attestation::{ @@ -564,10 +564,6 @@ fn on_block_core( let block = signed_block.message.clone(); - // Capture the diff base before the parent is consumed into the post-state - // (avoids cloning the multi-MB historical_block_hashes list). - let diff_base = DiffBase::from_state(block.parent_root, &parent_state); - // Execute state transition function to compute post-block state let state_transition_start = std::time::Instant::now(); let mut post_state = parent_state; @@ -595,7 +591,7 @@ fn on_block_core( .insert_signed_block(block_root, signed_block.clone()) .expect("DB insert should succeed"); store - .insert_state_with_diff(block_root, diff_base, post_state) + .insert_state(block_root, post_state) .expect("DB insert should succeed"); for att in block.body.attestations.iter() { @@ -1271,14 +1267,13 @@ mod tests { let head_justified = Checkpoint { root: a, slot: 1 }; let mut head_state = State::from_genesis(1000, vec![]); head_state.latest_justified = head_justified; - // Persist `b`'s post-state via the diff API, diffed against the genesis - // anchor that already lives in the store. The base must describe the - // parent (genesis) state, not the target; `get_state(b)` then resolves - // via the cache or by replaying this diff onto the genesis snapshot. 
- let genesis_state = store.get_state(&genesis).expect("genesis state"); - let diff_base = DiffBase::from_state(genesis, &genesis_state); + // Persist `b`'s post-state via the diff API. `insert_state` reads the base + // to diff against from the post-state's own `latest_block_header.parent_root`; + // point it at the genesis anchor already in the store, so `get_state(b)` + // resolves via the cache or by replaying this diff onto the genesis snapshot. + head_state.latest_block_header.parent_root = genesis; store - .insert_state_with_diff(b, diff_base, head_state) + .insert_state(b, head_state) .expect("insert head state should succeed"); // Store's global justified latched onto a higher, off-head checkpoint, diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs index 13b6410d..9b21dc85 100644 --- a/crates/storage/src/lib.rs +++ b/crates/storage/src/lib.rs @@ -5,5 +5,4 @@ mod state_diff; mod store; pub use api::{ALL_TABLES, StorageBackend, StorageReadView, StorageWriteBatch, Table}; -pub use state_diff::DiffBase; pub use store::{ForkCheckpoints, GetForkchoiceStoreError, MAX_RESUMABLE_DB_STATE_AGE, Store}; diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index 3a906e39..94b23b35 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -27,35 +27,6 @@ use libssz_types::SszList; /// full list. pub type HistoricalBlockHashesTail = SszList; -/// Describes the parent state a new state's diff is built against. -/// -/// Captured by the caller before the parent is consumed into the post-state, so -/// the store can build the diff and decide anchoring without re-reading it. -/// Construct via [`DiffBase::from_state`]; fields are crate-internal. -pub struct DiffBase { - /// Block root of the parent state (the diff's `base_root`). - pub(crate) root: H256, - /// Parent state's `historical_block_hashes` length. - pub(crate) hbh_len: usize, - /// Parent state's slot (used for the anchor-boundary check). 
- pub(crate) slot: u64, -} - -impl DiffBase { - /// Build the diff base from the parent state and its block root. - /// - /// `root` is the parent block root (the child's `parent_root`), passed in - /// since the caller already has it; `hbh_len` and `slot` are read from - /// `state`. Call this before the parent is consumed into the child. - pub fn from_state(root: H256, state: &State) -> Self { - Self { - root, - hbh_len: state.historical_block_hashes.len(), - slot: state.slot, - } - } -} - /// The change from a base (parent) state to a target state. /// /// Reconstruct the target with [`StateDiff`] applied against the nearest @@ -81,13 +52,13 @@ pub struct StateDiff { } impl StateDiff { - /// Build a diff from a consumed target state against a base identified by its - /// `historical_block_hashes` length. + /// Build a diff from a base (parent) state and the consumed target state. /// - /// Takes `target` by value so the multi-MB justification fields are moved - /// into the diff rather than cloned. On the block-import path the base state - /// has already been consumed into `target`, so only its length is retained; - /// `base_hbh_len` is that length. + /// Takes `target` by value so its multi-MB justification fields are moved + /// into the diff rather than cloned; `base` is read only to find the length + /// of its `historical_block_hashes` (the diff stores just the tail `target` + /// appended on top). `base_root` is the parent block root the diff is + /// relative to. /// /// # Assumptions about how the base is modified into the target /// @@ -102,10 +73,10 @@ impl StateDiff { /// fixed at genesis and `config` is static.) /// - **`historical_block_hashes` only grows by appending.** The base's list /// is a prefix of the target's, so only the appended tail - /// (`target[base_hbh_len..]`) is stored and the earlier entries are never + /// (`target[base_len..]`) is stored and the earlier entries are never /// reordered or rewritten. 
(`process_slots` pushes the parent root and /// zero-fills skipped slots, leaving the existing prefix intact.) This is - /// why `base_hbh_len` alone is enough to identify the base's contribution. + /// why the base's length alone is enough to identify its contribution. /// - **`latest_block_header` is not stored here.** It is read back from the /// `BlockHeaders` table during reconstruction; the persisted post-state /// caches the real `state_root` there, so the two are byte-identical. @@ -116,9 +87,10 @@ impl StateDiff { /// /// # Panics /// - /// Panics if `target.historical_block_hashes` is shorter than `base_hbh_len`, + /// Panics if `target.historical_block_hashes` is shorter than the base's, /// i.e. the append-only assumption above was violated. - pub fn from_base(base_root: H256, base_hbh_len: usize, target: State) -> Self { + pub fn from_states(base_root: H256, base: &State, target: State) -> Self { + let base_hbh_len = base.historical_block_hashes.len(); let State { slot, latest_justified, @@ -224,7 +196,6 @@ mod tests { #[test] fn from_base_captures_appended_tail_and_absolute_fields() { let base = base_state(); - let base_len = base.historical_block_hashes.len(); let mut target = base.clone(); target.slot = 5; @@ -238,7 +209,7 @@ mod tests { hbh.extend([h256(9), H256::ZERO, H256::ZERO]); target.historical_block_hashes = hbh.try_into().unwrap(); - let diff = StateDiff::from_base(h256(1), base_len, target); + let diff = StateDiff::from_states(h256(1), &base, target); assert_eq!(diff.base_root, h256(1)); assert_eq!(diff.slot, 5); diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 72db9fd1..17fbabd6 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -19,7 +19,7 @@ use ethlambda_types::{ }; use libssz::{SszDecode, SszEncode}; -use crate::state_diff::{DiffBase, StateDiff}; +use crate::state_diff::StateDiff; use thiserror::Error; use tracing::{info, warn}; @@ -857,9 +857,7 @@ impl Store { /// of finalized 
blocks older than the pruning window are removed. /// /// This is separated from `update_checkpoints` so callers can defer heavy - /// pruning until after a batch of blocks has been fully processed. Running - /// this mid-cascade would delete snapshots that pending children still need, - /// causing infinite re-processing loops when fallback pruning is active. + /// pruning until after a batch of blocks has been fully processed. pub fn prune_old_data(&mut self) { let finalized_slot = self.latest_finalized().slot; let tip_slot = self @@ -1228,24 +1226,33 @@ impl Store { /// also inserted into the in-memory cache so the immediate next read (e.g. as /// a child block's parent state) is hot without reconstruction. /// - /// `base` describes the parent state the diff is built against (see - /// [`DiffBase`]); its fields are captured before the parent is consumed into - /// `state`. - pub fn insert_state_with_diff( - &mut self, - root: H256, - base: DiffBase, - state: State, - ) -> Result<(), Error> { - let slot = state.slot; - let is_anchor = slot / SNAPSHOT_ANCHOR_INTERVAL > base.slot / SNAPSHOT_ANCHOR_INTERVAL; + /// The diff is built against the parent state, identified by the post-state's + /// own `latest_block_header.parent_root` (the state transition sets it to the + /// block's parent) and fetched via [`get_state`](Self::get_state). The parent + /// was persisted when its own block was imported, so this read is normally a + /// cache hit; a cold cache falls back to a snapshot read or a diff-chain + /// reconstruction. + /// + /// # Panics + /// + /// Panics if no state exists for the parent root: a child state can only be + /// inserted after its parent's state has been persisted. + pub fn insert_state(&mut self, root: H256, state: State) -> Result<(), Error> { + // The post-state's latest_block_header is the block's own header, so its + // parent_root identifies the parent (base) state to diff against. 
+ let parent_root = state.latest_block_header.parent_root; + let parent_state = self + .get_state(&parent_root) + .expect("parent state must exist to diff against"); + let is_anchor = + state.slot / SNAPSHOT_ANCHOR_INTERVAL > parent_state.slot / SNAPSHOT_ANCHOR_INTERVAL; // Snapshot only at anchors; serialize before `state` is consumed. let snapshot_bytes = is_anchor.then(|| state.to_ssz()); // Memoize the post-state for fast reads, then move it into the diff so // its multi-MB justification fields are not cloned again. self.state_cache.lock().unwrap().put(root, state.clone()); - let diff_bytes = StateDiff::from_base(base.root, base.hbh_len, state).to_ssz(); + let diff_bytes = StateDiff::from_states(parent_root, &parent_state, state).to_ssz(); let key = root.to_ssz(); let mut batch = self.backend.begin_write().expect("write batch"); @@ -1544,15 +1551,11 @@ mod tests { use super::*; use crate::backend::InMemoryBackend; - /// Insert a block header (and dummy body + signature) for a given root and slot. - fn insert_header(backend: &dyn StorageBackend, root: H256, slot: u64) { - let header = BlockHeader { - slot, - proposer_index: 0, - parent_root: H256::ZERO, - state_root: H256::ZERO, - body_root: H256::ZERO, - }; + /// Insert a block header (and dummy body + signature) for a given root, slot, + /// and parent. The stored header equals `header_at(slot, parent_root)`, so a + /// state built from the same `(slot, parent_root)` reconstructs byte-identically. + fn insert_header(backend: &dyn StorageBackend, root: H256, slot: u64, parent_root: H256) { + let header = header_at(slot, parent_root); let mut batch = backend.begin_write().expect("write batch"); let key = root.to_ssz(); batch @@ -1648,7 +1651,7 @@ mod tests { // Blocks at slots 0..12, each with header + body + signature. 
for i in 0..13u64 { - insert_header(backend.as_ref(), root(i), i); + insert_header(backend.as_ref(), root(i), i, H256::ZERO); } // Healthy finality: non-finalized gap (5) < SIGNATURE_PRUNING_RANGE. @@ -1679,7 +1682,7 @@ mod tests { let mut store = Store::test_store_with_backend(backend.clone()); for i in 0..10u64 { - insert_header(backend.as_ref(), root(i), i); + insert_header(backend.as_ref(), root(i), i, H256::ZERO); } // Deep non-finality: gap (tip - finalized) > SIGNATURE_PRUNING_RANGE, so @@ -1699,7 +1702,7 @@ mod tests { let mut store = Store::test_store_with_backend(backend.clone()); for i in 0..10u64 { - insert_header(backend.as_ref(), root(i), i); + insert_header(backend.as_ref(), root(i), i, H256::ZERO); } // Early chain: tip < SIGNATURE_PRUNING_RANGE → cutoff saturates to 0, @@ -1713,20 +1716,22 @@ mod tests { use ethlambda_types::state::Validator; - /// The header `insert_header` writes for a given slot. - fn header_at(slot: u64) -> BlockHeader { + /// The header `insert_header` writes for a given slot and parent. + fn header_at(slot: u64, parent_root: H256) -> BlockHeader { BlockHeader { slot, proposer_index: 0, - parent_root: H256::ZERO, + parent_root, state_root: H256::ZERO, body_root: H256::ZERO, } } - /// A real `State` at `slot` with the given historical_block_hashes and a - /// `latest_block_header` matching what `insert_header` stores. - fn sample_state(slot: u64, hbh: Vec) -> State { + /// A real `State` at `slot` whose `latest_block_header` matches what + /// `insert_header` stores for `(slot, parent_root)`; `parent_root` is also the + /// base the diff is built against (`insert_state` reads it back from the + /// post-state's `latest_block_header`). 
+ fn sample_state(slot: u64, parent_root: H256, hbh: Vec) -> State { let validators = vec![Validator { attestation_pubkey: [7u8; 52], proposal_pubkey: [9u8; 52], @@ -1734,7 +1739,7 @@ mod tests { }]; let mut state = State::from_genesis(1_000, validators); state.slot = slot; - state.latest_block_header = header_at(slot); + state.latest_block_header = header_at(slot, parent_root); state.historical_block_hashes = hbh.try_into().unwrap(); state } @@ -1745,23 +1750,20 @@ mod tests { let mut store = Store::test_store_with_backend(backend.clone()); // Genesis snapshot at slot 0. - let s0 = sample_state(0, vec![]); + let s0 = sample_state(0, H256::ZERO, vec![]); let r0 = root(0); - insert_header(backend.as_ref(), r0, 0); + insert_header(backend.as_ref(), r0, 0, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); // Child at slot 1: appends one historical root, sets a checkpoint. let r1 = root(1); - let mut s1 = sample_state(1, vec![root(42)]); + let mut s1 = sample_state(1, r0, vec![root(42)]); s1.latest_justified = Checkpoint { root: root(7), slot: 0, }; - insert_header(backend.as_ref(), r1, 1); - let base = DiffBase::from_state(r0, &s0); - store - .insert_state_with_diff(r1, base, s1.clone()) - .expect("insert state"); + insert_header(backend.as_ref(), r1, 1, r0); + store.insert_state(r1, s1.clone()).expect("insert state"); // Not an anchor, so no snapshot was written; only the diff. assert!(!has_key(backend.as_ref(), Table::States, &r1)); @@ -1782,26 +1784,20 @@ mod tests { let mut store = Store::test_store_with_backend(backend.clone()); // Snapshot s0, then two chained diffs s1 -> s2. 
- let s0 = sample_state(0, vec![]); + let s0 = sample_state(0, H256::ZERO, vec![]); let r0 = root(0); - insert_header(backend.as_ref(), r0, 0); + insert_header(backend.as_ref(), r0, 0, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); let r1 = root(1); - let s1 = sample_state(1, vec![root(42)]); - insert_header(backend.as_ref(), r1, 1); - let base = DiffBase::from_state(r0, &s0); - store - .insert_state_with_diff(r1, base, s1.clone()) - .expect("insert state"); + let s1 = sample_state(1, r0, vec![root(42)]); + insert_header(backend.as_ref(), r1, 1, r0); + store.insert_state(r1, s1.clone()).expect("insert state"); let r2 = root(2); - let s2 = sample_state(2, vec![root(42), root(43)]); - insert_header(backend.as_ref(), r2, 2); - let base = DiffBase::from_state(r1, &s1); - store - .insert_state_with_diff(r2, base, s2.clone()) - .expect("insert state"); + let s2 = sample_state(2, r1, vec![root(42), root(43)]); + insert_header(backend.as_ref(), r2, 2, r1); + store.insert_state(r2, s2.clone()).expect("insert state"); // Neither child is an anchor, so a cold store reconstructs s2 by walking // the diff chain back to the s0 snapshot. @@ -1813,33 +1809,27 @@ mod tests { } #[test] - fn insert_state_with_diff_snapshots_only_on_boundary_crossing() { + fn insert_state_snapshots_only_on_boundary_crossing() { let backend = Arc::new(InMemoryBackend::new()); let mut store = Store::test_store_with_backend(backend.clone()); - let s0 = sample_state(SNAPSHOT_ANCHOR_INTERVAL - 1, vec![]); + let s0 = sample_state(SNAPSHOT_ANCHOR_INTERVAL - 1, H256::ZERO, vec![]); let r0 = root(0); - insert_header(backend.as_ref(), r0, s0.slot); + insert_header(backend.as_ref(), r0, s0.slot, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); // Crossing the interval boundary records an anchor. 
let r1 = root(1); - let s1 = sample_state(SNAPSHOT_ANCHOR_INTERVAL, vec![root(42)]); - insert_header(backend.as_ref(), r1, s1.slot); - let base = DiffBase::from_state(r0, &s0); - store - .insert_state_with_diff(r1, base, s1.clone()) - .expect("insert state"); + let s1 = sample_state(SNAPSHOT_ANCHOR_INTERVAL, r0, vec![root(42)]); + insert_header(backend.as_ref(), r1, s1.slot, r0); + store.insert_state(r1, s1.clone()).expect("insert state"); assert!(has_key(backend.as_ref(), Table::States, &r1)); // A non-crossing child does not. let r2 = root(2); - let s2 = sample_state(SNAPSHOT_ANCHOR_INTERVAL + 1, vec![root(42), root(43)]); - insert_header(backend.as_ref(), r2, s2.slot); - let base = DiffBase::from_state(r1, &s1); - store - .insert_state_with_diff(r2, base, s2.clone()) - .expect("insert state"); + let s2 = sample_state(SNAPSHOT_ANCHOR_INTERVAL + 1, r1, vec![root(42), root(43)]); + insert_header(backend.as_ref(), r2, s2.slot, r1); + store.insert_state(r2, s2.clone()).expect("insert state"); assert!(!has_key(backend.as_ref(), Table::States, &r2)); } From 84e87e20ee38a03c40fda958a7f61127101a0758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 15:45:37 -0300 Subject: [PATCH 10/17] refactor(storage): inline SSZ reads, drop the get_ssz helper --- crates/storage/src/store.rs | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 17fbabd6..3deb51c9 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -1025,7 +1025,10 @@ impl Store { /// Get the block header by root. 
pub fn get_block_header(&self, root: &H256) -> Option { - self.get_ssz(Table::BlockHeaders, root) + let view = self.backend.begin_read().expect("read view"); + view.get(Table::BlockHeaders, &root.to_ssz()) + .expect("get") + .map(|bytes| BlockHeader::from_ssz_bytes(&bytes).expect("valid header")) } // ============ Signed Blocks ============ @@ -1160,21 +1163,17 @@ impl Store { return Some(state.clone()); } // Anchor snapshot in `States`, otherwise reconstruct from the diff chain. - let state = self - .get_ssz::(Table::States, root) - .or_else(|| self.reconstruct_state(root))?; + let snapshot = { + let view = self.backend.begin_read().expect("read view"); + view.get(Table::States, &root.to_ssz()) + .expect("get") + .map(|bytes| State::from_ssz_bytes(&bytes).expect("valid state")) + }; + let state = snapshot.or_else(|| self.reconstruct_state(root))?; self.state_cache.lock().unwrap().put(*root, state.clone()); Some(state) } - /// Read and SSZ-decode a value keyed by block root from `table`. - fn get_ssz(&self, table: Table, root: &H256) -> Option { - let view = self.backend.begin_read().expect("read view"); - view.get(table, &root.to_ssz()) - .expect("get") - .map(|bytes| T::from_ssz_bytes(&bytes).expect("valid encoding")) - } - /// Reconstruct a state from diffs and the nearest ancestor snapshot. /// /// Walks `base_root` pointers back until a snapshot is found, fetches the @@ -1182,16 +1181,19 @@ impl Store { /// [`state_diff::reconstruct`](crate::state_diff::reconstruct). fn reconstruct_state(&self, root: &H256) -> Option { // Walk back collecting diffs until we reach a snapshot. 
+ let view = self.backend.begin_read().expect("read view"); let mut diffs: Vec = Vec::new(); let mut cursor = *root; let snapshot = loop { - if let Some(snapshot) = self.get_ssz::(Table::States, &cursor) { - break snapshot; + if let Some(bytes) = view.get(Table::States, &cursor.to_ssz()).expect("get") { + break State::from_ssz_bytes(&bytes).expect("valid state"); } - let diff = self.get_ssz::(Table::StateDiffs, &cursor)?; + let diff_bytes = view.get(Table::StateDiffs, &cursor.to_ssz()).expect("get")?; + let diff = StateDiff::from_ssz_bytes(&diff_bytes).expect("valid state diff"); cursor = diff.base_root; diffs.push(diff); }; + drop(view); // `diffs` runs target -> snapshot child; reverse to snapshot child -> target. diffs.reverse(); From a78768c55b715acb58638b766dde62378e0dc1a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 16:00:17 -0300 Subject: [PATCH 11/17] refactor(storage): build StateDiff from pre- and post-state StateDiff::from_states took an explicit base_root plus the base state. Drop the base_root parameter: derive it inside from the pre-state as hash_tree_root(pre_state.latest_block_header). A Block and its BlockHeader share a hash tree root, so this equals the parent block root the diff chain is keyed by (the bootstrap anchor is stored under the same formula). from_states now takes just (pre_state, post_state). Update the reconstruction tests to key states by their real header hash instead of arbitrary roots, matching how production assigns block roots. 
--- crates/storage/src/state_diff.rs | 34 +++++++++++++++++++------------- crates/storage/src/store.rs | 19 +++++++++--------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index 94b23b35..ea136eed 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -15,7 +15,7 @@ use ethlambda_types::{ block::BlockHeader, checkpoint::Checkpoint, - primitives::H256, + primitives::{H256, HashTreeRoot}, state::{ HISTORICAL_ROOTS_LIMIT, JustificationRoots, JustificationValidators, JustifiedSlots, State, }, @@ -52,13 +52,18 @@ pub struct StateDiff { } impl StateDiff { - /// Build a diff from a base (parent) state and the consumed target state. + /// Build a diff from the pre-state (the parent block's post-state) and the + /// consumed post-state. /// - /// Takes `target` by value so its multi-MB justification fields are moved - /// into the diff rather than cloned; `base` is read only to find the length - /// of its `historical_block_hashes` (the diff stores just the tail `target` - /// appended on top). `base_root` is the parent block root the diff is - /// relative to. + /// Takes `post_state` by value so its multi-MB justification fields are moved + /// into the diff rather than cloned; `pre_state` is read to derive `base_root` + /// and the length of its `historical_block_hashes` (the diff stores just the + /// tail `post_state` appended on top). + /// + /// `base_root` is the parent block root, computed as the `hash_tree_root` of + /// the pre-state's `latest_block_header`. A `Block` and its `BlockHeader` + /// share a hash tree root (the header's `body_root` is the body's root), so + /// this equals the key under which the parent's snapshot/diff is stored. 
/// /// # Assumptions about how the base is modified into the target /// @@ -87,10 +92,11 @@ impl StateDiff { /// /// # Panics /// - /// Panics if `target.historical_block_hashes` is shorter than the base's, - /// i.e. the append-only assumption above was violated. - pub fn from_states(base_root: H256, base: &State, target: State) -> Self { - let base_hbh_len = base.historical_block_hashes.len(); + /// Panics if `post_state.historical_block_hashes` is shorter than the + /// pre-state's, i.e. the append-only assumption above was violated. + pub fn from_states(pre_state: &State, post_state: State) -> Self { + let base_root = pre_state.latest_block_header.hash_tree_root(); + let base_hbh_len = pre_state.historical_block_hashes.len(); let State { slot, latest_justified, @@ -100,7 +106,7 @@ impl StateDiff { justifications_roots, justifications_validators, .. - } = target; + } = post_state; let hbh = historical_block_hashes.into_inner(); assert!( @@ -209,9 +215,9 @@ mod tests { hbh.extend([h256(9), H256::ZERO, H256::ZERO]); target.historical_block_hashes = hbh.try_into().unwrap(); - let diff = StateDiff::from_states(h256(1), &base, target); + let diff = StateDiff::from_states(&base, target); - assert_eq!(diff.base_root, h256(1)); + assert_eq!(diff.base_root, base.latest_block_header.hash_tree_root()); assert_eq!(diff.slot, 5); assert_eq!(diff.latest_justified, expected_justified); assert_eq!(diff.hbh_appended.len(), 3); diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 3deb51c9..bf30f403 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -1254,7 +1254,7 @@ impl Store { // Memoize the post-state for fast reads, then move it into the diff so // its multi-MB justification fields are not cloned again. 
self.state_cache.lock().unwrap().put(root, state.clone()); - let diff_bytes = StateDiff::from_states(parent_root, &parent_state, state).to_ssz(); + let diff_bytes = StateDiff::from_states(&parent_state, state).to_ssz(); let key = root.to_ssz(); let mut batch = self.backend.begin_write().expect("write batch"); @@ -1751,19 +1751,19 @@ mod tests { let backend = Arc::new(InMemoryBackend::new()); let mut store = Store::test_store_with_backend(backend.clone()); - // Genesis snapshot at slot 0. + // Genesis snapshot at slot 0; its block root is its header's hash. let s0 = sample_state(0, H256::ZERO, vec![]); - let r0 = root(0); + let r0 = s0.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r0, 0, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); - // Child at slot 1: appends one historical root, sets a checkpoint. - let r1 = root(1); + // Child at slot 1 (parent r0): appends one historical root, sets a checkpoint. let mut s1 = sample_state(1, r0, vec![root(42)]); s1.latest_justified = Checkpoint { root: root(7), slot: 0, }; + let r1 = s1.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r1, 1, r0); store.insert_state(r1, s1.clone()).expect("insert state"); @@ -1785,19 +1785,20 @@ mod tests { let backend = Arc::new(InMemoryBackend::new()); let mut store = Store::test_store_with_backend(backend.clone()); - // Snapshot s0, then two chained diffs s1 -> s2. + // Snapshot s0, then two chained diffs s1 -> s2; each block root is the + // hash of its header, as in production. 
let s0 = sample_state(0, H256::ZERO, vec![]); - let r0 = root(0); + let r0 = s0.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r0, 0, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); - let r1 = root(1); let s1 = sample_state(1, r0, vec![root(42)]); + let r1 = s1.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r1, 1, r0); store.insert_state(r1, s1.clone()).expect("insert state"); - let r2 = root(2); let s2 = sample_state(2, r1, vec![root(42), root(43)]); + let r2 = s2.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r2, 2, r1); store.insert_state(r2, s2.clone()).expect("insert state"); From a9270325830aafff3f3f61b001d3d3e1e473c827 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 16:05:26 -0300 Subject: [PATCH 12/17] test(storage): drop diff_at, build reconstruct inputs via from_states The reconstruct tests fabricated StateDiffs directly with a diff_at helper, bypassing the production constructor. Replace it with a child_state helper that builds realistic post-states (mirroring the STF's historical_block_hashes append), feed those through StateDiff::from_states, and assert a full round-trip through reconstruct (including a skipped-slot gap). Now the tests exercise the real diff creation path instead of hand-rolled diffs. 
--- crates/storage/src/state_diff.rs | 117 ++++++++++++++----------------- 1 file changed, 52 insertions(+), 65 deletions(-) diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index ea136eed..fc6d5405 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -175,6 +175,7 @@ pub(crate) fn reconstruct( #[cfg(test)] mod tests { use ethlambda_types::state::{State, Validator}; + use libssz::SszEncode; use super::*; @@ -237,99 +238,85 @@ mod tests { } } - /// Build a diff against `base_root` that appends `appended` to - /// `historical_block_hashes`. Absolute fields default; tests override the - /// ones they assert on. - fn diff_at(base_root: H256, slot: u64, appended: Vec) -> StateDiff { - StateDiff { - base_root, + /// Build the post-state of a block at `slot` whose parent post-state is + /// `parent`, mirroring the STF: append the parent block root (the hash of + /// `parent`'s `latest_block_header`), zero-fill skipped slots, and set the + /// child's own header. Absolute fields are inherited; callers override what + /// they assert on. Feeds realistic states into the production `from_states`, + /// so the tests exercise diff creation rather than fabricating diffs by hand. 
+ fn child_state(parent: &State, slot: u64) -> State { + let parent_root = parent.latest_block_header.hash_tree_root(); + let empty_slots = (slot - parent.slot - 1) as usize; + + let mut hbh = parent.historical_block_hashes.to_vec(); + hbh.push(parent_root); + hbh.extend(std::iter::repeat_n(H256::ZERO, empty_slots)); + + let mut child = parent.clone(); + child.slot = slot; + child.historical_block_hashes = hbh.try_into().unwrap(); + child.latest_block_header = BlockHeader { slot, - latest_justified: Checkpoint::default(), - latest_finalized: Checkpoint::default(), - justified_slots: JustifiedSlots::new(), - justifications_roots: JustificationRoots::default(), - justifications_validators: JustificationValidators::new(), - hbh_appended: HistoricalBlockHashesTail::try_from(appended).unwrap(), - } + proposer_index: 0, + parent_root, + state_root: H256::ZERO, + body_root: H256::ZERO, + }; + child } #[test] - fn reconstruct_merges_snapshot_with_diff_chain() { - // Snapshot: distinctive config + validators, plus one pre-existing root. + fn reconstruct_round_trips_a_diff_chain() { + // Snapshot at slot 100 with one pre-existing historical root. let mut snapshot = base_state(); snapshot.slot = 100; + snapshot.latest_block_header = header_at(100); snapshot.historical_block_hashes = vec![h256(1)].try_into().unwrap(); - // Intermediate diff (snapshot's child): appends one root, default fields. - let intermediate = diff_at(h256(50), 101, vec![h256(2)]); - - // Target diff (last): appends two roots and carries the absolute fields - // the reconstructed state must adopt, all different from the intermediate. - let mut target = diff_at(h256(51), 102, vec![h256(3), h256(4)]); - target.latest_justified = Checkpoint { + // s1 is the snapshot's child (consecutive slot); s2 is s1's child three + // slots later, so slots 102 and 103 are skipped and zero-filled. s2 also + // carries distinctive absolute fields the reconstruction must adopt. 
+ let s1 = child_state(&snapshot, 101); + let mut s2 = child_state(&s1, 104); + s2.latest_justified = Checkpoint { root: h256(7), slot: 101, }; - target.latest_finalized = Checkpoint { + s2.latest_finalized = Checkpoint { root: h256(8), slot: 100, }; - target.justified_slots = JustifiedSlots::try_from(vec![true, false, true]).unwrap(); - target.justifications_roots = JustificationRoots::try_from(vec![h256(9)]).unwrap(); - target.justifications_validators = JustificationValidators::try_from(vec![true]).unwrap(); + s2.justified_slots = JustifiedSlots::try_from(vec![true, false, true]).unwrap(); + s2.justifications_roots = JustificationRoots::try_from(vec![h256(9)]).unwrap(); + s2.justifications_validators = JustificationValidators::try_from(vec![true]).unwrap(); - let header = header_at(102); - let state = reconstruct(snapshot, &[intermediate, target.clone()], header.clone()); + // Diffs are built the production way, from each (pre, post) pair. + let diff1 = StateDiff::from_states(&snapshot, s1.clone()); + let diff2 = StateDiff::from_states(&s1, s2.clone()); - // Structural fields come from the snapshot (diffs never carry them). - assert_eq!(state.config.genesis_time, 1_000); - assert_eq!(state.validators.len(), 2); - assert_eq!(state.validators[0].attestation_pubkey, [1u8; 52]); - assert_eq!(state.validators[1].index, 1); + let reconstructed = reconstruct(snapshot, &[diff1, diff2], s2.latest_block_header.clone()); - // latest_block_header is the argument, passed through verbatim. - assert_eq!(state.latest_block_header, header); - - // Absolute fields come from the LAST diff, not the intermediate one. 
- assert_eq!(state.slot, 102); - assert_eq!(state.latest_justified, target.latest_justified); - assert_eq!(state.latest_finalized, target.latest_finalized); - assert_eq!(state.justified_slots, target.justified_slots); - assert_eq!(state.justifications_roots, target.justifications_roots); - assert_eq!( - state.justifications_validators, - target.justifications_validators - ); - - // historical_block_hashes = snapshot tail ++ each diff's appended tail, - // replayed in order. - assert_eq!( - state.historical_block_hashes.to_vec(), - vec![h256(1), h256(2), h256(3), h256(4)], - ); + // Full round-trip: structural fields (config/validators) from the snapshot, + // absolute fields from the last diff, and the appended-with-gaps history. + assert_eq!(reconstructed.to_ssz(), s2.to_ssz()); } #[test] - fn reconstruct_with_single_diff_uses_it_as_target() { + fn reconstruct_with_single_diff_round_trips() { let mut snapshot = base_state(); snapshot.slot = 7; + snapshot.latest_block_header = header_at(7); snapshot.historical_block_hashes = vec![h256(1)].try_into().unwrap(); - let mut diff = diff_at(h256(50), 8, vec![h256(2)]); - diff.latest_justified = Checkpoint { + let mut child = child_state(&snapshot, 8); + child.latest_justified = Checkpoint { root: h256(7), slot: 7, }; - let header = header_at(8); - let state = reconstruct(snapshot, &[diff.clone()], header.clone()); + let diff = StateDiff::from_states(&snapshot, child.clone()); + let reconstructed = reconstruct(snapshot, &[diff], child.latest_block_header.clone()); - assert_eq!(state.slot, 8); - assert_eq!(state.latest_justified, diff.latest_justified); - assert_eq!(state.latest_block_header, header); - assert_eq!( - state.historical_block_hashes.to_vec(), - vec![h256(1), h256(2)], - ); + assert_eq!(reconstructed.to_ssz(), child.to_ssz()); } } From a43b42ca7fc284eafcf0fd58654bfa0d1dae3301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 
2026 16:24:59 -0300 Subject: [PATCH 13/17] refactor(storage): derive StateDiff history instead of storing it A StateDiff stored the appended historical_block_hashes tail as a list. That tail is always the parent block root followed by one zero per skipped slot, and the parent root already equals base_root, so the whole tail is recoverable from base_root and the slot gap to the previous state. Drop the hbh_appended field: reconstruct now regenerates it, and from_states asserts the append matched that shape (right length for the slot gap, first entry == base_root, rest zero) so a future STF that broke the invariant fails loudly instead of corrupting reads. Update the store and blockchain tests to build states with realistic append histories, since from_states now validates them. --- crates/blockchain/src/store.rs | 17 +++-- crates/storage/src/state_diff.rs | 106 +++++++++++++++++++------------ crates/storage/src/store.rs | 18 +++--- 3 files changed, 85 insertions(+), 56 deletions(-) diff --git a/crates/blockchain/src/store.rs b/crates/blockchain/src/store.rs index 36bbfb99..f0a4d908 100644 --- a/crates/blockchain/src/store.rs +++ b/crates/blockchain/src/store.rs @@ -1265,13 +1265,20 @@ mod tests { // Head state justified `a` (slot 1), which lies on the head's chain. let head_justified = Checkpoint { root: a, slot: 1 }; - let mut head_state = State::from_genesis(1000, vec![]); + // Persist `b`'s post-state via the diff API, diffed against the genesis + // anchor. Build it as a valid direct child of genesis (the STF appends the + // parent block root to historical_block_hashes), with the head's justified + // checkpoint set; `insert_state` reads the base from + // `latest_block_header.parent_root`, and `get_state(b)` then returns it + // from the cache. 
+ let genesis_state = store.get_state(&genesis).expect("genesis state"); + let mut head_state = genesis_state.clone(); + head_state.slot = genesis_state.slot + 1; head_state.latest_justified = head_justified; - // Persist `b`'s post-state via the diff API. `insert_state` reads the base - // to diff against from the post-state's own `latest_block_header.parent_root`; - // point it at the genesis anchor already in the store, so `get_state(b)` - // resolves via the cache or by replaying this diff onto the genesis snapshot. head_state.latest_block_header.parent_root = genesis; + let mut hbh = genesis_state.historical_block_hashes.to_vec(); + hbh.push(genesis); + head_state.historical_block_hashes = hbh.try_into().expect("within limit"); store .insert_state(b, head_state) .expect("insert head state should succeed"); diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index fc6d5405..50a65520 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -7,8 +7,9 @@ //! Field handling: //! - `config`, `validators`: never change; omitted (taken from the snapshot). //! - `latest_block_header`: omitted; reconstructed from the `BlockHeaders` table. -//! - `historical_block_hashes`: pure-append in the STF, so only the appended -//! tail (`hbh_appended`) is stored. +//! - `historical_block_hashes`: pure-append in the STF (the parent block root +//! plus one zero per skipped slot), recoverable from `base_root` and the slot +//! gap, so nothing is stored for it. //! - everything else: stored verbatim (the justification fields are bounded by //! the non-finalized window, so they stay small under healthy finality). 
@@ -16,16 +17,9 @@ use ethlambda_types::{ block::BlockHeader, checkpoint::Checkpoint, primitives::{H256, HashTreeRoot}, - state::{ - HISTORICAL_ROOTS_LIMIT, JustificationRoots, JustificationValidators, JustifiedSlots, State, - }, + state::{JustificationRoots, JustificationValidators, JustifiedSlots, State}, }; use libssz_derive::{SszDecode, SszEncode}; -use libssz_types::SszList; - -/// Appended tail of `historical_block_hashes`, bounded by the same limit as the -/// full list. -pub type HistoricalBlockHashesTail = SszList<H256, HISTORICAL_ROOTS_LIMIT>; /// The change from a base (parent) state to a target state. /// @@ -47,8 +41,6 @@ pub struct StateDiff { pub justifications_roots: JustificationRoots, /// Target state's `justifications_validators` (stored in full). pub justifications_validators: JustificationValidators, - /// Elements appended to `historical_block_hashes` relative to the base. - pub hbh_appended: HistoricalBlockHashesTail, } impl StateDiff { @@ -57,8 +49,8 @@ impl StateDiff { /// /// Takes `post_state` by value so its multi-MB justification fields are moved /// into the diff rather than cloned; `pre_state` is read to derive `base_root` - /// and the length of its `historical_block_hashes` (the diff stores just the - /// tail `post_state` appended on top). + /// and to validate the `historical_block_hashes` append (which is not stored, + /// only checked here and regenerated by `reconstruct`). /// /// `base_root` is the parent block root, computed as the `hash_tree_root` of /// the pre-state's `latest_block_header`. A `Block` and its `BlockHeader` @@ -76,12 +68,12 @@ impl StateDiff { /// are not stored in the diff; reconstruction takes them from the nearest /// ancestor snapshot. (The lean STF never mutates either: `validators` is /// fixed at genesis and `config` is static.) 
- /// - **`historical_block_hashes` only grows by appending.** The base's list - /// is a prefix of the target's, so only the appended tail - /// (`target[base_len..]`) is stored and the earlier entries are never - /// reordered or rewritten. (`process_slots` pushes the parent root and - /// zero-fills skipped slots, leaving the existing prefix intact.) This is - /// why the base's length alone is enough to identify its contribution. + /// - **`historical_block_hashes` grows only by appending the parent block + /// root, then one zero per skipped slot.** (`process_block` pushes the + /// parent root and zero-fills skipped slots, leaving the existing prefix + /// intact.) Nothing is stored for it: `reconstruct` regenerates the tail + /// from `base_root` and the slot gap. This function asserts the invariant, + /// so a future STF that broke it fails loudly instead of corrupting reads. /// - **`latest_block_header` is not stored here.** It is read back from the /// `BlockHeaders` table during reconstruction; the persisted post-state /// caches the real `state_root` there, so the two are byte-identical. @@ -92,8 +84,10 @@ impl StateDiff { /// /// # Panics /// - /// Panics if `post_state.historical_block_hashes` is shorter than the - /// pre-state's, i.e. the append-only assumption above was violated. + /// Panics if the `historical_block_hashes` append does not match the + /// assumption above: a length that disagrees with the slot gap, a first + /// appended entry other than `base_root`, or a non-zero entry for a skipped + /// slot. pub fn from_states(pre_state: &State, post_state: State) -> Self { let base_root = pre_state.latest_block_header.hash_tree_root(); let base_hbh_len = pre_state.historical_block_hashes.len(); @@ -108,14 +102,30 @@ impl StateDiff { .. } = post_state; + // The diff stores no historical_block_hashes; reconstruct regenerates the + // appended tail from base_root and the slot gap. 
Assert the append matched + // that shape so a future STF that broke it fails here, not silently later. let hbh = historical_block_hashes.into_inner(); assert!( hbh.len() >= base_hbh_len, - "target historical_block_hashes shorter than base: {} < {base_hbh_len}", + "post-state historical_block_hashes shorter than pre-state: {} < {base_hbh_len}", hbh.len() ); - let hbh_appended = HistoricalBlockHashesTail::try_from(hbh[base_hbh_len..].to_vec()) - .expect("appended tail cannot exceed HISTORICAL_ROOTS_LIMIT"); + let appended = &hbh[base_hbh_len..]; + assert_eq!( + appended.len(), + (slot - pre_state.slot) as usize, + "appended historical_block_hashes length does not match the slot gap" + ); + assert_eq!( + appended.first().copied(), + Some(base_root), + "first appended historical_block_hash is not the base (parent) root" + ); + assert!( + appended[1..].iter().all(|h| *h == H256::ZERO), + "skipped-slot historical_block_hashes are not zero-filled" + ); Self { base_root, @@ -125,7 +135,6 @@ impl StateDiff { justified_slots, justifications_roots, justifications_validators, - hbh_appended, } } } @@ -135,8 +144,9 @@ impl StateDiff { /// `diffs` are ordered from the snapshot's child up to the target (inclusive, /// non-empty). `latest_block_header` is the target's header (kept in the /// `BlockHeaders` table rather than the diff). `config`/`validators` come from -/// `snapshot` (they never change), `historical_block_hashes` is replayed from -/// the appended tails, and the remaining fields come from the last diff. +/// `snapshot` (they never change), `historical_block_hashes` is replayed by +/// appending each diff's `base_root` and one zero per skipped slot, and the +/// remaining fields come from the last diff. /// /// # Panics /// @@ -150,9 +160,16 @@ pub(crate) fn reconstruct( .last() .expect("reconstruct requires at least one diff"); + // Replay the appended history: each diff added its base (parent) block root, + // then one zero per slot skipped before it. 
Both are recovered from base_root + // and the gap to the previous state's slot, so no hbh is stored in the diff. let mut hbh: Vec<H256> = snapshot.historical_block_hashes.to_vec(); + let mut prev_slot = snapshot.slot; for diff in diffs { - hbh.extend_from_slice(&diff.hbh_appended); + hbh.push(diff.base_root); + let empty_slots = (diff.slot - prev_slot - 1) as usize; + hbh.extend(std::iter::repeat_n(H256::ZERO, empty_slots)); + prev_slot = diff.slot; } let historical_block_hashes = hbh .try_into() @@ -201,29 +218,34 @@ mod tests { } #[test] - fn from_base_captures_appended_tail_and_absolute_fields() { + fn from_states_captures_base_root_and_absolute_fields() { + // base at slot 0; post at slot 5 skips slots 1-4 (zero-filled). let base = base_state(); - - let mut target = base.clone(); - target.slot = 5; + let mut post = child_state(&base, 5); let expected_justified = Checkpoint { root: h256(7), slot: 4, }; - target.latest_justified = expected_justified; - // Append three roots (one real parent + two zero-filled empty slots). - let mut hbh: Vec<H256> = base.historical_block_hashes.to_vec(); - hbh.extend([h256(9), H256::ZERO, H256::ZERO]); - target.historical_block_hashes = hbh.try_into().unwrap(); + post.latest_justified = expected_justified; - let diff = StateDiff::from_states(&base, target); + let diff = StateDiff::from_states(&base, post); assert_eq!(diff.base_root, base.latest_block_header.hash_tree_root()); assert_eq!(diff.slot, 5); assert_eq!(diff.latest_justified, expected_justified); - assert_eq!(diff.hbh_appended.len(), 3); - assert_eq!(diff.hbh_appended[0], h256(9)); - assert_eq!(diff.hbh_appended[1], H256::ZERO); + } + + #[test] + #[should_panic(expected = "first appended historical_block_hash")] + fn from_states_rejects_non_append_history() { + let base = base_state(); + let mut post = child_state(&base, 1); + // Corrupt the single appended entry so it no longer equals base_root. 
+ let mut hbh = post.historical_block_hashes.to_vec(); + *hbh.last_mut().unwrap() = h256(123); + post.historical_block_hashes = hbh.try_into().unwrap(); + + let _ = StateDiff::from_states(&base, post); } /// A block header distinct from any snapshot/diff field, so the test can diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index bf30f403..5a2e77d0 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -1757,8 +1757,8 @@ mod tests { insert_header(backend.as_ref(), r0, 0, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); - // Child at slot 1 (parent r0): appends one historical root, sets a checkpoint. - let mut s1 = sample_state(1, r0, vec![root(42)]); + // Child at slot 1 (parent r0): appends r0 (slot 0's block root), sets a checkpoint. + let mut s1 = sample_state(1, r0, vec![r0]); s1.latest_justified = Checkpoint { root: root(7), slot: 0, @@ -1792,12 +1792,12 @@ mod tests { insert_header(backend.as_ref(), r0, 0, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); - let s1 = sample_state(1, r0, vec![root(42)]); + let s1 = sample_state(1, r0, vec![r0]); let r1 = s1.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r1, 1, r0); store.insert_state(r1, s1.clone()).expect("insert state"); - let s2 = sample_state(2, r1, vec![root(42), root(43)]); + let s2 = sample_state(2, r1, vec![r0, r1]); let r2 = s2.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r2, 2, r1); store.insert_state(r2, s2.clone()).expect("insert state"); @@ -1817,20 +1817,20 @@ mod tests { let mut store = Store::test_store_with_backend(backend.clone()); let s0 = sample_state(SNAPSHOT_ANCHOR_INTERVAL - 1, H256::ZERO, vec![]); - let r0 = root(0); + let r0 = s0.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r0, s0.slot, H256::ZERO); insert_snapshot(backend.as_ref(), r0, &s0); // Crossing the interval boundary records an anchor. 
- let r1 = root(1); - let s1 = sample_state(SNAPSHOT_ANCHOR_INTERVAL, r0, vec![root(42)]); + let s1 = sample_state(SNAPSHOT_ANCHOR_INTERVAL, r0, vec![r0]); + let r1 = s1.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r1, s1.slot, r0); store.insert_state(r1, s1.clone()).expect("insert state"); assert!(has_key(backend.as_ref(), Table::States, &r1)); // A non-crossing child does not. - let r2 = root(2); - let s2 = sample_state(SNAPSHOT_ANCHOR_INTERVAL + 1, r1, vec![root(42), root(43)]); + let s2 = sample_state(SNAPSHOT_ANCHOR_INTERVAL + 1, r1, vec![r0, r1]); + let r2 = s2.latest_block_header.hash_tree_root(); insert_header(backend.as_ref(), r2, s2.slot, r1); store.insert_state(r2, s2.clone()).expect("insert state"); assert!(!has_key(backend.as_ref(), Table::States, &r2)); From d351ffc8178fa47c6d627dfc1a7c3c2856620b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 16:28:28 -0300 Subject: [PATCH 14/17] docs: revert comment --- crates/blockchain/src/store.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/blockchain/src/store.rs b/crates/blockchain/src/store.rs index f0a4d908..64db40c9 100644 --- a/crates/blockchain/src/store.rs +++ b/crates/blockchain/src/store.rs @@ -586,7 +586,7 @@ fn on_block_core( .expect("update_checkpoints should succeed"); } - // Store signed block and state (as a parent-linked diff + snapshot) + // Store signed block and state store .insert_signed_block(block_root, signed_block.clone()) .expect("DB insert should succeed"); From da728e0fbe11ecd7184f1a6b51d5c6a42c4a591a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 16:49:16 -0300 Subject: [PATCH 15/17] refactor(storage): return StateDiffError from from_states instead of asserting The historical_block_hashes append invariant was enforced with asserts inside from_states, 
which panicked on violation and tangled the checks with diff construction. Extract them into validate_history_append, which returns a thiserror StateDiffError, and have from_states surface it as a Result. The production caller calls `expect` on it: a non-append history is a state-transition bug, not a recoverable condition, so it should still abort, now with a typed cause and check logic testable in isolation. --- crates/storage/src/state_diff.rs | 122 +++++++++++++++++++++---------- crates/storage/src/store.rs | 4 +- 2 files changed, 87 insertions(+), 39 deletions(-) diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index 50a65520..48957f32 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -43,6 +43,32 @@ pub struct StateDiff { pub justifications_validators: JustificationValidators, } +/// Why a post-state could not be reduced to a [`StateDiff`]. +/// +/// Every variant means the state transition mutated `historical_block_hashes` +/// in a way the diff layer does not model: the diff stores no history and +/// [`reconstruct`] regenerates the append from `base_root` plus the slot gap, so +/// a mismatch here would corrupt later reads. These are state-transition +/// invariants, not recoverable conditions, so the production caller `expect`s on +/// them. +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum StateDiffError { + /// The post-state's history is shorter than the pre-state's: the STF removed + /// entries instead of appending. + #[error("post-state historical_block_hashes ({actual}) is shorter than pre-state ({base})")] + HistoryShrank { actual: usize, base: usize }, + /// The number of appended entries does not equal the slot gap (one entry per + /// slot from the parent's slot inclusive to the target's slot exclusive). 
+ #[error("appended historical_block_hashes length ({appended}) does not match slot gap ({gap})")] + AppendLengthMismatch { appended: usize, gap: usize }, + /// The first appended entry is not the base (parent) block root. + #[error("first appended historical_block_hash is not the base (parent) root")] + FirstAppendedNotBase, + /// A skipped-slot entry is non-zero (skipped slots must be zero-filled). + #[error("skipped-slot historical_block_hashes are not zero-filled")] + SkippedSlotsNotZero, +} + impl StateDiff { /// Build a diff from the pre-state (the parent block's post-state) and the /// consumed post-state. @@ -72,8 +98,9 @@ impl StateDiff { /// root, then one zero per skipped slot.** (`process_block` pushes the /// parent root and zero-fills skipped slots, leaving the existing prefix /// intact.) Nothing is stored for it: `reconstruct` regenerates the tail - /// from `base_root` and the slot gap. This function asserts the invariant, - /// so a future STF that broke it fails loudly instead of corrupting reads. + /// from `base_root` and the slot gap. This function validates the invariant + /// and returns a [`StateDiffError`] if a future STF breaks it, instead of + /// corrupting reads. /// - **`latest_block_header` is not stored here.** It is read back from the /// `BlockHeaders` table during reconstruction; the persisted post-state /// caches the real `state_root` there, so the two are byte-identical. @@ -82,13 +109,14 @@ impl StateDiff { /// justification fields) are captured verbatim, so the diff makes no /// assumption about how those change. /// - /// # Panics + /// # Errors /// - /// Panics if the `historical_block_hashes` append does not match the - /// assumption above: a length that disagrees with the slot gap, a first - /// appended entry other than `base_root`, or a non-zero entry for a skipped - /// slot. 
- pub fn from_states(pre_state: &State, post_state: State) -> Self { + /// Returns [`StateDiffError`] if the `historical_block_hashes` append does + /// not match the assumption above: a length that disagrees with the slot + /// gap, a first appended entry other than `base_root`, or a non-zero entry + /// for a skipped slot. These are state-transition invariants, so the + /// production caller treats a failure as a bug and `expect`s on it. + pub fn from_states(pre_state: &State, post_state: State) -> Result<Self, StateDiffError> { let base_root = pre_state.latest_block_header.hash_tree_root(); let base_hbh_len = pre_state.historical_block_hashes.len(); let State { @@ -103,31 +131,14 @@ impl StateDiff { } = post_state; // The diff stores no historical_block_hashes; reconstruct regenerates the - // appended tail from base_root and the slot gap. Assert the append matched - // that shape so a future STF that broke it fails here, not silently later. + // appended tail from base_root and the slot gap. Validate the append + // matched that shape so a future STF that broke it fails here, not + // silently later. 
let hbh = historical_block_hashes.into_inner(); - assert!( - hbh.len() >= base_hbh_len, - "post-state historical_block_hashes shorter than pre-state: {} < {base_hbh_len}", - hbh.len() - ); - let appended = &hbh[base_hbh_len..]; - assert_eq!( - appended.len(), - (slot - pre_state.slot) as usize, - "appended historical_block_hashes length does not match the slot gap" - ); - assert_eq!( - appended.first().copied(), - Some(base_root), - "first appended historical_block_hash is not the base (parent) root" - ); - assert!( - appended[1..].iter().all(|h| *h == H256::ZERO), - "skipped-slot historical_block_hashes are not zero-filled" - ); + let slot_gap = (slot - pre_state.slot) as usize; + validate_history_append(&hbh, base_hbh_len, base_root, slot_gap)?; - Self { + Ok(Self { base_root, slot, latest_justified, @@ -135,10 +146,43 @@ impl StateDiff { justified_slots, justifications_roots, justifications_validators, - } + }) } } +/// Validate that the post-state's `historical_block_hashes` extends the +/// pre-state's by exactly the tail the STF appends: the base (parent) block +/// root, then one zero per skipped slot. This is the invariant that lets a diff +/// store no history (see [`StateDiff::from_states`]); `reconstruct` relies on it +/// to regenerate the tail from `base_root` and the slot gap. 
+fn validate_history_append( + hbh: &[H256], + base_hbh_len: usize, + base_root: H256, + slot_gap: usize, +) -> Result<(), StateDiffError> { + if hbh.len() < base_hbh_len { + return Err(StateDiffError::HistoryShrank { + actual: hbh.len(), + base: base_hbh_len, + }); + } + let appended = &hbh[base_hbh_len..]; + if appended.len() != slot_gap { + return Err(StateDiffError::AppendLengthMismatch { + appended: appended.len(), + gap: slot_gap, + }); + } + if appended.first().copied() != Some(base_root) { + return Err(StateDiffError::FirstAppendedNotBase); + } + if !appended[1..].iter().all(|h| *h == H256::ZERO) { + return Err(StateDiffError::SkippedSlotsNotZero); + } + Ok(()) +} + /// Rebuild a state from a base snapshot and the diffs leading to the target. /// /// `diffs` are ordered from the snapshot's child up to the target (inclusive, @@ -228,7 +272,7 @@ mod tests { }; post.latest_justified = expected_justified; - let diff = StateDiff::from_states(&base, post); + let diff = StateDiff::from_states(&base, post).expect("valid append"); assert_eq!(diff.base_root, base.latest_block_header.hash_tree_root()); assert_eq!(diff.slot, 5); @@ -236,7 +280,6 @@ mod tests { } #[test] - #[should_panic(expected = "first appended historical_block_hash")] fn from_states_rejects_non_append_history() { let base = base_state(); let mut post = child_state(&base, 1); @@ -245,7 +288,10 @@ mod tests { *hbh.last_mut().unwrap() = h256(123); post.historical_block_hashes = hbh.try_into().unwrap(); - let _ = StateDiff::from_states(&base, post); + assert_eq!( + StateDiff::from_states(&base, post), + Err(StateDiffError::FirstAppendedNotBase) + ); } /// A block header distinct from any snapshot/diff field, so the test can @@ -313,8 +359,8 @@ mod tests { s2.justifications_validators = JustificationValidators::try_from(vec![true]).unwrap(); // Diffs are built the production way, from each (pre, post) pair. 
- let diff1 = StateDiff::from_states(&snapshot, s1.clone()); - let diff2 = StateDiff::from_states(&s1, s2.clone()); + let diff1 = StateDiff::from_states(&snapshot, s1.clone()).expect("valid append"); + let diff2 = StateDiff::from_states(&s1, s2.clone()).expect("valid append"); let reconstructed = reconstruct(snapshot, &[diff1, diff2], s2.latest_block_header.clone()); @@ -336,7 +382,7 @@ mod tests { slot: 7, }; - let diff = StateDiff::from_states(&snapshot, child.clone()); + let diff = StateDiff::from_states(&snapshot, child.clone()).expect("valid append"); let reconstructed = reconstruct(snapshot, &[diff], child.latest_block_header.clone()); assert_eq!(reconstructed.to_ssz(), child.to_ssz()); diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 5a2e77d0..26da6998 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -1254,7 +1254,9 @@ impl Store { // Memoize the post-state for fast reads, then move it into the diff so // its multi-MB justification fields are not cloned again. self.state_cache.lock().unwrap().put(root, state.clone()); - let diff_bytes = StateDiff::from_states(&parent_state, state).to_ssz(); + let diff_bytes = StateDiff::from_states(&parent_state, state) + .expect("state transition produced a non-append historical_block_hashes") + .to_ssz(); let key = root.to_ssz(); let mut batch = self.backend.begin_write().expect("write batch"); From 86dd88165a6c4aa8e5579469ec20c2d83153c43c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 16:51:21 -0300 Subject: [PATCH 16/17] test(storage): drop the from_states error-path unit test The dedicated non-append-history test is redundant: validate_history_append runs on every from_states call, so all four StateDiffError paths are already exercised by the reconstruct round-trip tests and the production import path. 
--- crates/storage/src/state_diff.rs | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/crates/storage/src/state_diff.rs b/crates/storage/src/state_diff.rs index 48957f32..1e9e5028 100644 --- a/crates/storage/src/state_diff.rs +++ b/crates/storage/src/state_diff.rs @@ -279,21 +279,6 @@ mod tests { assert_eq!(diff.latest_justified, expected_justified); } - #[test] - fn from_states_rejects_non_append_history() { - let base = base_state(); - let mut post = child_state(&base, 1); - // Corrupt the single appended entry so it no longer equals base_root. - let mut hbh = post.historical_block_hashes.to_vec(); - *hbh.last_mut().unwrap() = h256(123); - post.historical_block_hashes = hbh.try_into().unwrap(); - - assert_eq!( - StateDiff::from_states(&base, post), - Err(StateDiffError::FirstAppendedNotBase) - ); - } - /// A block header distinct from any snapshot/diff field, so the test can /// assert it is passed through `reconstruct` verbatim. fn header_at(slot: u64) -> BlockHeader { From f7c9f5fdb67e508f33c6652e88cd54f289991bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Gr=C3=BCner?= <47506558+MegaRedHand@users.noreply.github.com> Date: Thu, 25 Jun 2026 17:01:12 -0300 Subject: [PATCH 17/17] chore: fmt --- crates/storage/src/store.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/storage/src/store.rs b/crates/storage/src/store.rs index 26da6998..747d66dc 100644 --- a/crates/storage/src/store.rs +++ b/crates/storage/src/store.rs @@ -1188,7 +1188,9 @@ impl Store { if let Some(bytes) = view.get(Table::States, &cursor.to_ssz()).expect("get") { break State::from_ssz_bytes(&bytes).expect("valid state"); } - let diff_bytes = view.get(Table::StateDiffs, &cursor.to_ssz()).expect("get")?; + let diff_bytes = view + .get(Table::StateDiffs, &cursor.to_ssz()) + .expect("get")?; let diff = StateDiff::from_ssz_bytes(&diff_bytes).expect("valid state diff"); cursor = diff.base_root; diffs.push(diff);