From 55477054fffbfcc5233dc3c206f375915e773e9c Mon Sep 17 00:00:00 2001 From: SeongChan Lee Date: Mon, 4 Nov 2019 11:16:37 +0900 Subject: [PATCH] Implement Snapshot & Restore modules --- Cargo.lock | 2 + util/merkle/Cargo.toml | 3 + util/merkle/src/lib.rs | 5 + util/merkle/src/nibbleslice.rs | 2 +- util/merkle/src/node.rs | 7 + util/merkle/src/snapshot/chunk.rs | 284 +++++++++++++++++++ util/merkle/src/snapshot/compress.rs | 119 ++++++++ util/merkle/src/snapshot/error.rs | 65 +++++ util/merkle/src/snapshot/mod.rs | 337 +++++++++++++++++++++++ util/merkle/src/snapshot/ordered_heap.rs | 76 +++++ util/merkle/src/triedbmut.rs | 107 +++++++ 11 files changed, 1006 insertions(+), 1 deletion(-) create mode 100644 util/merkle/src/snapshot/chunk.rs create mode 100644 util/merkle/src/snapshot/compress.rs create mode 100644 util/merkle/src/snapshot/error.rs create mode 100644 util/merkle/src/snapshot/mod.rs create mode 100644 util/merkle/src/snapshot/ordered_heap.rs diff --git a/Cargo.lock b/Cargo.lock index a7644feaf2..f16f8e1943 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -418,6 +418,8 @@ dependencies = [ "primitives 0.4.0 (git+https://github.com/CodeChain-io/rust-codechain-primitives.git)", "rand 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "rlp 0.2.1", + "rlp_derive 0.1.0", + "snap 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", "trie-standardmap 0.1.0", ] diff --git a/util/merkle/Cargo.toml b/util/merkle/Cargo.toml index 4cf53e157a..b2f738f660 100644 --- a/util/merkle/Cargo.toml +++ b/util/merkle/Cargo.toml @@ -8,8 +8,11 @@ elastic-array = "0.10" rand = "0.6.1" hashdb = {path = "../hashdb" } codechain-crypto = { git = "https://github.com/CodeChain-io/rust-codechain-crypto.git", version = "0.1" } +memorydb = { path = "../memorydb" } primitives = { git = "https://github.com/CodeChain-io/rust-codechain-primitives.git", version = "0.4" } rlp = {path = "../rlp" } +rlp_derive = { path = "../rlp_derive" } +snap = "0.2" [dev-dependencies] trie-standardmap = { path = "../trie-standardmap" } diff --git a/util/merkle/src/lib.rs b/util/merkle/src/lib.rs index 63f7b708f7..7442753cf6 100644 --- a/util/merkle/src/lib.rs +++ b/util/merkle/src/lib.rs @@ -21,6 +21,9 @@ extern crate hashdb; extern crate memorydb; extern crate primitives; extern crate rlp; +#[macro_use] +extern crate rlp_derive; +extern crate snap; #[cfg(test)] extern crate trie_standardmap as standardmap; @@ -34,6 +37,8 @@ use primitives::H256; mod nibbleslice; pub mod node; mod skewed; +#[allow(dead_code)] +pub mod snapshot; pub mod triedb; pub mod triedbmut; pub mod triehash; diff --git a/util/merkle/src/nibbleslice.rs b/util/merkle/src/nibbleslice.rs index 77a9705111..1d1316bdb6 100644 --- a/util/merkle/src/nibbleslice.rs +++ b/util/merkle/src/nibbleslice.rs @@ -19,7 +19,7 @@ use std::fmt; use elastic_array::ElasticArray36; -#[derive(Eq, Ord)] +#[derive(Eq, Ord, Copy, Clone)] pub struct NibbleSlice<'a> { pub data: &'a [u8], pub offset: usize, diff --git a/util/merkle/src/node.rs b/util/merkle/src/node.rs index 41c03614a9..6cedaaaec0 100644 --- a/util/merkle/src/node.rs +++ b/util/merkle/src/node.rs @@ -113,4 +113,11 @@ impl<'a> Node<'a> { } } } + + pub fn mid(self, offset: usize) -> Self { + match self { + Node::Leaf(partial, value) => Node::Leaf(partial.mid(offset), value), + Node::Branch(partial, child) => Node::Branch(partial.mid(offset), child), + } + } } diff --git a/util/merkle/src/snapshot/chunk.rs b/util/merkle/src/snapshot/chunk.rs new file mode 100644 index 0000000000..afac5d8331 --- /dev/null +++ 
b/util/merkle/src/snapshot/chunk.rs @@ -0,0 +1,284 @@ +// Copyright 2019 Kodebox, Inc. +// This file is part of CodeChain. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::collections::VecDeque; +use std::convert::From; + +use ccrypto::BLAKE_NULL_RLP; +use hashdb::{DBValue, HashDB}; +use primitives::H256; + +use super::error::{ChunkError, Error}; +use super::{DecodedPathSlice, PathSlice, CHUNK_HEIGHT}; +use crate::nibbleslice::NibbleSlice; +use crate::{Node, TrieDBMut}; + +#[derive(RlpEncodable, RlpDecodable, Eq, PartialEq)] +pub struct TerminalNode { + // Relative path from the chunk root. + pub path_slice: PathSlice, + pub node_rlp: Vec, +} + +impl std::fmt::Debug for TerminalNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + let path_slice = NibbleSlice::from_encoded(&self.path_slice); + f.debug_struct("TerminalNode") + .field("path_slice", &path_slice) + .field("node_rlp", &NodeDebugAdaptor { + rlp: &self.node_rlp, + }) + .finish() + } +} + +struct NodeDebugAdaptor<'a> { + rlp: &'a [u8], +} + +impl<'a> std::fmt::Debug for NodeDebugAdaptor<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + match Node::decoded(&self.rlp) { + Some(node) => write!(f, "{:?}", &node), + None => write!(f, "{:?}", self.rlp), + } + } +} + +/// An unverified chunk from the network +#[derive(Debug)] +pub struct RawChunk { + pub nodes: Vec, +} + +/// Fully recovered, and re-hydrated chunk. +pub struct RecoveredChunk { + pub(crate) root: H256, + /// contains all nodes including non-terminal nodes and terminal nodes. + /// You can blindly pour all items in `nodes` into `HashDB`. + pub(crate) nodes: Vec<(H256, DBValue)>, + /// Their path slices are relative to this chunk root. + pub(crate) unresolved_chunks: Vec, +} + +impl RawChunk { + /// Verify and recover the chunk + pub fn recover(&self, expected_chunk_root: H256) -> Result { + let mut memorydb = memorydb::MemoryDB::new(); + let mut chunk_root = H256::new(); + + { + let mut trie = TrieDBMut::new(&mut memorydb, &mut chunk_root); + for node in self.nodes.iter() { + let old_val = match Node::decoded(&node.node_rlp) { + Some(Node::Branch(slice, child)) => { + let encoded = DecodedPathSlice::from_encoded(&node.path_slice).with_slice(slice).encode(); + trie.insert_raw(Node::Branch(NibbleSlice::from_encoded(&encoded), child))? + } + Some(Node::Leaf(slice, data)) => { + let encoded = DecodedPathSlice::from_encoded(&node.path_slice).with_slice(slice).encode(); + trie.insert_raw(Node::Leaf(NibbleSlice::from_encoded(&encoded), data))? + } + None => return Err(ChunkError::InvalidContent.into()), + }; + + if let Some(old_val) = old_val { + if old_val.as_ref() != node.node_rlp.as_slice() { + return Err(ChunkError::InvalidContent.into()) + } + } + } + } + + // Some nodes in the chunk is different from the expected. 
+ if chunk_root != expected_chunk_root { + return Err(ChunkError::ChunkRootMismatch { + expected: expected_chunk_root, + actual: chunk_root, + } + .into()) + } + + let mut nodes = Vec::new(); + let mut unresolved_chunks = Vec::new(); + let mut queue: VecDeque = VecDeque::from(vec![NodePath::new(chunk_root)]); + while let Some(path) = queue.pop_front() { + let node = match memorydb.get(&path.key) { + Some(x) => x, + None => { + // all unresolved should depth == CHUNK_HEIGHT + 1 + if path.depth != CHUNK_HEIGHT + 1 { + return Err(ChunkError::InvalidHeight.into()) + } + + unresolved_chunks.push(UnresolvedChunk::from(path)); + continue + } + }; + + if path.depth > CHUNK_HEIGHT { + return Err(ChunkError::InvalidHeight.into()) + } + nodes.push((path.key, node.clone())); + + let node = Node::decoded(&node).expect("Chunk root was verified; Node can't be wrong"); + if let Node::Branch(slice, children) = node { + for (index, child) in children.iter().enumerate() { + if let Some(child) = child { + queue.push_back(path.with_slice_and_index(slice, index, *child)); + } + } + } + } + + Ok(RecoveredChunk { + root: expected_chunk_root, + nodes, + unresolved_chunks, + }) + } +} + +impl std::fmt::Debug for RecoveredChunk { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + struct Adapter<'a>(&'a [(H256, DBValue)]); + impl<'a> std::fmt::Debug for Adapter<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + f.debug_list() + .entries(self.0.iter().map(|(hash, rlp)| { + (hash, NodeDebugAdaptor { + rlp, + }) + })) + .finish() + } + } + + f.debug_struct("RecoveredChunk") + .field("root", &self.root) + .field("nodes", &Adapter(&self.nodes)) + .field("unresolved_chunks", &self.unresolved_chunks) + .finish() + } +} + +/// Chunk obtained from the state db. +#[derive(Debug)] +pub struct Chunk { + pub root: H256, + pub terminal_nodes: Vec, +} + +impl Chunk { + pub(crate) fn from_chunk_root(db: &dyn HashDB, chunk_root: H256) -> Chunk { + let mut unresolved: VecDeque = VecDeque::from(vec![NodePath::new(chunk_root)]); + let mut terminal_nodes: Vec = Vec::new(); + while let Some(path) = unresolved.pop_front() { + assert!(path.key != BLAKE_NULL_RLP, "Empty DB"); + assert!(path.depth <= CHUNK_HEIGHT); + let node = db.get(&path.key).expect("Can't find the node in a db. DB is inconsistent"); + let node_decoded = Node::decoded(&node).expect("Node cannot be decoded. DB is inconsistent"); + + match node_decoded { + // Continue to BFS + Node::Branch(slice, ref children) if path.depth < CHUNK_HEIGHT => { + for (i, hash) in children.iter().enumerate() { + if let Some(hash) = hash { + unresolved.push_back(path.with_slice_and_index(slice, i, *hash)); + } + } + } + // Reached the terminal node. 
Branch at path.depth == CHUNK_HEIGHT || Leaf + _ => terminal_nodes.push(TerminalNode { + path_slice: path.path_slice.encode(), + node_rlp: node.to_vec(), + }), + }; + } + Chunk { + root: chunk_root, + terminal_nodes, + } + } + + // Returns path slices to unresolved chunk roots relative to this chunk root + pub(crate) fn unresolved_chunks(&self) -> Vec { + let mut result = Vec::new(); + for node in self.terminal_nodes.iter() { + let decoded = Node::decoded(&node.node_rlp).expect("All terminal nodes should be valid"); + if let Node::Branch(slice, children) = decoded { + for (i, child) in children.iter().enumerate() { + if let Some(child) = child { + result.push(UnresolvedChunk { + path_slice: DecodedPathSlice::from_encoded(&node.path_slice).with_slice_and_index(slice, i), + chunk_root: *child, + }) + } + } + } + } + result + } + + #[cfg(test)] + pub(crate) fn into_raw_chunk(self) -> RawChunk { + RawChunk { + nodes: self.terminal_nodes, + } + } +} + +/// path slice to `chunk_root` is relative to the root of originating chunk. +#[derive(Debug)] +pub(crate) struct UnresolvedChunk { + pub path_slice: DecodedPathSlice, + pub chunk_root: H256, +} + +impl From for UnresolvedChunk { + fn from(path: NodePath) -> Self { + Self { + path_slice: path.path_slice, + chunk_root: path.key, + } + } +} + +#[derive(Debug)] +struct NodePath { + // path slice to the node relative to chunk_root + path_slice: DecodedPathSlice, + depth: usize, + key: H256, +} + +impl NodePath { + fn new(key: H256) -> NodePath { + NodePath { + path_slice: DecodedPathSlice::new(), + depth: 1, + key, + } + } + + fn with_slice_and_index(&self, slice: NibbleSlice, index: usize, key: H256) -> NodePath { + NodePath { + path_slice: self.path_slice.with_slice_and_index(slice, index), + depth: self.depth + 1, + key, + } + } +} diff --git a/util/merkle/src/snapshot/compress.rs b/util/merkle/src/snapshot/compress.rs new file mode 100644 index 0000000000..d975ad0411 --- /dev/null +++ b/util/merkle/src/snapshot/compress.rs @@ -0,0 +1,119 @@ +// Copyright 2019 Kodebox, Inc. +// This file is part of CodeChain. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
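+// Wire format implemented by this module: the terminal nodes of a `Chunk` are
+// appended to a single RLP list (one `TerminalNode { path_slice, node_rlp }` per
+// item) and the list is compressed with snappy. `ChunkDecompressor` decompresses,
+// rejects lists with more than `CHUNK_MAX_NODES` items, and returns an unverified
+// `RawChunk`; the chunk root itself is only checked later by `RawChunk::recover`.
+//
+// Round-trip sketch (the file name is illustrative; errors propagate via `?`):
+//
+//     let file = std::fs::File::create("chunk.snappy")?;
+//     ChunkCompressor::new(file).compress_chunk(&chunk)?;
+//
+//     let bytes = std::fs::read("chunk.snappy")?;
+//     let raw = ChunkDecompressor::from_slice(&bytes).decompress()?;
+//     let recovered = raw.recover(chunk.root)?; // fails if the nodes do not hash to chunk.root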
+ +use std::io::{Cursor, Read, Write}; + +use rlp::{RlpStream, UntrustedRlp}; + +use super::chunk::{Chunk, RawChunk}; +use super::error::{ChunkError, Error}; +use super::CHUNK_MAX_NODES; + +pub struct ChunkDecompressor { + read: R, +} + +impl ChunkDecompressor { + pub fn new(read: R) -> Self { + ChunkDecompressor { + read, + } + } +} + +impl<'a> ChunkDecompressor> { + fn from_slice(slice: &'a [u8]) -> Self { + ChunkDecompressor::new(Cursor::new(slice)) + } +} + +impl ChunkDecompressor +where + R: Read + Clone, +{ + pub fn decompress(self) -> Result { + let mut buf = Vec::new(); + + let mut snappy = snap::Reader::new(self.read); + snappy.read_to_end(&mut buf)?; + + let rlp = UntrustedRlp::new(&buf); + let len = rlp.item_count()?; + if len > CHUNK_MAX_NODES { + return Err(ChunkError::TooBig.into()) + } + + Ok(RawChunk { + nodes: rlp.as_list()?, + }) + } +} + +pub struct ChunkCompressor { + write: W, +} + +impl ChunkCompressor { + pub fn new(write: W) -> Self { + ChunkCompressor { + write, + } + } +} + +impl ChunkCompressor +where + W: Write, +{ + pub fn compress_chunk(self, chunk: &Chunk) -> Result<(), Error> { + let mut rlp = RlpStream::new_list(chunk.terminal_nodes.len()); + for node in chunk.terminal_nodes.iter() { + rlp.append(node); + } + let mut snappy = snap::Writer::new(self.write); + snappy.write_all(rlp.as_raw())?; + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::snapshot::chunk::{Chunk, TerminalNode}; + + #[test] + fn test_compress_decompress() { + let chunk = Chunk { + root: Default::default(), + terminal_nodes: vec![ + (TerminalNode { + path_slice: b"12345".to_vec(), + node_rlp: b"45678".to_vec(), + }), + (TerminalNode { + path_slice: b"56789".to_vec(), + node_rlp: b"123abc".to_vec(), + }), + ], + }; + + let mut buffer = Vec::new(); + ChunkCompressor::new(&mut buffer).compress_chunk(&chunk).unwrap(); + let decompressed = ChunkDecompressor::from_slice(&buffer).decompress().unwrap(); + + assert_eq!(chunk.terminal_nodes, decompressed.nodes); + } +} diff --git a/util/merkle/src/snapshot/error.rs b/util/merkle/src/snapshot/error.rs new file mode 100644 index 0000000000..19f6876b06 --- /dev/null +++ b/util/merkle/src/snapshot/error.rs @@ -0,0 +1,65 @@ +// Copyright 2019 Kodebox, Inc. +// This file is part of CodeChain. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
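+// `Error` is the single error type of the snapshot module. The `From` impls below
+// let `?` lift IO, RLP-decoding, trie and chunk-specific failures into it, e.g.
+// (hypothetical helper, not part of this patch):
+//
+//     fn read_chunk_file(path: &str, root: H256) -> Result<RecoveredChunk, Error> {
+//         let bytes = std::fs::read(path)?; // io::Error -> Error::IoError
+//         let raw = ChunkDecompressor::new(std::io::Cursor::new(bytes)).decompress()?;
+//         raw.recover(root) // may surface TrieError or ChunkError as Error
+//     }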
+ +use std::io::Error as IoError; + +use primitives::H256; +use rlp::DecoderError as RlpDecoderError; + +use crate::TrieError; + +#[derive(Debug)] +pub enum Error { + IoError(IoError), + RlpDecoderError(RlpDecoderError), + TrieError(TrieError), + ChunkError(ChunkError), +} + +impl From for Error { + fn from(err: IoError) -> Self { + Error::IoError(err) + } +} + +impl From for Error { + fn from(err: RlpDecoderError) -> Self { + Error::RlpDecoderError(err) + } +} + +impl From for Error { + fn from(err: TrieError) -> Self { + Error::TrieError(err) + } +} + +impl From for Error { + fn from(err: ChunkError) -> Self { + Error::ChunkError(err) + } +} + +#[derive(Debug)] +pub enum ChunkError { + TooBig, + InvalidHeight, + ChunkRootMismatch { + expected: H256, + actual: H256, + }, + InvalidContent, +} diff --git a/util/merkle/src/snapshot/mod.rs b/util/merkle/src/snapshot/mod.rs new file mode 100644 index 0000000000..b4ee77bbdb --- /dev/null +++ b/util/merkle/src/snapshot/mod.rs @@ -0,0 +1,337 @@ +// Copyright 2019 Kodebox, Inc. +// This file is part of CodeChain. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +mod chunk; +mod compress; +mod error; +mod ordered_heap; + +use std::cmp::Ordering; + +use ccrypto::BLAKE_NULL_RLP; +use hashdb::HashDB; +use primitives::H256; + +use self::chunk::{Chunk, RecoveredChunk, UnresolvedChunk}; +use self::ordered_heap::OrderedHeap; +use crate::nibbleslice::NibbleSlice; + +const CHUNK_HEIGHT: usize = 3; +const CHUNK_MAX_NODES: usize = 256; // 16 ^ (CHUNK_HEIGHT-1) + +/// Example: +/// use codechain_merkle::snapshot::Restore; +/// let mut rm = Restore::new(db, root); +/// while let Some(root) = rm.next_to_feed() { +/// let raw_chunk = request(block_hash, root)?; +/// let chunk = raw_chunk.recover(root)?; +/// rm.feed(chunk); +/// } +pub struct Restore<'a> { + db: &'a mut dyn HashDB, + pending: Option, + unresolved: OrderedHeap>, +} + +impl<'a> Restore<'a> { + pub fn new(db: &'a mut dyn HashDB, merkle_root: H256) -> Self { + let mut result = Restore { + db, + pending: None, + unresolved: OrderedHeap::new(), + }; + if merkle_root != BLAKE_NULL_RLP { + result.unresolved.push(ChunkPathPrefix::new(merkle_root).into()); + } + result + } + + pub fn feed(&mut self, chunk: RecoveredChunk) { + let pending_path = self.pending.take().expect("feed() should be called after next()"); + assert_eq!(pending_path.chunk_root, chunk.root, "Unexpected chunk"); + + // Pour nodes into the DB + for (key, value) in chunk.nodes { + self.db.emplace(key, value); + } + + // Extend search paths + for unresolved in chunk.unresolved_chunks { + self.unresolved.push(pending_path.with_unresolved_chunk(&unresolved).into()); + } + + self.pending = None; + } + + pub fn next_to_feed(&mut self) -> Option { + if let Some(path) = self.unresolved.pop() { + assert!(self.pending.is_none(), "Previous feed() was failed"); + let chunk_root = path.chunk_root; + self.pending = Some(path.0); + + Some(chunk_root) + } else { 
+ None + } + } +} + +/// Example: +/// use std::fs::File; +/// use codechain_merkle::snapshot::Snapshot; +/// +/// for chunk in Snapshot::from_hashdb(db, root) { +/// let mut file = File::create(format!("{}/{}", block_id, chunk.root))?; +/// let mut compressor = ChunkCompressor::new(&mut file); +/// compressor.compress(chunk); +/// } +pub struct Snapshot<'a> { + db: &'a dyn HashDB, + remaining: OrderedHeap>, +} + +impl<'a> Snapshot<'a> { + pub fn from_hashdb(db: &'a dyn HashDB, chunk_root: H256) -> Self { + let mut result = Snapshot { + db, + remaining: OrderedHeap::new(), + }; + if chunk_root != BLAKE_NULL_RLP { + result.remaining.push(ChunkPathPrefix::new(chunk_root).into()); + } + result + } +} + +impl<'a> Iterator for Snapshot<'a> { + type Item = Chunk; + + fn next(&mut self) -> Option { + if let Some(path) = self.remaining.pop() { + let chunk = Chunk::from_chunk_root(self.db, path.chunk_root); + for unresolved in chunk.unresolved_chunks() { + self.remaining.push(path.with_unresolved_chunk(&unresolved).into()); + } + Some(chunk) + } else { + None + } + } +} + + +#[derive(Debug)] +struct ChunkPathPrefix { + // Absolute path prefix of the chunk root + path_prefix: DecodedPathSlice, + depth: usize, + chunk_root: H256, +} + +impl ChunkPathPrefix { + fn new(chunk_root: H256) -> ChunkPathPrefix { + ChunkPathPrefix { + path_prefix: DecodedPathSlice::new(), + depth: 1, + chunk_root, + } + } + + fn with_unresolved_chunk(&self, unresolved: &UnresolvedChunk) -> ChunkPathPrefix { + ChunkPathPrefix { + path_prefix: self.path_prefix.with_path_slice(&unresolved.path_slice), + depth: self.depth + 1, + chunk_root: unresolved.chunk_root, + } + } +} + +impl Ord for DepthFirst { + fn cmp(&self, other: &Self) -> Ordering { + self.0.depth.cmp(&other.0.depth) + } +} + +impl From for DepthFirst { + fn from(path: ChunkPathPrefix) -> Self { + DepthFirst(path) + } +} + +/// Encoded value by NibbleSlice::encoded() +pub type PathSlice = Vec; + +/// for item i, i in 0..16 +pub(crate) struct DecodedPathSlice(Vec); + +impl DecodedPathSlice { + fn new() -> DecodedPathSlice { + DecodedPathSlice(Vec::new()) + } + + fn from_encoded(slice: &[u8]) -> DecodedPathSlice { + DecodedPathSlice(NibbleSlice::from_encoded(slice).to_vec()) + } + + fn with_slice_and_index(&self, slice: NibbleSlice, i: usize) -> DecodedPathSlice { + assert!(i < 16); + let mut v = self.0.clone(); + v.append(&mut slice.to_vec()); + v.push(i as u8); + DecodedPathSlice(v) + } + + fn with_slice(&self, slice: NibbleSlice) -> DecodedPathSlice { + let mut v = self.0.clone(); + v.append(&mut slice.to_vec()); + DecodedPathSlice(v) + } + + fn with_path_slice(&self, path_slice: &DecodedPathSlice) -> DecodedPathSlice { + let mut v = self.0.clone(); + v.extend(path_slice.0.as_slice()); + DecodedPathSlice(v) + } + + fn encode(&self) -> PathSlice { + let (encoded, _) = NibbleSlice::from_vec(&self.0); + encoded.to_vec() + } +} + +impl std::fmt::Debug for DecodedPathSlice { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + let (encoded, _) = NibbleSlice::from_vec(&self.0); + let nibble_slice = NibbleSlice::from_encoded(&encoded); + writeln!(f, "{:?}", nibble_slice) + } +} + +#[derive(Debug)] +struct DepthFirst(T); + +impl PartialOrd for DepthFirst +where + Self: Ord, +{ + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(&other)) + } +} + +impl PartialEq for DepthFirst +where + Self: Ord, +{ + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for DepthFirst where Self: Ord {} + 
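+// `DepthFirst` orders `ChunkPathPrefix`es by depth alone, and `OrderedHeap` is a
+// max-heap with FIFO tie-breaking (see ordered_heap.rs), so both `Snapshot` and
+// `Restore` always pop the deepest pending chunk root first and visit equally deep
+// roots in the order they were discovered. The effect is a depth-first walk over
+// the chunk tree, which presumably keeps the pending frontier small.
+//
+// Pop-order sketch: after push(depth 1), push(depth 2 "a"), push(depth 2 "b"),
+// push(depth 3), the heap pops depth 3, then depth 2 "a", then depth 2 "b",
+// then depth 1.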
+impl std::ops::Deref for DepthFirst { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::collections::HashMap; + use std::iter::FromIterator; + + use hashdb::DBValue; + use memorydb::MemoryDB; + use primitives::{Bytes, H256}; + use standardmap::{Alphabet, StandardMap, ValueMode}; + + use super::chunk::RawChunk; + use crate::{Trie, TrieDB, TrieDBMut, TrieMut}; + + fn random_insert_and_restore_with_count(count: usize) { + let standard_map = StandardMap { + alphabet: Alphabet::Custom(b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_".to_vec()), + min_key: 5, + journal_key: 0, + value_mode: ValueMode::Index, + count, + } + .make_with(&mut H256::new()); + // Unique standard map + let unique_map: HashMap = HashMap::from_iter(standard_map.into_iter()); + + let mut root = H256::new(); + let chunks: HashMap = { + // We will throw out `db` after snapshot. + let mut db = MemoryDB::new(); + let mut trie = TrieDBMut::new(&mut db, &mut root); + for (key, value) in &unique_map { + trie.insert(key, value).unwrap(); + } + + Snapshot::from_hashdb(&db, root).map(|chunk| (chunk.root, chunk.into_raw_chunk())).collect() + }; + dbg!(chunks.len()); + + let mut db = MemoryDB::new(); + let mut recover = Restore::new(&mut db, root); + while let Some(chunk_root) = recover.next_to_feed() { + let recovered = chunks[&chunk_root].recover(chunk_root).unwrap(); + recover.feed(recovered); + } + + let trie = TrieDB::try_new(&db, &root).unwrap(); + for (key, value) in &unique_map { + assert_eq!(trie.get(key).unwrap(), Some(DBValue::from_slice(value))); + } + } + + #[test] + fn random_insert_and_restore_0() { + random_insert_and_restore_with_count(0); + } + + #[test] + fn random_insert_and_restore_1() { + random_insert_and_restore_with_count(1); + } + + #[test] + fn random_insert_and_restore_2() { + random_insert_and_restore_with_count(2); + } + + #[test] + fn random_insert_and_restore_100() { + random_insert_and_restore_with_count(100); + } + + #[test] + fn random_insert_and_restore_10000() { + random_insert_and_restore_with_count(10_000); + } + + #[test] + #[ignore] + fn random_insert_and_restore_100000() { + random_insert_and_restore_with_count(100_000); + } +} diff --git a/util/merkle/src/snapshot/ordered_heap.rs b/util/merkle/src/snapshot/ordered_heap.rs new file mode 100644 index 0000000000..d83efd77c1 --- /dev/null +++ b/util/merkle/src/snapshot/ordered_heap.rs @@ -0,0 +1,76 @@ +// Copyright 2019 Kodebox, Inc. +// This file is part of CodeChain. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
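+// A max-heap that remembers insertion order: every entry carries a `seq` number and
+// the comparison is "value first, then the earlier `seq` wins", so elements that
+// compare equal pop out first-in-first-out instead of in the unspecified order a
+// plain `BinaryHeap` would give for ties.
+//
+// Tie-breaking sketch (plain integers as the value type):
+//
+//     let mut heap = OrderedHeap::new();
+//     heap.push(1u32);
+//     heap.push(2u32); // first 2
+//     heap.push(2u32); // second 2
+//     assert_eq!(heap.pop(), Some(2)); // the 2 that was pushed first
+//     assert_eq!(heap.pop(), Some(2)); // then the second 2
+//     assert_eq!(heap.pop(), Some(1));
+//     assert_eq!(heap.pop(), None);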
+ +use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd}; +use std::collections::BinaryHeap; + +pub struct OrderedHeap { + heap: BinaryHeap>, + seq: usize, +} + +impl OrderedHeap { + pub fn new() -> OrderedHeap { + OrderedHeap { + heap: BinaryHeap::new(), + seq: 0, + } + } + + pub fn push(&mut self, value: T) { + self.heap.push(OrderedHeapEntry { + seq: self.seq, + value, + }); + self.seq += 1; + } + + pub fn pop(&mut self) -> Option { + self.heap.pop().map(|x| x.value) + } +} + +#[derive(Debug, Clone)] +struct OrderedHeapEntry { + seq: usize, + value: T, +} + +impl Ord for OrderedHeapEntry { + fn cmp(&self, other: &Self) -> Ordering { + self.value.cmp(&other.value).then(self.seq.cmp(&other.seq).reverse()) + } +} + +impl PartialOrd for OrderedHeapEntry +where + Self: Ord, +{ + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(&other)) + } +} + +impl PartialEq for OrderedHeapEntry +where + Self: Ord, +{ + fn eq(&self, other: &Self) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for OrderedHeapEntry where Self: Ord {} diff --git a/util/merkle/src/triedbmut.rs b/util/merkle/src/triedbmut.rs index 17b2a23da6..f9fa2209f7 100644 --- a/util/merkle/src/triedbmut.rs +++ b/util/merkle/src/triedbmut.rs @@ -171,6 +171,113 @@ impl<'a> TrieDBMut<'a> { } } + pub(crate) fn insert_raw(&mut self, node: RlpNode) -> crate::Result> { + let mut old_val = None; + let cur_hash = *self.root; + *self.root = self.insert_raw_aux(node, Some(cur_hash), &mut old_val)?; + + Ok(old_val) + } + + fn insert_raw_aux( + &mut self, + node: RlpNode, + cur_node_hash: Option, + old_val: &mut Option, + ) -> crate::Result { + let path = match &node { + RlpNode::Leaf(slice, _) | RlpNode::Branch(slice, _) => slice, + }; + + match cur_node_hash { + Some(hash) => { + let existing_node_rlp = self.db.get(&hash).ok_or_else(|| TrieError::IncompleteDatabase(hash))?; + match RlpNode::decoded(&existing_node_rlp) { + Some(RlpNode::Leaf(partial, value)) => { + // Renew the Leaf + if &partial == path { + let hash = self.db.insert(&RlpNode::encoded(node)); + *old_val = Some(existing_node_rlp); + Ok(hash) + } else { + // Make branch node and insert Leaves + let common = partial.common_prefix(&path); + let mut new_child = empty_children(); + let new_partial = partial.mid(common); + let new_path = path.mid(common); + new_child[new_partial.at(0) as usize] = Some(self.insert_aux( + new_partial.mid(1), + value, + new_child[new_partial.at(0) as usize], + old_val, + )?); + new_child[new_path.at(0) as usize] = Some(self.insert_raw_aux( + node.mid(common + 1), + new_child[new_path.at(0) as usize], + old_val, + )?); + + let hash = self + .db + .insert(&RlpNode::encoded_until(RlpNode::Branch(partial, new_child.into()), common)); + + Ok(hash) + } + } + Some(RlpNode::Branch(partial, mut children)) => { + let common = partial.common_prefix(&path); + + // Make new branch node and insert leaf and branch with new path + if common < partial.len() { + let mut new_child = empty_children(); + let new_partial = partial.mid(common); + let new_path = path.mid(common); + let o_branch = RlpNode::Branch(new_partial.mid(1), children); + + let b_hash = self.db.insert(&RlpNode::encoded(o_branch)); + + new_child[new_partial.at(0) as usize] = Some(b_hash); + new_child[new_path.at(0) as usize] = Some(self.insert_raw_aux( + node.mid(common + 1), + new_child[new_path.at(0) as usize], + old_val, + )?); + + let hash = self + .db + .insert(&RlpNode::encoded_until(RlpNode::Branch(partial, new_child.into()), common)); + + Ok(hash) + } else { + // Insert 
leaf into the branch node + let new_path = path.mid(common); + + children[new_path.at(0) as usize] = Some(self.insert_raw_aux( + node.mid(common + 1), + children[new_path.at(0) as usize], + old_val, + )?); + + let new_branch = RlpNode::Branch(partial, children); + let node_rlp = RlpNode::encoded(new_branch); + let hash = self.db.insert(&node_rlp); + + Ok(hash) + } + } + None => { + let hash = self.db.insert(&RlpNode::encoded(node)); + Ok(hash) + } + } + } + None => { + let hash = self.db.insert(&RlpNode::encoded(node)); + Ok(hash) + } + } + } + /// Remove auxiliary fn remove_aux( &mut self,
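+// Note on `insert_raw`/`insert_raw_aux` above: unlike `insert`, which takes a
+// (key, value) pair, `insert_raw` takes an already-decoded node whose partial path
+// is the full path relative to the current root, and splits existing leaves and
+// branches on the common prefix just as `insert_aux` does. `RawChunk::recover`
+// uses it to replay a chunk's terminal nodes under an empty root; the returned
+// `Option<DBValue>` is the RLP previously stored at that exact path (if any), so
+// the caller can detect conflicting chunk content.
+//
+// Usage sketch, mirroring snapshot/chunk.rs (names from that module):
+//
+//     let encoded = DecodedPathSlice::from_encoded(&node.path_slice).with_slice(slice).encode();
+//     let old_val = trie.insert_raw(Node::Leaf(NibbleSlice::from_encoded(&encoded), data))?;
+//     if old_val.map_or(false, |rlp| rlp.as_ref() != node.node_rlp.as_slice()) {
+//         return Err(ChunkError::InvalidContent.into());
+//     }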