diff --git a/crates/core/src/commands/dump.rs b/crates/core/src/commands/dump.rs index c4c7b218..9d52744d 100644 --- a/crates/core/src/commands/dump.rs +++ b/crates/core/src/commands/dump.rs @@ -1,8 +1,10 @@ -use std::io::Write; +use std::{io::Write, thread::scope}; + +use pariter::IteratorExt; use crate::{ backend::node::{Node, NodeType}, - blob::{BlobId, BlobType}, + blob::{BlobId, BlobType, DataId}, error::{ErrorKind, RusticError, RusticResult}, repository::{IndexedFull, Repository}, }; @@ -11,7 +13,6 @@ use crate::{ /// /// # Type Parameters /// -/// * `P` - The progress bar type. /// * `S` - The type of the indexed tree. /// /// # Arguments @@ -23,7 +24,9 @@ use crate::{ /// # Errors /// /// * If the node is not a file. -pub(crate) fn dump( +/// * If a blob cannot be fetched from the backend. +/// * If writing to `w` fails. +pub(crate) fn dump( repo: &Repository, node: &Node, w: &mut impl Write, @@ -36,15 +39,42 @@ pub(crate) fn dump( .attach_context("node_type", node.node_type.to_string())); } - for id in node.content.as_ref().unwrap() { + let Some(content) = node.content.as_ref() else { + return Ok(()); + }; + + // Files with fewer than two blobs have nothing to overlap, so skip the worker setup. 
+ if content.len() < 2 { + return dump_sequential(repo, content, w); + } + + scope(|s| -> RusticResult<()> { + content + .iter() + .map(|id| BlobId::from(**id)) + .parallel_map_scoped(s, |id| repo.get_blob_cached(&id, BlobType::Data)) + .try_for_each(|res| write_blob(w, &res?)) + }) +} + +fn dump_sequential( + repo: &Repository, + content: &[DataId], + w: &mut impl Write, +) -> RusticResult<()> { + for id in content { let data = repo.get_blob_cached(&BlobId::from(**id), BlobType::Data)?; - w.write_all(&data).map_err(|err| { - RusticError::with_source( - ErrorKind::InputOutput, - "Failed to write data to writer.", - err, - ) - })?; + write_blob(w, &data)?; } Ok(()) } + +fn write_blob(w: &mut impl Write, data: &[u8]) -> RusticResult<()> { + w.write_all(data).map_err(|err| { + RusticError::with_source( + ErrorKind::InputOutput, + "Failed to write data to writer.", + err, + ) + }) +} diff --git a/crates/core/src/repository.rs b/crates/core/src/repository.rs index 260ba697..fc09dd54 100644 --- a/crates/core/src/repository.rs +++ b/crates/core/src/repository.rs @@ -1752,11 +1752,14 @@ impl Repository { /// # Errors /// /// * If the node is not a file. - /// + /// /// # Note /// /// Currently, only regular file nodes are supported. 
- pub fn dump(&self, node: &Node, w: &mut impl Write) -> RusticResult<()> { + pub fn dump(&self, node: &Node, w: &mut impl Write) -> RusticResult<()> + where + S: Sync, + { commands::dump::dump(self, node, w) } diff --git a/crates/core/tests/integration.rs b/crates/core/tests/integration.rs index 076b3ed8..43840ec7 100644 --- a/crates/core/tests/integration.rs +++ b/crates/core/tests/integration.rs @@ -28,6 +28,7 @@ mod integration { mod check; mod chunker; mod copy; + mod dump; mod find; mod hotcold; mod key; diff --git a/crates/core/tests/integration/dump.rs b/crates/core/tests/integration/dump.rs new file mode 100644 index 00000000..42a9a4e5 --- /dev/null +++ b/crates/core/tests/integration/dump.rs @@ -0,0 +1,80 @@ +use std::{fs, io::Write, path::PathBuf, str::FromStr}; + +use anyhow::Result; +use bytesize::ByteSize; +use pretty_assertions::assert_eq; +use rstest::rstest; +use tempfile::tempdir; + +use rustic_core::{ + BackupOptions, ConfigOptions, IndexedFullStatus, PathList, Repository, + repofile::{Chunker, SnapshotFile}, +}; + +use super::{RepoOpen, set_up_repo}; + +/// Build a deterministic byte payload of the requested length. +fn payload(len: usize) -> Vec { + (0..len) + .map(|i| u8::try_from(i % 251).expect("251 always fits in u8")) + .collect() +} + +/// Back up a single file with the given content into `repo`, configuring the +/// fixed-size chunker so the file reliably splits into multiple blobs. +/// +/// Returns the repository in the [`IndexedFullStatus`] state along with the +/// snapshot path that points at the backed-up file. 
+fn backup_single_file( + repo: RepoOpen, + name: &str, + data: &[u8], +) -> Result<(Repository, String)> { + let dir = tempdir()?; + let file_path = dir.path().join(name); + fs::File::create(&file_path)?.write_all(data)?; + + let mut repo = repo.to_indexed_ids()?; + let config = ConfigOptions::default() + .set_chunker(Chunker::FixedSize) + .set_chunk_size(ByteSize(4096)); + assert!(repo.apply_config(&config)?); + + let paths = PathList::from_iter([file_path]); + let opts = BackupOptions::default().as_path(PathBuf::from_str(name)?); + let _snapshot = repo.backup(&opts, &paths, SnapshotFile::default())?; + + Ok((repo.to_indexed()?, format!("latest:{name}"))) +} + +#[rstest] +fn test_dump_multi_blob_matches_source(set_up_repo: Result) -> Result<()> { + let data = payload(64 * 1024); + let (repo, snapshot_path) = backup_single_file(set_up_repo?, "file.bin", &data)?; + let node = repo.node_from_snapshot_path(&snapshot_path, |_| true)?; + + // Sanity: the configured chunker must have produced more than one blob, + // otherwise the parallel path is never taken. + let blob_count = node.content.as_ref().map_or(0, Vec::len); + assert!( + blob_count > 1, + "expected the test file to span multiple blobs, got {blob_count}", + ); + + let mut out = Vec::new(); + repo.dump(&node, &mut out)?; + assert_eq!(out, data); + Ok(()) +} + +#[rstest] +fn test_dump_default_options_match_source(set_up_repo: Result) -> Result<()> { + let data = payload(32 * 1024); + let (repo, snapshot_path) = backup_single_file(set_up_repo?, "file.bin", &data)?; + let node = repo.node_from_snapshot_path(&snapshot_path, |_| true)?; + + let mut out = Vec::new(); + repo.dump(&node, &mut out)?; + assert_eq!(out, data); + Ok(()) +}