Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 42 additions & 12 deletions crates/core/src/commands/dump.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use std::io::Write;
use std::{io::Write, thread::scope};

use pariter::IteratorExt;

use crate::{
backend::node::{Node, NodeType},
blob::{BlobId, BlobType},
blob::{BlobId, BlobType, DataId},
error::{ErrorKind, RusticError, RusticResult},
repository::{IndexedFull, Repository},
};
Expand All @@ -11,7 +13,6 @@ use crate::{
///
/// # Type Parameters
///
/// * `P` - The progress bar type.
/// * `S` - The type of the indexed tree.
///
/// # Arguments
Expand All @@ -23,7 +24,9 @@ use crate::{
/// # Errors
///
/// * If the node is not a file.
pub(crate) fn dump<S: IndexedFull>(
/// * If a blob cannot be fetched from the backend.
/// * If writing to `w` fails.
pub(crate) fn dump<S: IndexedFull + Sync>(
repo: &Repository<S>,
node: &Node,
w: &mut impl Write,
Expand All @@ -36,15 +39,42 @@ pub(crate) fn dump<S: IndexedFull>(
.attach_context("node_type", node.node_type.to_string()));
}

for id in node.content.as_ref().unwrap() {
let Some(content) = node.content.as_ref() else {
return Ok(());
};

// Single-blob files have nothing to overlap, so skip the worker setup.
if content.len() < 2 {
return dump_sequential(repo, content, w);
}

scope(|s| -> RusticResult<()> {
content
.iter()
.map(|id| BlobId::from(**id))
.parallel_map_scoped(s, |id| repo.get_blob_cached(&id, BlobType::Data))
.try_for_each(|res| write_blob(w, &res?))
})
}

/// Writes the blobs listed in `content` to `w` one at a time, in order.
///
/// This is the path `dump` takes for files with fewer than two blobs, where
/// there is nothing to overlap and no worker setup is needed.
///
/// # Arguments
///
/// * `repo` - The repository the blobs are fetched from.
/// * `content` - The ids of the data blobs, in file order.
/// * `w` - The writer that receives the blob data.
///
/// # Errors
///
/// * If a blob cannot be fetched via `get_blob_cached`.
/// * If writing to `w` fails.
fn dump_sequential<S: IndexedFull>(
    repo: &Repository<S>,
    content: &[DataId],
    w: &mut impl Write,
) -> RusticResult<()> {
    for id in content {
        let data = repo.get_blob_cached(&BlobId::from(**id), BlobType::Data)?;
        // Write each blob exactly once; `write_blob` already maps the I/O
        // error, so a second inline `write_all` would duplicate the output.
        write_blob(w, &data)?;
    }
    Ok(())
}

/// Writes all of `data` to `w`.
///
/// # Errors
///
/// * If the writer reports an I/O error; it is wrapped in a `RusticError`
///   of kind `ErrorKind::InputOutput` with the I/O error as its source.
fn write_blob(w: &mut impl Write, data: &[u8]) -> RusticResult<()> {
    match w.write_all(data) {
        Ok(()) => Ok(()),
        Err(io_err) => Err(RusticError::with_source(
            ErrorKind::InputOutput,
            "Failed to write data to writer.",
            io_err,
        )),
    }
}
7 changes: 5 additions & 2 deletions crates/core/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1752,11 +1752,14 @@ impl<S: IndexedFull> Repository<S> {
/// # Errors
///
/// * If the node is not a file.
///
///
/// # Note
///
/// Currently, only regular file nodes are supported.
pub fn dump(&self, node: &Node, w: &mut impl Write) -> RusticResult<()> {
pub fn dump(&self, node: &Node, w: &mut impl Write) -> RusticResult<()>
where
S: Sync,
{
commands::dump::dump(self, node, w)
}

Expand Down
1 change: 1 addition & 0 deletions crates/core/tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ mod integration {
mod check;
mod chunker;
mod copy;
mod dump;
mod find;
mod hotcold;
mod key;
Expand Down
80 changes: 80 additions & 0 deletions crates/core/tests/integration/dump.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use std::{fs, io::Write, path::PathBuf, str::FromStr};

use anyhow::Result;
use bytesize::ByteSize;
use pretty_assertions::assert_eq;
use rstest::rstest;
use tempfile::tempdir;

use rustic_core::{
BackupOptions, ConfigOptions, IndexedFullStatus, PathList, Repository,
repofile::{Chunker, SnapshotFile},
};

use super::{RepoOpen, set_up_repo};

/// Build a deterministic byte payload of the requested length.
fn payload(len: usize) -> Vec<u8> {
    // Byte `i` is `i % 251`, so the pattern repeats every 251 bytes and the
    // output depends only on `len`.
    let mut bytes = Vec::with_capacity(len);
    for index in 0..len {
        let byte = u8::try_from(index % 251).expect("251 always fits in u8");
        bytes.push(byte);
    }
    bytes
}

/// Backup a single file with the given content into `repo`, configuring the
/// fixed-size chunker so the file reliably splits into multiple blobs.
///
/// Returns the repository in the [`IndexedFullStatus`] state along with the
/// snapshot path that points at the backed-up file.
fn backup_single_file(
    repo: RepoOpen,
    name: &str,
    data: &[u8],
) -> Result<(Repository<IndexedFullStatus>, String)> {
    // Stage the payload as a real file inside a scratch directory.
    let staging = tempdir()?;
    let source = staging.path().join(name);
    {
        let mut source_file = fs::File::create(&source)?;
        source_file.write_all(data)?;
    }

    // Switch the repository to fixed-size 4096-byte chunks before backing up.
    let mut repo = repo.to_indexed_ids()?;
    let config = ConfigOptions::default()
        .set_chunker(Chunker::FixedSize)
        .set_chunk_size(ByteSize(4096));
    assert!(repo.apply_config(&config)?);

    // Store the file in the snapshot under `name` rather than its temp path.
    let stored_as = PathBuf::from_str(name)?;
    let backup_opts = BackupOptions::default().as_path(stored_as);
    let sources = PathList::from_iter([source]);
    let _snapshot = repo.backup(&backup_opts, &sources, SnapshotFile::default())?;

    let snapshot_path = format!("latest:{name}");
    let indexed = repo.to_indexed()?;
    Ok((indexed, snapshot_path))
}

/// Dumping a file that spans several blobs must reproduce the source bytes.
#[rstest]
fn test_dump_multi_blob_matches_source(set_up_repo: Result<RepoOpen>) -> Result<()> {
    let expected = payload(64 * 1024);
    let (repository, path_in_snapshot) =
        backup_single_file(set_up_repo?, "file.bin", &expected)?;
    let file_node = repository.node_from_snapshot_path(&path_in_snapshot, |_| true)?;

    // Precondition: with only one blob the dump would take the sequential
    // shortcut and the parallel path would go untested.
    let blob_count = match file_node.content.as_ref() {
        Some(blobs) => blobs.len(),
        None => 0,
    };
    assert!(
        blob_count > 1,
        "expected the test file to span multiple blobs, got {blob_count}",
    );

    let mut dumped: Vec<u8> = Vec::new();
    repository.dump(&file_node, &mut dumped)?;
    assert_eq!(dumped, expected);
    Ok(())
}

/// Dumping a 32 KiB backed-up file must reproduce the source bytes.
///
/// NOTE(review): despite the name, `backup_single_file` always applies the
/// fixed-size chunker configuration, so this does not exercise default
/// chunker options. Confirm whether a separate default-config helper was
/// intended.
#[rstest]
fn test_dump_default_options_match_source(set_up_repo: Result<RepoOpen>) -> Result<()> {
    let expected = payload(32 * 1024);
    let (repository, path_in_snapshot) =
        backup_single_file(set_up_repo?, "file.bin", &expected)?;
    let file_node = repository.node_from_snapshot_path(&path_in_snapshot, |_| true)?;

    let mut dumped: Vec<u8> = Vec::new();
    repository.dump(&file_node, &mut dumped)?;
    assert_eq!(dumped, expected);
    Ok(())
}
Loading