Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/keboola/vcr/sanitizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,17 +789,21 @@ def create_default_sanitizer(secrets: dict[str, Any]) -> DefaultSanitizer:
"""
Create a default sanitizer from secrets.

Extracts all string values from the secrets dict and returns a
DefaultSanitizer that handles OAuth bodies, JSON responses, headers,
and URL parameters automatically.
Collects only values under #-prefixed keys (Keboola's encrypted-field
convention) and returns a DefaultSanitizer that handles OAuth bodies,
JSON responses, headers, and URL parameters automatically.

Non-sensitive metadata fields (oauthVersion, id, created, etc.) are
intentionally skipped to avoid corrupting URL paths in cassettes.

Args:
secrets: Dictionary of secret values to redact

Returns:
A DefaultSanitizer with extracted secret values
"""
secret_values = extract_values(secrets, [])
secret_values: list[str] = []
_collect_hash_values(secrets, secret_values)
return DefaultSanitizer(sensitive_values=secret_values)


Expand Down
93 changes: 93 additions & 0 deletions src/keboola/vcr/scaffolder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,37 @@
falls back to a numbered index). An optional *secrets file* can be
provided to deep-merge real credentials at recording time while
keeping only dummy values in the committed ``config.json``.

## Standard repo layout (used by the CLI defaults)

``tests/setup/`` is the conventional home for scaffold inputs:

- ``tests/setup/configs.json`` — test definitions (wrapped or raw format)
- ``tests/setup/input_files/`` — CSV/files for writer components

When ``input_files_dir`` is provided (default: ``tests/setup/input_files``),
the scaffolder reads each test's ``config.json`` after folder creation and
copies matching files into the test's ``in/tables/`` or ``in/files/`` based
on the ``storage.input.tables[].destination`` / ``storage.input.files[].destination``
entries. This is the mechanism used to supply writer components with their
input data during recording without bundling large CSVs in the test tree.

Example ``configs.json`` entry for a writer::

{
"name": "01_write_data",
"config": {
"parameters": {"#api_key": "DUMMY"},
"storage": {
"input": {
"tables": [{"destination": "my_input_table.csv"}]
}
}
}
}

Place ``tests/setup/input_files/my_input_table.csv`` and run scaffold — the
file is copied into each test's ``source/data/in/tables/`` automatically.
"""

from __future__ import annotations
Expand Down Expand Up @@ -72,6 +103,7 @@ def scaffold_from_json(
secrets_file: Path | None = None,
chain_state: bool = False,
regenerate: bool = False,
input_files_dir: Path | None = None,
) -> list[Path]:
"""
Create test folders from definitions file.
Expand All @@ -90,6 +122,12 @@ def scaffold_from_json(
regenerate: Delete existing cassettes before recording so fresh
interactions are captured from the live API. When False,
tests that already have a cassette are skipped.
input_files_dir: Optional directory containing CSV/files for writer
components. After each test folder is scaffolded, the files named by
the ``storage.input`` mappings in that test's ``config.json`` are
copied from this directory into the test's ``in/tables/`` or
``in/files/`` folder.
Defaults to ``tests/setup/input_files`` when called via the CLI.
If the directory does not exist it is silently skipped.

Returns:
List of created test folder paths
Expand Down Expand Up @@ -146,6 +184,7 @@ def scaffold_from_json(
secrets_override=secrets_override,
input_state=chained_state,
regenerate=regenerate,
input_files_dir=Path(input_files_dir) if input_files_dir is not None else None,
)
created_paths.append(test_path)

Expand Down Expand Up @@ -208,6 +247,7 @@ def _scaffold_single_test(
secrets_override: dict[str, Any] | None = None,
input_state: dict[str, Any] | None = None,
regenerate: bool = False,
input_files_dir: Path | None = None,
) -> Path:
"""Create folder structure for a single test."""
# Validate definition
Expand Down Expand Up @@ -251,6 +291,10 @@ def _scaffold_single_test(

logger.info(f"Created test folder structure: {test_dir}")

# Copy input files before running the component so the writer can find them
if input_files_dir is not None:
self._copy_input_files([test_dir], input_files_dir)

# Record cassette if requested
if record and component_script:
cassette_path = source_data_dir / "cassettes" / VCRRecorder.DEFAULT_CASSETTE_FILE
Expand Down Expand Up @@ -373,6 +417,55 @@ def run_component():
# Private helpers
# ------------------------------------------------------------------

@staticmethod
def _copy_input_files(created_paths: list[Path], input_files_dir: Path) -> None:
    """Copy shared input files into scaffolded test directories.

    For every test directory, reads ``source/data/config.json`` and copies
    each file named by ``storage.input.tables[].destination`` into
    ``source/data/in/tables/`` and each file named by
    ``storage.input.files[].destination`` into ``source/data/in/files/``.
    The source is looked up under the same relative name inside
    *input_files_dir*.

    The copy is best-effort and never raises for bad inputs:

    - a missing *input_files_dir*, a missing ``config.json`` or a missing
      source file is skipped silently (the component itself reports an
      absent required input at run time);
    - an unreadable or malformed ``config.json`` is skipped with a warning;
    - ``storage`` sections that do not have the expected dict/list shape
      are ignored;
    - destinations that are absolute or contain ``..`` are skipped with a
      warning so nothing is read or written outside the two directories.

    Args:
        created_paths: Scaffolded test directories to populate.
        input_files_dir: Directory holding the shared input files.
    """
    if not input_files_dir.is_dir():
        return

    for test_dir in created_paths:
        data_dir = test_dir / "source" / "data"
        config_path = data_dir / "config.json"

        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            config = json.loads(config_path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            continue
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            logger.warning(f"Skipping input file copy, cannot read {config_path}: {exc}")
            continue

        # Tolerate ``null`` / wrongly-typed sections instead of crashing on .get().
        storage = config.get("storage") if isinstance(config, dict) else None
        input_mapping = storage.get("input") if isinstance(storage, dict) else None
        if not isinstance(input_mapping, dict):
            continue

        # The storage.input key doubles as the sub-directory name under ``in/``.
        for kind in ("tables", "files"):
            entries = input_mapping.get(kind)
            if not isinstance(entries, list):
                continue

            target_dir = data_dir / "in" / kind
            for entry in entries:
                dest = entry.get("destination") if isinstance(entry, dict) else None
                if not dest or not isinstance(dest, str):
                    continue

                relative = Path(dest)
                if relative.is_absolute() or ".." in relative.parts:
                    logger.warning(f"Skipping input destination outside the test folder: {dest}")
                    continue

                src = input_files_dir / relative
                # is_file() rather than exists(): copy2 cannot copy a directory.
                if not src.is_file():
                    continue

                target = target_dir / relative
                # Create the full parent chain so nested destinations work too.
                target.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(src, target)
                logger.info(f"Copied {src} -> {target}")

def _mask_secrets(
self,
config: dict[str, Any],
Expand Down
2 changes: 1 addition & 1 deletion tests/test_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def test_custom_sanitizers_stored_in_composite(self, tmp_cassette_dir):
def test_no_sanitizers_uses_create_default_sanitizer(self, tmp_cassette_dir):
with patch("keboola.vcr.recorder.vcr") as mock_vcr:
mock_vcr.VCR.return_value = MagicMock()
r = VCRRecorder(cassette_dir=tmp_cassette_dir, secrets={"k": "v"})
r = VCRRecorder(cassette_dir=tmp_cassette_dir, secrets={"#k": "v"})
assert isinstance(r.sanitizer, DefaultSanitizer)
assert "v" in r.sanitizer.sensitive_values

Expand Down
3 changes: 2 additions & 1 deletion tests/test_sanitizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,11 +474,12 @@ def test_ignores_non_hash_prefixed_keys(self):

class TestCreateDefaultSanitizer:
def test_creates_default_sanitizer_with_secret_values(self):
secrets = {"api_key": "my-key", "password": "s3cret"}
secrets = {"#api_key": "my-key", "#password": "s3cret", "oauthVersion": "2.0"}
s = create_default_sanitizer(secrets)
assert isinstance(s, DefaultSanitizer)
assert "my-key" in s.sensitive_values
assert "s3cret" in s.sensitive_values
assert "2.0" not in s.sensitive_values

def test_empty_secrets(self):
s = create_default_sanitizer({})
Expand Down