diff --git a/.coverage b/.coverage deleted file mode 100644 index 8a62b2f..0000000 Binary files a/.coverage and /dev/null differ diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 4b88428..cc2a8f9 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -12,6 +12,7 @@ from defusedxml.minidom import parseString # Create a safe random number generator +_SAFE_RANDOM = SystemRandom() # Set up logging LOG = logging.getLogger("dicttoxml") @@ -29,8 +30,7 @@ def make_id(element: str, start: int = 100000, end: int = 999999) -> str: Returns: str: The generated ID. """ - safe_random = SystemRandom() - return f"{element}_{safe_random.randint(start, end)}" + return f"{element}_{_SAFE_RANDOM.randint(start, end)}" def get_unique_id(element: str) -> str: @@ -641,7 +641,7 @@ def dicttoxml( item_wrap: bool = True, item_func: Callable[[str], str] = default_item_func, cdata: bool = False, - xml_namespaces: dict[str, Any] = {}, + xml_namespaces: dict[str, Any] | None = None, list_headers: bool = False, xpath_format: bool = False, ) -> bytes: @@ -797,6 +797,8 @@ def dicttoxml( output = [] namespace_str = "" + if xml_namespaces is None: + xml_namespaces = {} for prefix in xml_namespaces: if prefix == 'xsi': for schema_att in xml_namespaces[prefix]: diff --git a/lat.md/behavior.md b/lat.md/behavior.md index 3e199c6..c82cfcc 100644 --- a/lat.md/behavior.md +++ b/lat.md/behavior.md @@ -14,6 +14,8 @@ Default output includes an XML declaration, wraps content in `all`, pretty print [[json2xml/json2xml.py#Json2xml#to_xml]] calls [[json2xml/dicttoxml.py#dicttoxml]] with the configured wrapper, root, `attr_type`, `item_wrap`, `cdata`, and `list_headers` options. When `item_wrap=False`, list values repeat the parent tag instead of creating `` children. When `pretty=False`, the library returns the serializer bytes directly. +The Rust fast path in [[rust/src/lib.rs#write_dict_contents]] and [[rust/src/lib.rs#write_list_contents]] mirrors those Python list-wrapper rules. `list_headers=True` suppresses the outer list container and repeats the parent tag only for nested dict items, while primitive items still use the same scalar tags that Python emits. + ## XPath 3.1 format XPath mode swaps the project-specific XML shape for the W3C `json-to-xml` mapping with typed element names and the XPath functions namespace. @@ -24,4 +26,4 @@ When `xpath_format=True`, [[json2xml/dicttoxml.py#dicttoxml]] delegates payload Pretty printing acts as a validation step, because the formatter reparses the generated XML before returning it. -[[json2xml/json2xml.py#Json2xml#to_xml]] uses `defusedxml.minidom.parseString` before `toprettyxml`. If the generated bytes are not well-formed XML, the converter raises `InvalidDataError` instead of returning broken pretty output. \ No newline at end of file +[[json2xml/json2xml.py#Json2xml#to_xml]] uses `defusedxml.minidom.parseString` before `toprettyxml`. If the generated bytes are not well-formed XML, the converter raises `InvalidDataError` instead of returning broken pretty output. diff --git a/lat.md/tests.md b/lat.md/tests.md index 79838d2..05bd2a4 100644 --- a/lat.md/tests.md +++ b/lat.md/tests.md @@ -28,4 +28,16 @@ XPath mode should emit the W3C XPath functions namespace and typed child element ### Item-wrap false repeats parent tag -Disabling item wrapping should repeat the parent element name for primitive list items instead of producing nested `` tags. \ No newline at end of file +Disabling item wrapping should repeat the parent element name for primitive list items instead of producing nested `` tags. + +### Default xml namespaces stay empty + +Calling `dicttoxml` without `xml_namespaces` should preserve the legacy root output and avoid adding namespace declarations or `xsi:` attributes implicitly. + +### Explicit xml namespaces emit schema attributes + +Supplying namespace prefixes and an `xsi` mapping should emit the expected `xmlns:*` declarations plus supported schema attributes without mutating the caller input. + +### Xml namespace inputs are not mutated across calls + +Reusing one `xml_namespaces` mapping across multiple `dicttoxml` calls should return identical XML each time so namespace declarations never accumulate on the shared dict. diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 30b51da..bb73436 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -340,7 +340,14 @@ fn write_dict_contents( // Lists in dicts get special wrapping treatment if let Ok(list) = val.cast::() { - if cfg.item_wrap { + let first_is_scalar = list + .get_item(0) + .ok() + .map(|item| is_python_scalar(&item)) + .unwrap_or(false); + let wrap_list_container = (cfg.item_wrap || !first_is_scalar) && !cfg.list_headers; + + if wrap_list_container { write_open_tag(out, &xml_key, name_attr, type_attr(cfg, "list")); write_list_contents(py, out, list, &xml_key, cfg)?; write_close_tag(out, &xml_key); @@ -354,6 +361,18 @@ fn write_dict_contents( Ok(()) } +/// Return true when a Python object is treated as a primitive scalar by the +/// pure-Python serializer for list-wrapper decisions. +#[cfg(feature = "python")] +#[inline] +fn is_python_scalar(obj: &Bound<'_, PyAny>) -> bool { + obj.is_none() + || obj.is_instance_of::() + || obj.is_instance_of::() + || obj.is_instance_of::() + || obj.is_instance_of::() +} + /// Write all items of a list into the buffer. #[cfg(feature = "python")] fn write_list_contents( @@ -363,7 +382,8 @@ fn write_list_contents( parent: &str, cfg: &ConvertConfig, ) -> PyResult<()> { - let tag_name = if cfg.list_headers { + let scalar_tag_name = if cfg.item_wrap { "item" } else { parent }; + let dict_tag_name = if cfg.list_headers { parent } else if cfg.item_wrap { "item" @@ -375,14 +395,19 @@ fn write_list_contents( // Dicts inside lists have special wrapping logic if let Ok(dict) = item.cast::() { if cfg.item_wrap || cfg.list_headers { - write_open_tag(out, tag_name, None, type_attr(cfg, "dict")); + let dict_type_attr = if cfg.list_headers { + None + } else { + type_attr(cfg, "dict") + }; + write_open_tag(out, dict_tag_name, None, dict_type_attr); write_dict_contents(py, out, dict, cfg)?; - write_close_tag(out, tag_name); + write_close_tag(out, dict_tag_name); } else { write_dict_contents(py, out, dict, cfg)?; } } else { - write_value(py, out, &item, tag_name, None, cfg, true)?; + write_value(py, out, &item, scalar_tag_name, None, cfg, true)?; } } Ok(()) diff --git a/tests/conftest.py b/tests/conftest.py index 9e52860..9c40ee5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,7 @@ import json from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List +from typing import TYPE_CHECKING, Any import pytest diff --git a/tests/test_dict2xml.py b/tests/test_dict2xml.py index 42fe188..37dfaee 100644 --- a/tests/test_dict2xml.py +++ b/tests/test_dict2xml.py @@ -500,6 +500,92 @@ def test_dicttoxml_with_xml_namespaces(self) -> None: result = dicttoxml.dicttoxml(data, xml_namespaces=namespaces) assert b'xmlns="http://example.com"' in result + # @lat: [[tests#Conversion behavior#Default xml namespaces stay empty]] + def test_dicttoxml_without_xml_namespaces_keeps_previous_output(self) -> None: + """Test dicttoxml without xml_namespaces keeps the default XML shape.""" + data = {"bike": "blue"} + result = dicttoxml.dicttoxml(data, attr_type=False) + assert ( + b'' + b"blue" == result + ) + assert b"xmlns" not in result + assert b"xsi:" not in result + + # @lat: [[tests#Conversion behavior#Explicit xml namespaces emit schema attributes]] + def test_dicttoxml_with_explicit_xml_namespaces_emits_schema_attributes(self) -> None: + """Test dicttoxml emits explicit namespace declarations and XSI schema attributes.""" + data = {"bike": "blue"} + namespaces = { + "veh": "https://example.com/vehicle", + "xsi": { + "schemaInstance": "http://www.w3.org/2001/XMLSchema-instance", + "schemaLocation": "https://example.com/vehicle vehicle.xsd", + "noNamespaceSchemaLocation": "vehicle-no-namespace.xsd", + }, + } + namespaces_before = { + prefix: value.copy() if isinstance(value, dict) else value + for prefix, value in namespaces.items() + } + + result = dicttoxml.dicttoxml( + data, + custom_root="vehicle", + attr_type=False, + xml_namespaces=namespaces, + ) + + assert ( + b'' + b'' + b"blue" + b"" == result + ) + assert b"xsi:noNamespaceSchemaLocation" not in result + assert namespaces == namespaces_before + + # @lat: [[tests#Conversion behavior#Xml namespace inputs are not mutated across calls]] + def test_dicttoxml_reuses_xml_namespaces_without_mutating_input(self) -> None: + """Test reusing xml_namespaces across calls does not mutate or accumulate state.""" + data = {"bike": "blue"} + namespaces = { + "veh": "https://example.com/vehicle", + "xsi": { + "schemaInstance": "http://www.w3.org/2001/XMLSchema-instance", + "schemaLocation": "https://example.com/vehicle vehicle.xsd", + }, + } + namespaces_before = { + prefix: value.copy() if isinstance(value, dict) else value + for prefix, value in namespaces.items() + } + + first = dicttoxml.dicttoxml( + data, + custom_root="vehicle", + attr_type=False, + xml_namespaces=namespaces, + ) + second = dicttoxml.dicttoxml( + data, + custom_root="vehicle", + attr_type=False, + xml_namespaces=namespaces, + ) + + assert first == second + assert first.count(b'xmlns:veh="https://example.com/vehicle"') == 1 + assert first.count( + b'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' + ) == 1 + assert first.count( + b'xsi:schemaLocation="https://example.com/vehicle vehicle.xsd"' + ) == 1 + assert namespaces == namespaces_before + def test_datetime_conversion(self) -> None: """Test datetime conversion.""" data = {"key": datetime.datetime(2023, 2, 15, 12, 30, 45)} diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index 672a6a1..9231eed 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -21,12 +21,6 @@ class TestJson2xml: """Tests for `json2xml` package.""" - def setUp(self) -> None: - """Set up test fixtures, if any.""" - - def tearDown(self) -> None: - """Tear down test fixtures, if any.""" - def test_read_from_json(self) -> None: """Test something.""" data = readfromjson("examples/bigexample.json") diff --git a/tests/test_rust_dicttoxml.py b/tests/test_rust_dicttoxml.py index 6efbcef..f7489ae 100644 --- a/tests/test_rust_dicttoxml.py +++ b/tests/test_rust_dicttoxml.py @@ -252,7 +252,8 @@ def test_item_wrap_false(self): def test_list_headers(self): data = {"colors": ["red", "green"]} result = rust_dicttoxml(data, list_headers=True) - assert b"red" in result + assert b"green" in result class TestRustVsPythonCompatibility: @@ -334,21 +335,18 @@ def test_item_wrap_false_matches(self): rust, python = self.compare_outputs(data, item_wrap=False) assert rust == python - @pytest.mark.xfail(reason="Rust list_headers implementation differs from Python - uses different wrapping semantics") def test_list_headers_true_matches(self): """Test that list_headers=True produces matching output.""" data = {"items": ["one", "two", "three"]} rust, python = self.compare_outputs(data, list_headers=True) assert rust == python - @pytest.mark.xfail(reason="Rust item_wrap=False with nested dicts differs from Python - known limitation") def test_item_wrap_false_with_nested_dict_matches(self): """Test item_wrap=False with nested dicts in list.""" data = {"users": [{"name": "Alice"}, {"name": "Bob"}]} rust, python = self.compare_outputs(data, item_wrap=False) assert rust == python - @pytest.mark.xfail(reason="Rust list_headers with nested structures differs from Python - known limitation") def test_list_headers_with_nested_matches(self): """Test list_headers=True with nested structures.""" data = {"products": [{"id": 1, "name": "Widget"}, {"id": 2, "name": "Gadget"}]}