Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file removed .coverage
Binary file not shown.
8 changes: 5 additions & 3 deletions json2xml/dicttoxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from defusedxml.minidom import parseString

# Create a safe random number generator
_SAFE_RANDOM = SystemRandom()

# Set up logging
LOG = logging.getLogger("dicttoxml")
Expand All @@ -29,8 +30,7 @@ def make_id(element: str, start: int = 100000, end: int = 999999) -> str:
Returns:
str: The generated ID.
"""
safe_random = SystemRandom()
return f"{element}_{safe_random.randint(start, end)}"
return f"{element}_{_SAFE_RANDOM.randint(start, end)}"


def get_unique_id(element: str) -> str:
Expand Down Expand Up @@ -641,7 +641,7 @@ def dicttoxml(
item_wrap: bool = True,
item_func: Callable[[str], str] = default_item_func,
cdata: bool = False,
xml_namespaces: dict[str, Any] = {},
xml_namespaces: dict[str, Any] | None = None,
list_headers: bool = False,
xpath_format: bool = False,
) -> bytes:
Expand Down Expand Up @@ -797,6 +797,8 @@ def dicttoxml(

output = []
namespace_str = ""
if xml_namespaces is None:
xml_namespaces = {}
for prefix in xml_namespaces:
if prefix == 'xsi':
for schema_att in xml_namespaces[prefix]:
Expand Down
4 changes: 3 additions & 1 deletion lat.md/behavior.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Default output includes an XML declaration, wraps content in `all`, pretty print

[[json2xml/json2xml.py#Json2xml#to_xml]] calls [[json2xml/dicttoxml.py#dicttoxml]] with the configured wrapper, root, `attr_type`, `item_wrap`, `cdata`, and `list_headers` options. When `item_wrap=False`, list values repeat the parent tag instead of creating `<item>` children. When `pretty=False`, the library returns the serializer bytes directly.

The Rust fast path in [[rust/src/lib.rs#write_dict_contents]] and [[rust/src/lib.rs#write_list_contents]] mirrors those Python list-wrapper rules. `list_headers=True` suppresses the outer list container and repeats the parent tag only for nested dict items, while primitive items still use the same scalar tags that Python emits.

## XPath 3.1 format

XPath mode swaps the project-specific XML shape for the W3C `json-to-xml` mapping with typed element names and the XPath functions namespace.
Expand All @@ -24,4 +26,4 @@ When `xpath_format=True`, [[json2xml/dicttoxml.py#dicttoxml]] delegates payload

Pretty printing acts as a validation step, because the formatter reparses the generated XML before returning it.

[[json2xml/json2xml.py#Json2xml#to_xml]] uses `defusedxml.minidom.parseString` before `toprettyxml`. If the generated bytes are not well-formed XML, the converter raises `InvalidDataError` instead of returning broken pretty output.
[[json2xml/json2xml.py#Json2xml#to_xml]] uses `defusedxml.minidom.parseString` before `toprettyxml`. If the generated bytes are not well-formed XML, the converter raises `InvalidDataError` instead of returning broken pretty output.
14 changes: 13 additions & 1 deletion lat.md/tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,16 @@ XPath mode should emit the W3C XPath functions namespace and typed child element

### Item-wrap false repeats parent tag

Disabling item wrapping should repeat the parent element name for primitive list items instead of producing nested `<item>` tags.
Disabling item wrapping should repeat the parent element name for primitive list items instead of producing nested `<item>` tags.

### Default xml namespaces stay empty

Calling `dicttoxml` without `xml_namespaces` should preserve the legacy root output and avoid adding namespace declarations or `xsi:` attributes implicitly.

### Explicit xml namespaces emit schema attributes

Supplying namespace prefixes and an `xsi` mapping should emit the expected `xmlns:*` declarations plus supported schema attributes without mutating the caller input.

### Xml namespace inputs are not mutated across calls

Reusing one `xml_namespaces` mapping across multiple `dicttoxml` calls should return identical XML each time so namespace declarations never accumulate on the shared dict.
35 changes: 30 additions & 5 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,14 @@ fn write_dict_contents(

// Lists in dicts get special wrapping treatment
if let Ok(list) = val.cast::<PyList>() {
if cfg.item_wrap {
let first_is_scalar = list
.get_item(0)
.ok()
.map(|item| is_python_scalar(&item))
.unwrap_or(false);
let wrap_list_container = (cfg.item_wrap || !first_is_scalar) && !cfg.list_headers;

if wrap_list_container {
write_open_tag(out, &xml_key, name_attr, type_attr(cfg, "list"));
write_list_contents(py, out, list, &xml_key, cfg)?;
write_close_tag(out, &xml_key);
Expand All @@ -354,6 +361,18 @@ fn write_dict_contents(
Ok(())
}

/// Report whether `obj` counts as a primitive scalar for list-wrapper decisions.
///
/// The accepted values are Python `None`, `bool`, `int`, `float`, and `str`;
/// anything else (dicts, lists, other objects) yields `false`. The set is
/// intended to match what the pure-Python serializer treats as primitive.
#[cfg(feature = "python")]
#[inline]
fn is_python_scalar(obj: &Bound<'_, PyAny>) -> bool {
    // `None` is not matched by any of the type checks below, so handle it first.
    if obj.is_none() {
        return true;
    }

    let is_numeric = obj.is_instance_of::<PyBool>()
        || obj.is_instance_of::<PyInt>()
        || obj.is_instance_of::<PyFloat>();

    is_numeric || obj.is_instance_of::<PyString>()
}

/// Write all items of a list into the buffer.
#[cfg(feature = "python")]
fn write_list_contents(
Expand All @@ -363,7 +382,8 @@ fn write_list_contents(
parent: &str,
cfg: &ConvertConfig,
) -> PyResult<()> {
let tag_name = if cfg.list_headers {
let scalar_tag_name = if cfg.item_wrap { "item" } else { parent };
let dict_tag_name = if cfg.list_headers {
parent
} else if cfg.item_wrap {
"item"
Expand All @@ -375,14 +395,19 @@ fn write_list_contents(
// Dicts inside lists have special wrapping logic
if let Ok(dict) = item.cast::<PyDict>() {
if cfg.item_wrap || cfg.list_headers {
write_open_tag(out, tag_name, None, type_attr(cfg, "dict"));
let dict_type_attr = if cfg.list_headers {
None
} else {
type_attr(cfg, "dict")
};
write_open_tag(out, dict_tag_name, None, dict_type_attr);
write_dict_contents(py, out, dict, cfg)?;
write_close_tag(out, tag_name);
write_close_tag(out, dict_tag_name);
} else {
write_dict_contents(py, out, dict, cfg)?;
}
} else {
write_value(py, out, &item, tag_name, None, cfg, true)?;
write_value(py, out, &item, scalar_tag_name, None, cfg, true)?;
}
}
Ok(())
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import json
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List
from typing import TYPE_CHECKING, Any

import pytest

Expand Down
86 changes: 86 additions & 0 deletions tests/test_dict2xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,92 @@ def test_dicttoxml_with_xml_namespaces(self) -> None:
result = dicttoxml.dicttoxml(data, xml_namespaces=namespaces)
assert b'xmlns="http://example.com"' in result

# @lat: [[tests#Conversion behavior#Default xml namespaces stay empty]]
def test_dicttoxml_without_xml_namespaces_keeps_previous_output(self) -> None:
    """Omitting xml_namespaces must leave the default root output untouched."""
    expected = (
        b'<?xml version="1.0" encoding="UTF-8" ?>'
        b"<root><bike>blue</bike></root>"
    )

    xml = dicttoxml.dicttoxml({"bike": "blue"}, attr_type=False)

    assert xml == expected
    # No namespace declaration or xsi attribute may appear implicitly.
    for forbidden in (b"xmlns", b"xsi:"):
        assert forbidden not in xml

# @lat: [[tests#Conversion behavior#Explicit xml namespaces emit schema attributes]]
def test_dicttoxml_with_explicit_xml_namespaces_emits_schema_attributes(self) -> None:
    """Explicit namespaces produce xmlns declarations plus the xsi schemaLocation attribute."""
    xsi_mapping = {
        "schemaInstance": "http://www.w3.org/2001/XMLSchema-instance",
        "schemaLocation": "https://example.com/vehicle vehicle.xsd",
        "noNamespaceSchemaLocation": "vehicle-no-namespace.xsd",
    }
    namespaces = {"veh": "https://example.com/vehicle", "xsi": xsi_mapping}

    # Snapshot one level deep so in-place edits to the nested xsi dict are caught.
    snapshot = {}
    for prefix, value in namespaces.items():
        snapshot[prefix] = value.copy() if isinstance(value, dict) else value

    xml = dicttoxml.dicttoxml(
        {"bike": "blue"},
        custom_root="vehicle",
        attr_type=False,
        xml_namespaces=namespaces,
    )

    expected = (
        b'<?xml version="1.0" encoding="UTF-8" ?>'
        b'<vehicle xmlns:veh="https://example.com/vehicle" '
        b'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        b'xsi:schemaLocation="https://example.com/vehicle vehicle.xsd">'
        b"<bike>blue</bike>"
        b"</vehicle>"
    )
    assert xml == expected
    # The noNamespaceSchemaLocation entry is supplied but must not be emitted.
    assert b"xsi:noNamespaceSchemaLocation" not in xml
    assert namespaces == snapshot

# @lat: [[tests#Conversion behavior#Xml namespace inputs are not mutated across calls]]
def test_dicttoxml_reuses_xml_namespaces_without_mutating_input(self) -> None:
    """Two calls sharing one xml_namespaces mapping give identical XML and leave it unchanged."""
    data = {"bike": "blue"}
    namespaces = {
        "veh": "https://example.com/vehicle",
        "xsi": {
            "schemaInstance": "http://www.w3.org/2001/XMLSchema-instance",
            "schemaLocation": "https://example.com/vehicle vehicle.xsd",
        },
    }

    # Snapshot one level deep so in-place edits to the nested xsi dict are caught.
    snapshot = {}
    for prefix, value in namespaces.items():
        snapshot[prefix] = value.copy() if isinstance(value, dict) else value

    def convert() -> bytes:
        # Every call passes the very same `data` and `namespaces` objects.
        return dicttoxml.dicttoxml(
            data,
            custom_root="vehicle",
            attr_type=False,
            xml_namespaces=namespaces,
        )

    first = convert()
    second = convert()

    assert first == second
    # Each declaration appears exactly once, so nothing accumulated between calls.
    for fragment in (
        b'xmlns:veh="https://example.com/vehicle"',
        b'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
        b'xsi:schemaLocation="https://example.com/vehicle vehicle.xsd"',
    ):
        assert first.count(fragment) == 1
    assert namespaces == snapshot

def test_datetime_conversion(self) -> None:
"""Test datetime conversion."""
data = {"key": datetime.datetime(2023, 2, 15, 12, 30, 45)}
Expand Down
6 changes: 0 additions & 6 deletions tests/test_json2xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@
class TestJson2xml:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (testing): Add tests covering the updated xml_namespaces behavior in dicttoxml (None default, no shared mutation, and explicit namespaces).

Since xml_namespaces changed from a mutable default ({}) to an optional argument (None, replaced inside the function by a fresh empty dict via an explicit `is None` check), tests should explicitly cover:

  1. Calling dicttoxml without xml_namespaces and confirming the output matches previous behavior (no unexpected namespace attributes).
  2. Calling dicttoxml with an explicit xml_namespaces dict (including xsi with multiple schema attributes) and asserting the XML has the expected namespace declarations and attributes.
  3. Reusing the same xml_namespaces dict across multiple calls and asserting the dict is unchanged and namespaces don’t accumulate.

These will guard against regressions in the new default-handling logic and verify the mutable-default fix is behaviorally transparent.

"""Tests for `json2xml` package."""

def setUp(self) -> None:
"""Set up test fixtures, if any."""

def tearDown(self) -> None:
"""Tear down test fixtures, if any."""

def test_read_from_json(self) -> None:
"""Test something."""
data = readfromjson("examples/bigexample.json")
Expand Down
6 changes: 2 additions & 4 deletions tests/test_rust_dicttoxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ def test_item_wrap_false(self):
def test_list_headers(self):
data = {"colors": ["red", "green"]}
result = rust_dicttoxml(data, list_headers=True)
assert b"<colors" in result
assert b"<item type=\"str\">red</item>" in result
assert b"<item type=\"str\">green</item>" in result


class TestRustVsPythonCompatibility:
Expand Down Expand Up @@ -334,21 +335,18 @@ def test_item_wrap_false_matches(self):
rust, python = self.compare_outputs(data, item_wrap=False)
assert rust == python

@pytest.mark.xfail(reason="Rust list_headers implementation differs from Python - uses different wrapping semantics")
def test_list_headers_true_matches(self):
    """Rust and Python outputs must be equal for a flat list with list_headers=True."""
    payload = {"items": ["one", "two", "three"]}
    rust_xml, python_xml = self.compare_outputs(payload, list_headers=True)
    assert rust_xml == python_xml

@pytest.mark.xfail(reason="Rust item_wrap=False with nested dicts differs from Python - known limitation")
def test_item_wrap_false_with_nested_dict_matches(self):
    """Rust and Python outputs must be equal for a list of dicts with item_wrap=False."""
    payload = {"users": [{"name": "Alice"}, {"name": "Bob"}]}
    rust_xml, python_xml = self.compare_outputs(payload, item_wrap=False)
    assert rust_xml == python_xml

@pytest.mark.xfail(reason="Rust list_headers with nested structures differs from Python - known limitation")
def test_list_headers_with_nested_matches(self):
"""Test list_headers=True with nested structures."""
data = {"products": [{"id": 1, "name": "Widget"}, {"id": 2, "name": "Gadget"}]}
Expand Down
Loading