diff --git a/src/toon_format/decoder.py b/src/toon_format/decoder.py index 90f0849..ee712bc 100644 --- a/src/toon_format/decoder.py +++ b/src/toon_format/decoder.py @@ -8,7 +8,8 @@ and validates array lengths and delimiters. """ -from typing import Any, Dict, List, Optional, Tuple +import json +from typing import Any, Dict, List, Optional, Tuple, Union from ._literal_utils import is_boolean_or_null_literal, is_numeric_literal from ._parsing_utils import ( @@ -228,18 +229,42 @@ def split_key_value(line: str) -> Tuple[str, str]: return (key, value) -def decode(input_str: str, options: Optional[DecodeOptions] = None) -> JsonValue: +def decode(input_str: str, options: Optional[DecodeOptions] = None) -> Union[JsonValue, str]: """Decode a TOON-formatted string to a Python value. + This function parses TOON format and returns the decoded data. By default, + it returns a Python object (dict, list, str, int, float, bool, or None). + + The DecodeOptions.json_indent parameter is a Python-specific feature that + enables returning a JSON-formatted string instead of a Python object. + This is useful for applications that need pretty-printed JSON output. + Args: - input_str: TOON-formatted string - options: Optional decoding options + input_str: TOON-formatted string to decode + options: Optional DecodeOptions with indent, strict, and json_indent + settings. If not provided, defaults are used (indent=2, + strict=True, json_indent=None). Returns: - Decoded Python value + By default (json_indent=None): Decoded Python value (object, array, + string, number, boolean, or null). + When json_indent is set: A JSON-formatted string with the specified + indentation level. Example: DecodeOptions(json_indent=2) returns + pretty-printed JSON with 2-space indentation. 
Raises: - ToonDecodeError: If input is malformed + ToonDecodeError: If input is malformed or violates strict-mode rules, + or if json_indent is negative + + Example: + >>> toon = "name: Alice\\nage: 30" + >>> decode(toon) + {'name': 'Alice', 'age': 30} + >>> print(decode(toon, DecodeOptions(json_indent=2))) + { + "name": "Alice", + "age": 30 + } """ if options is None: options = DecodeOptions() @@ -273,32 +298,44 @@ def decode(input_str: str, options: Optional[DecodeOptions] = None) -> JsonValue # Check for empty input (per spec Section 8: empty/whitespace-only → empty object) non_blank_lines = [ln for ln in lines if not ln.is_blank] if not non_blank_lines: - return {} - - # Determine root form (Section 5) - first_line = non_blank_lines[0] - - # Check if it's a root array header - header_info = parse_header(first_line.content) - if header_info is not None and header_info[0] is None: # No key = root array - # Root array - return decode_array(lines, 0, 0, header_info, strict) + result: Any = {} + else: + # Determine root form (Section 5) + first_line = non_blank_lines[0] + + # Check if it's a root array header + header_info = parse_header(first_line.content) + if header_info is not None and header_info[0] is None: # No key = root array + # Root array + result = decode_array(lines, 0, 0, header_info, strict) + else: + # Check if it's a single primitive + if len(non_blank_lines) == 1: + line_content = first_line.content + # Check if it's not a key-value line + try: + split_key_value(line_content) + except ToonDecodeError: + # Not a key-value, check if it's a header + if header_info is None: + # Single primitive + result = parse_primitive(line_content) + else: + result = decode_object(lines, 0, 0, strict) + else: + # It's a key-value, so root object + result = decode_object(lines, 0, 0, strict) + else: + # Otherwise, root object + result = decode_object(lines, 0, 0, strict) - # Check if it's a single primitive - if len(non_blank_lines) == 1: - line_content = 
first_line.content - # Check if it's not a key-value line - try: - split_key_value(line_content) - # It's a key-value, so root object - except ToonDecodeError: - # Not a key-value, check if it's a header - if header_info is None: - # Single primitive - return parse_primitive(line_content) + # If json_indent is specified, return JSON-formatted string + if options.json_indent is not None: + if options.json_indent < 0: + raise ToonDecodeError(f"json_indent must be non-negative, got {options.json_indent}") + return json.dumps(result, indent=options.json_indent, ensure_ascii=False) - # Otherwise, root object - return decode_object(lines, 0, 0, strict) + return result def decode_object( diff --git a/src/toon_format/types.py b/src/toon_format/types.py index a000d5a..1962e33 100644 --- a/src/toon_format/types.py +++ b/src/toon_format/types.py @@ -52,12 +52,26 @@ class DecodeOptions: Attributes: indent: Number of spaces per indentation level (default: 2) + Used for parsing TOON format. strict: Enable strict validation (default: True) + Enforces spec conformance checks. + json_indent: Optional number of spaces for JSON output formatting + (default: None). When set, decode() returns a JSON-formatted + string instead of a Python object. This is a Python-specific + feature for convenient output formatting. When None, returns + a Python object as normal. Pass an integer (e.g., 2 or 4) + to enable pretty-printed JSON output. """ - def __init__(self, indent: int = 2, strict: bool = True) -> None: + def __init__( + self, + indent: int = 2, + strict: bool = True, + json_indent: Union[int, None] = None, + ) -> None: self.indent = indent self.strict = strict + self.json_indent = json_indent # Depth type for tracking indentation level diff --git a/tests/test_api.py b/tests/test_api.py index 8eff0b5..df6df92 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -10,8 +10,12 @@ Python type normalization is tested in test_normalization.py. 
""" +import json +from typing import Any, Dict, List, Tuple + import pytest +from tests.test_spec_fixtures import get_all_decode_fixtures from toon_format import ToonDecodeError, decode, encode from toon_format.types import DecodeOptions, EncodeOptions @@ -286,3 +290,118 @@ def test_roundtrip_with_length_marker(self): toon = encode(original, {"lengthMarker": "#"}) decoded = decode(toon) assert decoded == original + + +# TODO: Add targeted unit tests for decode()'s json_indent feature here. +# See Issue #10. For now, comprehensive tests are in +# TestDecodeJSONIndentationWithSpecFixtures. +@pytest.mark.skip( + reason="Placeholder for targeted decode() JSON indentation tests. See TODO above." +) +class TestDecodeJSONIndentation: + """Test decode() JSON indentation feature (Issue #10). + + Comprehensive tests for the json_indent feature are in + TestDecodeJSONIndentationWithSpecFixtures, which validates against official + TOON specification fixtures. + """ + + pass + + +def _get_sample_decode_fixtures() -> List[Tuple[str, Dict[str, Any]]]: + """Get a sample of decode test cases from fixture files for json_indent testing. + + Selects a few representative test cases from the official TOON spec fixtures. + """ + all_fixtures = get_all_decode_fixtures() + + # Select a few representative test cases from different fixture categories + selected_files = {"primitives.json", "arrays-primitive.json", "objects.json"} + test_cases = [] + + for test_id, test_data, fixture_name in all_fixtures: + if f"{fixture_name}.json" in selected_files and len(test_cases) < 9: + test_cases.append((test_id, test_data)) + + return test_cases + + +class TestDecodeJSONIndentationWithSpecFixtures: + """Test json_indent feature against spec fixtures to ensure comprehensive coverage. + + These tests validate that the json_indent feature works correctly with various + TOON format patterns defined in the official specification fixtures. 
+ """ + + @pytest.mark.parametrize("test_id,test_data", _get_sample_decode_fixtures()) + def test_json_indent_produces_valid_json(self, test_id: str, test_data: Dict[str, Any]): + """Verify that json_indent produces valid JSON that can be parsed.""" + input_str = test_data["input"] + expected = test_data.get("expected") + should_error = test_data.get("shouldError", False) + + if should_error: + pytest.skip(f"Skipping error case: {test_id}") + return + + # Decode with json_indent=2 + result = decode(input_str, DecodeOptions(json_indent=2)) + + # Result should be a string (JSON) + assert isinstance(result, str), f"Expected string, got {type(result)} for {test_id}" + + # Result should be valid JSON + parsed = json.loads(result) + + # Parsed JSON should match the expected output from spec + assert parsed == expected, ( + f"JSON mismatch in {test_id}\n" + f"Input: {input_str!r}\n" + f"Expected: {expected!r}\n" + f"Got: {parsed!r}" + ) + + @pytest.mark.parametrize("test_id,test_data", _get_sample_decode_fixtures()) + def test_json_indent_with_different_indent_sizes(self, test_id: str, test_data: Dict[str, Any]): + """Verify that json_indent respects different indent sizes.""" + input_str = test_data["input"] + expected = test_data.get("expected") + should_error = test_data.get("shouldError", False) + + if should_error: + pytest.skip(f"Skipping error case: {test_id}") + return + + # Test with indent=2 + result_2 = decode(input_str, DecodeOptions(json_indent=2)) + parsed_2 = json.loads(result_2) + assert parsed_2 == expected + + # Test with indent=4 + result_4 = decode(input_str, DecodeOptions(json_indent=4)) + parsed_4 = json.loads(result_4) + assert parsed_4 == expected + + # Different indent sizes should produce different strings (unless single line) + if "\n" in result_2 and "\n" in result_4: + # Multi-line results should differ in formatting + # (indentation characters will be different) + assert result_2 != result_4, ( + "Different indent sizes should produce 
different formatting" + ) + + def test_json_indent_consistency_with_plain_decode(self): + """Verify that json_indent=None produces same data as plain decode.""" + toon = "user:\n name: Alice\n age: 30" + + # Decode as plain object + result_object = decode(toon) + + # Decode with json_indent=None + result_none = decode(toon, DecodeOptions(json_indent=None)) + + # Both should return the same dict + assert result_object == result_none + assert isinstance(result_object, dict) + assert isinstance(result_none, dict)