Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 67 additions & 30 deletions src/toon_format/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
and validates array lengths and delimiters.
"""

from typing import Any, Dict, List, Optional, Tuple
import json
from typing import Any, Dict, List, Optional, Tuple, Union

from ._literal_utils import is_boolean_or_null_literal, is_numeric_literal
from ._parsing_utils import (
Expand Down Expand Up @@ -228,18 +229,42 @@ def split_key_value(line: str) -> Tuple[str, str]:
return (key, value)


def decode(input_str: str, options: Optional[DecodeOptions] = None) -> JsonValue:
def decode(input_str: str, options: Optional[DecodeOptions] = None) -> Union[JsonValue, str]:
"""Decode a TOON-formatted string to a Python value.

This function parses TOON format and returns the decoded data. By default,
it returns a Python object (dict, list, str, int, float, bool, or None).

The DecodeOptions.json_indent parameter is a Python-specific feature that
enables returning a JSON-formatted string instead of a Python object.
This is useful for applications that need pretty-printed JSON output.

Args:
input_str: TOON-formatted string
options: Optional decoding options
input_str: TOON-formatted string to decode
options: Optional DecodeOptions with indent, strict, and json_indent
settings. If not provided, defaults are used (indent=2,
strict=True, json_indent=None).

Returns:
Decoded Python value
By default (json_indent=None): Decoded Python value (object, array,
string, number, boolean, or null).
When json_indent is set: A JSON-formatted string with the specified
indentation level. Example: DecodeOptions(json_indent=2) returns
pretty-printed JSON with 2-space indentation.

Raises:
ToonDecodeError: If input is malformed
ToonDecodeError: If input is malformed or violates strict-mode rules
ValueError: If json_indent is negative

Example:
>>> toon = "name: Alice\\nage: 30"
>>> decode(toon)
{'name': 'Alice', 'age': 30}
>>> print(decode(toon, DecodeOptions(json_indent=2)))
{
"name": "Alice",
"age": 30
}
"""
if options is None:
options = DecodeOptions()
Expand Down Expand Up @@ -273,32 +298,44 @@ def decode(input_str: str, options: Optional[DecodeOptions] = None) -> JsonValue
# Check for empty input (per spec Section 8: empty/whitespace-only → empty object)
non_blank_lines = [ln for ln in lines if not ln.is_blank]
if not non_blank_lines:
return {}

# Determine root form (Section 5)
first_line = non_blank_lines[0]

# Check if it's a root array header
header_info = parse_header(first_line.content)
if header_info is not None and header_info[0] is None: # No key = root array
# Root array
return decode_array(lines, 0, 0, header_info, strict)
result: Any = {}
else:
# Determine root form (Section 5)
first_line = non_blank_lines[0]

# Check if it's a root array header
header_info = parse_header(first_line.content)
if header_info is not None and header_info[0] is None: # No key = root array
# Root array
result = decode_array(lines, 0, 0, header_info, strict)
else:
# Check if it's a single primitive
if len(non_blank_lines) == 1:
line_content = first_line.content
# Check if it's not a key-value line
try:
split_key_value(line_content)
except ToonDecodeError:
# Not a key-value, check if it's a header
if header_info is None:
# Single primitive
result = parse_primitive(line_content)
else:
result = decode_object(lines, 0, 0, strict)
else:
# It's a key-value, so root object
result = decode_object(lines, 0, 0, strict)
else:
# Otherwise, root object
result = decode_object(lines, 0, 0, strict)

# Check if it's a single primitive
if len(non_blank_lines) == 1:
line_content = first_line.content
# Check if it's not a key-value line
try:
split_key_value(line_content)
# It's a key-value, so root object
except ToonDecodeError:
# Not a key-value, check if it's a header
if header_info is None:
# Single primitive
return parse_primitive(line_content)
# If json_indent is specified, return JSON-formatted string
if options.json_indent is not None:
if options.json_indent < 0:
raise ToonDecodeError(f"json_indent must be non-negative, got {options.json_indent}")
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error raised for negative json_indent values uses ToonDecodeError, but the docstring documents ValueError. Since json_indent is a formatting option unrelated to TOON format parsing, raising ValueError would be more semantically appropriate and consistent with the documented behavior.

Consider changing to:

raise ValueError(f"json_indent must be non-negative, got {options.json_indent}")
Suggested change
raise ToonDecodeError(f"json_indent must be non-negative, got {options.json_indent}")
raise ValueError(f"json_indent must be non-negative, got {options.json_indent}")

Copilot uses AI. Check for mistakes.
return json.dumps(result, indent=options.json_indent, ensure_ascii=False)

# Otherwise, root object
return decode_object(lines, 0, 0, strict)
return result


def decode_object(
Expand Down
16 changes: 15 additions & 1 deletion src/toon_format/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,26 @@ class DecodeOptions:

Attributes:
indent: Number of spaces per indentation level (default: 2)
Used for parsing TOON format.
strict: Enable strict validation (default: True)
Enforces spec conformance checks.
Comment on lines 54 to +57
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The comment on lines 55-57 uses inconsistent terminology compared to the rest of the codebase. Line 55 says "Used for parsing TOON format" while line 57 says "Enforces spec conformance checks." The wording for indent could be more precise to match the style of the strict parameter.

Consider revising for consistency:

indent: Number of spaces per indentation level (default: 2).
        Used for parsing TOON format input.
strict: Enable strict validation (default: True).
        Enforces TOON specification conformance checks.
Suggested change
indent: Number of spaces per indentation level (default: 2)
Used for parsing TOON format.
strict: Enable strict validation (default: True)
Enforces spec conformance checks.
indent: Number of spaces per indentation level (default: 2).
Used for parsing TOON format input.
strict: Enable strict validation (default: True).
Enforces TOON specification conformance checks.

Copilot uses AI. Check for mistakes.
json_indent: Optional number of spaces for JSON output formatting
(default: None). When set, decode() returns a JSON-formatted
string instead of a Python object. This is a Python-specific
feature for convenient output formatting. When None, returns
a Python object as normal. Pass an integer (e.g., 2 or 4)
to enable pretty-printed JSON output.
"""

def __init__(self, indent: int = 2, strict: bool = True) -> None:
def __init__(
self,
indent: int = 2,
strict: bool = True,
json_indent: Union[int, None] = None,
) -> None:
self.indent = indent
self.strict = strict
self.json_indent = json_indent


# Depth type for tracking indentation level
Expand Down
119 changes: 119 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@
Python type normalization is tested in test_normalization.py.
"""

import json
from typing import Any, Dict, List, Tuple

import pytest

from tests.test_spec_fixtures import get_all_decode_fixtures
from toon_format import ToonDecodeError, decode, encode
from toon_format.types import DecodeOptions, EncodeOptions

Expand Down Expand Up @@ -286,3 +290,118 @@ def test_roundtrip_with_length_marker(self):
toon = encode(original, {"lengthMarker": "#"})
decoded = decode(toon)
assert decoded == original


# TODO: Add targeted unit tests for decode()'s json_indent feature here.
# See Issue #10. For now, comprehensive tests are in
# TestDecodeJSONIndentationWithSpecFixtures.
@pytest.mark.skip(
    reason="Placeholder for targeted decode() JSON indentation tests. See TODO above."
)
class TestDecodeJSONIndentation:
    """Placeholder for focused unit tests of decode()'s json_indent option (Issue #10).

    The class is deliberately empty and skipped. Until targeted cases are
    written, json_indent coverage comes from
    TestDecodeJSONIndentationWithSpecFixtures, which checks the feature
    against the official TOON specification fixtures.
    """


Comment on lines +298 to +311
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The class TestDecodeJSONIndentation is marked with @pytest.mark.skip but contains only a pass statement. This creates dead code that provides no value.

Since the TODO comment indicates that "comprehensive tests are in TestDecodeJSONIndentationWithSpecFixtures," and that class already exists with actual tests, this entire empty skipped class should be removed to keep the codebase clean. If targeted unit tests are needed in the future, they can be added at that time.

Suggested change
@pytest.mark.skip(
reason="Placeholder for targeted decode() JSON indentation tests. See TODO above."
)
class TestDecodeJSONIndentation:
"""Test decode() JSON indentation feature (Issue #10).
Comprehensive tests for the json_indent feature are in
TestDecodeJSONIndentationWithSpecFixtures, which validates against official
TOON specification fixtures.
"""
pass

Copilot uses AI. Check for mistakes.
def _get_sample_decode_fixtures() -> List[Tuple[str, Dict[str, Any]]]:
"""Get a sample of decode test cases from fixture files for json_indent testing.

Selects a few representative test cases from the official TOON spec fixtures.
"""
all_fixtures = get_all_decode_fixtures()

# Select a few representative test cases from different fixture categories
selected_files = {"primitives.json", "arrays-primitive.json", "objects.json"}
test_cases = []

for test_id, test_data, fixture_name in all_fixtures:
if f"{fixture_name}.json" in selected_files and len(test_cases) < 9:
test_cases.append((test_id, test_data))
Comment on lines +323 to +325
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unpacking in this loop is incorrect. get_all_decode_fixtures() returns tuples of (test_id, test_data, fixture_name) (3 elements), but the code unpacks only 2 elements into (test_id, test_data). This will cause a ValueError: too many values to unpack at runtime.

Fix the unpacking to include all three elements:

for test_id, test_data, fixture_name in all_fixtures:
    if f"{fixture_name}.json" in selected_files and len(test_cases) < 9:
        test_cases.append((test_id, test_data))

Copilot uses AI. Check for mistakes.

return test_cases


class TestDecodeJSONIndentationWithSpecFixtures:
    """Check decode()'s json_indent option against official TOON spec fixtures.

    The parametrized tests take their cases from _get_sample_decode_fixtures().
    Fixtures flagged with ``shouldError`` are skipped, so only inputs that are
    expected to decode successfully are exercised here.
    """

    @pytest.mark.parametrize("test_id,test_data", _get_sample_decode_fixtures())
    def test_json_indent_produces_valid_json(self, test_id: str, test_data: Dict[str, Any]):
        """Verify that json_indent=2 yields a JSON string that parses to the expected value."""
        input_str = test_data["input"]
        expected = test_data.get("expected")

        if test_data.get("shouldError", False):
            # pytest.skip() raises, so no explicit return is needed after it.
            pytest.skip(f"Skipping error case: {test_id}")

        # With json_indent set, decode() returns a JSON document, not a Python object.
        result = decode(input_str, DecodeOptions(json_indent=2))
        assert isinstance(result, str), f"Expected string, got {type(result)} for {test_id}"

        # The JSON must parse and match the value the fixture expects.
        parsed = json.loads(result)
        assert parsed == expected, (
            f"JSON mismatch in {test_id}\n"
            f"Input: {input_str!r}\n"
            f"Expected: {expected!r}\n"
            f"Got: {parsed!r}"
        )

    @pytest.mark.parametrize("test_id,test_data", _get_sample_decode_fixtures())
    def test_json_indent_with_different_indent_sizes(self, test_id: str, test_data: Dict[str, Any]):
        """Verify that indent sizes 2 and 4 encode the same data with different layout."""
        input_str = test_data["input"]
        expected = test_data.get("expected")

        if test_data.get("shouldError", False):
            # pytest.skip() raises, so no explicit return is needed after it.
            pytest.skip(f"Skipping error case: {test_id}")

        # Both indent sizes must serialize the same underlying data.
        result_2 = decode(input_str, DecodeOptions(json_indent=2))
        assert json.loads(result_2) == expected

        result_4 = decode(input_str, DecodeOptions(json_indent=4))
        assert json.loads(result_4) == expected

        # Single-line output (scalars, empty containers) has no indentation to
        # compare, so the strings are only required to differ when both span
        # several lines.
        if "\n" in result_2 and "\n" in result_4:
            assert result_2 != result_4, (
                "Different indent sizes should produce different formatting"
            )

    def test_json_indent_consistency_with_plain_decode(self):
        """Verify that json_indent=None produces same data as plain decode."""
        # Nested keys are indented by two spaces to match the default indent=2.
        toon = "user:\n  name: Alice\n  age: 30"

        # Decode once with default options and once with json_indent=None spelled out.
        result_object = decode(toon)
        result_none = decode(toon, DecodeOptions(json_indent=None))

        # Both calls must return the same dict rather than a JSON string.
        assert result_object == result_none
        assert isinstance(result_object, dict)
        assert isinstance(result_none, dict)