diff --git a/aredis_om/model/model.py b/aredis_om/model/model.py index 61d5d745..07180b8b 100644 --- a/aredis_om/model/model.py +++ b/aredis_om/model/model.py @@ -8,25 +8,10 @@ from copy import copy from enum import Enum from functools import reduce -from typing import ( - Any, - Callable, - Dict, - List, - Literal, - Mapping, - Optional, - Sequence, - Set, - Tuple, - Type, - TypeVar, - Union, -) +from typing import (Any, Callable, Dict, List, Literal, Mapping, Optional, + Sequence, Set, Tuple, Type, TypeVar, Union) from typing import get_args as typing_get_args -from typing import ( - no_type_check, -) +from typing import no_type_check from more_itertools import ichunked from pydantic import BaseModel @@ -2142,6 +2127,7 @@ def __init__(self, default: Any = ..., **kwargs: Any) -> None: full_text_search = kwargs.pop("full_text_search", None) vector_options = kwargs.pop("vector_options", None) expire = kwargs.pop("expire", None) + separator = kwargs.pop("separator", SINGLE_VALUE_TAG_FIELD_SEPARATOR) super().__init__(default=default, **kwargs) self.primary_key = primary_key self.sortable = sortable @@ -2150,6 +2136,7 @@ def __init__(self, default: Any = ..., **kwargs: Any) -> None: self.full_text_search = full_text_search self.vector_options = vector_options self.expire = expire + self.separator = separator class RelationshipInfo(Representation): @@ -2261,6 +2248,7 @@ def Field( full_text_search: Union[bool, UndefinedType] = Undefined, vector_options: Optional[VectorFieldOptions] = None, expire: Optional[int] = None, + separator: str = SINGLE_VALUE_TAG_FIELD_SEPARATOR, **kwargs: Unpack[_FromFieldInfoInputs], ) -> Any: """ @@ -2276,6 +2264,8 @@ def Field( vector_options: Vector field configuration for similarity search. expire: TTL in seconds for this field (HashModel only, requires Redis 7.4+). When set, the field will automatically expire after save(). + separator: TAG field separator character for RediSearch indexing. + Defaults to "|". Use "," for comma-separated multi-value fields. **kwargs: Additional Pydantic field options. Returns: @@ -2291,6 +2281,7 @@ def Field( full_text_search=full_text_search, vector_options=vector_options, expire=expire, + separator=separator, ) return field_info @@ -3286,9 +3277,10 @@ def schema_for_fields(cls): if getattr(field_info, "primary_key", None) is True: if issubclass(_type, str): - redisearch_field = ( - f"{name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR}" + separator = getattr( + field_info, "separator", SINGLE_VALUE_TAG_FIELD_SEPARATOR ) + redisearch_field = f"{name} TAG SEPARATOR {separator}" else: redisearch_field = cls.schema_for_type(name, _type, field_info) schema_parts.append(redisearch_field) @@ -3346,13 +3338,15 @@ def schema_for_type(cls, name, typ: Any, field_info: PydanticFieldInfo): else: schema = f"{name} NUMERIC" elif issubclass(typ, str): + separator = getattr( + field_info, "separator", SINGLE_VALUE_TAG_FIELD_SEPARATOR + ) if getattr(field_info, "full_text_search", False) is True: schema = ( - f"{name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR} " - f"{name} AS {name}_fts TEXT" + f"{name} TAG SEPARATOR {separator} " f"{name} AS {name}_fts TEXT" ) else: - schema = f"{name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR}" + schema = f"{name} TAG SEPARATOR {separator}" elif issubclass(typ, RedisModel): sub_fields = [] for embedded_name, field in typ.model_fields.items(): @@ -3363,7 +3357,10 @@ def schema_for_type(cls, name, typ: Any, field_info: PydanticFieldInfo): ) schema = " ".join(sub_fields) else: - schema = f"{name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR}" + separator = getattr( + field_info, "separator", SINGLE_VALUE_TAG_FIELD_SEPARATOR + ) + schema = f"{name} TAG SEPARATOR {separator}" if schema and sortable is True: schema += " SORTABLE" if schema and case_sensitive is True: @@ -3627,7 +3624,10 @@ def schema_for_fields(cls): if getattr(field_info, "primary_key", None) is True: if issubclass(_type, str): - redisearch_field = f"$.{name} AS {name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR}" + separator = getattr( + field_info, "separator", SINGLE_VALUE_TAG_FIELD_SEPARATOR + ) + redisearch_field = f"$.{name} AS {name} TAG SEPARATOR {separator}" else: redisearch_field = cls.schema_for_type( json_path, name, "", _type, field_info @@ -3781,6 +3781,11 @@ def schema_for_type( else typ ) + # Get separator from field_info, defaulting to pipe + separator = getattr( + field_info, "separator", SINGLE_VALUE_TAG_FIELD_SEPARATOR + ) + if is_vector and vector_options: schema = f"{path} AS {index_field_name} {vector_options.schema}" elif parent_is_container_type or parent_is_model_in_container: @@ -3795,7 +3800,7 @@ def schema_for_type( f"search. Problem field: {name}. Docs: {ERRORS_URL}#E13" ) # List/tuple fields are indexed as TAG fields and can be sortable - schema = f"{path} AS {index_field_name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR}" + schema = f"{path} AS {index_field_name} TAG SEPARATOR {separator}" if sortable is True: schema += " SORTABLE" if case_sensitive is True: @@ -3815,7 +3820,7 @@ def schema_for_type( elif issubclass(typ, str): if full_text_search is True: schema = ( - f"{path} AS {index_field_name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR} " + f"{path} AS {index_field_name} TAG SEPARATOR {separator} " f"{path} AS {index_field_name}_fts TEXT" ) if sortable is True: @@ -3829,14 +3834,14 @@ def schema_for_type( raise RedisModelError("Text fields cannot be case-sensitive.") else: # String fields are indexed as TAG fields and can be sortable - schema = f"{path} AS {index_field_name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR}" + schema = f"{path} AS {index_field_name} TAG SEPARATOR {separator}" if sortable is True: schema += " SORTABLE" if case_sensitive is True: schema += " CASESENSITIVE" else: # Default to TAG field, which can be sortable - schema = f"{path} AS {index_field_name} TAG SEPARATOR {SINGLE_VALUE_TAG_FIELD_SEPARATOR}" + schema = f"{path} AS {index_field_name} TAG SEPARATOR {separator}" if sortable is True: schema += " SORTABLE" diff --git a/tests/test_tag_separator.py b/tests/test_tag_separator.py new file mode 100644 index 00000000..0520b875 --- /dev/null +++ b/tests/test_tag_separator.py @@ -0,0 +1,302 @@ +# type: ignore +"""Tests for issue #488: TAG field separator support. + +This module tests that the `separator` parameter in Field() is properly +supported for TAG fields in both HashModel and JsonModel, allowing users +to specify custom separators (e.g., comma) instead of the default pipe. +""" + +import abc + +import pytest +import pytest_asyncio +# We need to run this check as sync code (during tests) even in async mode +from redis_om import has_redisearch + +from aredis_om import Field, HashModel, JsonModel, Migrator +from aredis_om.model.model import SINGLE_VALUE_TAG_FIELD_SEPARATOR + +from .conftest import py_test_mark_asyncio + +if not has_redisearch(): + pytestmark = pytest.mark.skip + + +class TestFieldSeparatorParameter: + """Test that Field() accepts and stores the separator parameter.""" + + def test_field_accepts_separator_parameter(self): + """Field() should accept a separator parameter without error.""" + field = Field(index=True, separator=",") + assert hasattr(field, "separator") + assert field.separator == "," + + def test_field_default_separator_is_pipe(self): + """Field() without separator should default to pipe.""" + field = Field(index=True) + assert hasattr(field, "separator") + assert field.separator == SINGLE_VALUE_TAG_FIELD_SEPARATOR + assert field.separator == "|" + + def test_field_accepts_various_separators(self): + """Field() should accept various separator characters.""" + for sep in [",", ";", ":", "/", "-", "_"]: + field = Field(index=True, separator=sep) + assert field.separator == sep + + +@pytest_asyncio.fixture +async def separator_models(key_prefix, redis): + """Fixture providing models with custom separators for testing.""" + + class BaseHashModel(HashModel, abc.ABC): + class Meta: + global_key_prefix = key_prefix + database = redis + + class BaseJsonModel(JsonModel, abc.ABC): + class Meta: + global_key_prefix = key_prefix + database = redis + + class HashDocWithCustomSeparator(BaseHashModel, index=True): + name: str = Field(index=True) + tags: str = Field(index=True, separator=",") + + class JsonDocWithCustomSeparator(BaseJsonModel, index=True): + name: str = Field(index=True) + tags: str = Field(index=True, separator=",") + + class HashDocWithDefaultSeparator(BaseHashModel, index=True): + name: str = Field(index=True) + tags: str = Field(index=True) # Default pipe separator + + class JsonDocWithDefaultSeparator(BaseJsonModel, index=True): + name: str = Field(index=True) + tags: str = Field(index=True) # Default pipe separator + + await Migrator(conn=redis).run() + + return { + "HashCustom": HashDocWithCustomSeparator, + "JsonCustom": JsonDocWithCustomSeparator, + "HashDefault": HashDocWithDefaultSeparator, + "JsonDefault": JsonDocWithDefaultSeparator, + } + + +class TestSchemaGeneration: + """Test that separator is used in schema generation.""" + + @py_test_mark_asyncio + async def test_hash_model_schema_uses_custom_separator(self, separator_models): + """HashModel should use custom separator in FT.CREATE schema.""" + HashCustom = separator_models["HashCustom"] + schema = HashCustom.redisearch_schema() + + # The tags field should use comma separator + assert "tags TAG SEPARATOR ," in schema + + @py_test_mark_asyncio + async def test_json_model_schema_uses_custom_separator(self, separator_models): + """JsonModel should use custom separator in FT.CREATE schema.""" + JsonCustom = separator_models["JsonCustom"] + schema = JsonCustom.redisearch_schema() + + # The tags field should use comma separator + assert "tags TAG SEPARATOR ," in schema + + @py_test_mark_asyncio + async def test_hash_model_schema_uses_default_separator(self, separator_models): + """HashModel without custom separator should use pipe.""" + HashDefault = separator_models["HashDefault"] + schema = HashDefault.redisearch_schema() + + # The tags field should use default pipe separator + assert "tags TAG SEPARATOR |" in schema + + @py_test_mark_asyncio + async def test_json_model_schema_uses_default_separator(self, separator_models): + """JsonModel without custom separator should use pipe.""" + JsonDefault = separator_models["JsonDefault"] + schema = JsonDefault.redisearch_schema() + + # The tags field should use default pipe separator + assert "tags TAG SEPARATOR |" in schema + + +class TestEndToEndWithCustomSeparator: + """End-to-end tests for custom separator functionality.""" + + @py_test_mark_asyncio + async def test_hash_model_save_and_query_with_custom_separator( + self, separator_models + ): + """HashModel with custom separator should save and query correctly.""" + HashCustom = separator_models["HashCustom"] + + # Save a document with comma-separated tags + doc = HashCustom(name="Test Doc", tags="tag1,tag2,tag3") + await doc.save() + + # Query should find the document + results = await HashCustom.find(HashCustom.name == "Test Doc").all() + assert len(results) == 1 + assert results[0].tags == "tag1,tag2,tag3" + + @py_test_mark_asyncio + async def test_json_model_save_and_query_with_custom_separator( + self, separator_models + ): + """JsonModel with custom separator should save and query correctly.""" + JsonCustom = separator_models["JsonCustom"] + + # Save a document with comma-separated tags + doc = JsonCustom(name="Test Doc", tags="tag1,tag2,tag3") + await doc.save() + + # Query should find the document + results = await JsonCustom.find(JsonCustom.name == "Test Doc").all() + assert len(results) == 1 + assert results[0].tags == "tag1,tag2,tag3" + + @py_test_mark_asyncio + async def test_hash_model_query_individual_tag_with_custom_separator( + self, separator_models + ): + """HashModel should be able to query individual tags with custom separator.""" + HashCustom = separator_models["HashCustom"] + + # Save documents with different tags + doc1 = HashCustom(name="Doc 1", tags="python,redis") + doc2 = HashCustom(name="Doc 2", tags="javascript,redis") + doc3 = HashCustom(name="Doc 3", tags="python,mongodb") + await doc1.save() + await doc2.save() + await doc3.save() + + # Query for documents with "redis" tag + results = await HashCustom.find(HashCustom.tags == "redis").all() + assert len(results) == 2 + names = {r.name for r in results} + assert names == {"Doc 1", "Doc 2"} + + # Query for documents with "python" tag + results = await HashCustom.find(HashCustom.tags == "python").all() + assert len(results) == 2 + names = {r.name for r in results} + assert names == {"Doc 1", "Doc 3"} + + @py_test_mark_asyncio + async def test_json_model_query_individual_tag_with_custom_separator( + self, separator_models + ): + """JsonModel should be able to query individual tags with custom separator.""" + JsonCustom = separator_models["JsonCustom"] + + # Save documents with different tags + doc1 = JsonCustom(name="Doc 1", tags="python,redis") + doc2 = JsonCustom(name="Doc 2", tags="javascript,redis") + doc3 = JsonCustom(name="Doc 3", tags="python,mongodb") + await doc1.save() + await doc2.save() + await doc3.save() + + # Query for documents with "redis" tag + results = await JsonCustom.find(JsonCustom.tags == "redis").all() + assert len(results) == 2 + names = {r.name for r in results} + assert names == {"Doc 1", "Doc 2"} + + # Query for documents with "python" tag + results = await JsonCustom.find(JsonCustom.tags == "python").all() + assert len(results) == 2 + names = {r.name for r in results} + assert names == {"Doc 1", "Doc 3"} + + @py_test_mark_asyncio + async def test_default_separator_still_works(self, separator_models): + """Default pipe separator should continue to work correctly.""" + HashDefault = separator_models["HashDefault"] + + # Save documents with pipe-separated tags + doc1 = HashDefault(name="Doc 1", tags="python|redis") + doc2 = HashDefault(name="Doc 2", tags="javascript|redis") + await doc1.save() + await doc2.save() + + # Query for documents with "redis" tag + results = await HashDefault.find(HashDefault.tags == "redis").all() + assert len(results) == 2 + names = {r.name for r in results} + assert names == {"Doc 1", "Doc 2"} + + +class TestFullTextSearchWithCustomSeparator: + """Test full-text search fields with custom separator.""" + + @py_test_mark_asyncio + async def test_full_text_search_schema_uses_custom_separator( + self, key_prefix, redis + ): + """Full-text search fields should use custom separator in schema.""" + + class DocWithFTS(JsonModel, index=True): + title: str = Field(index=True, full_text_search=True, separator=",") + + class Meta: + global_key_prefix = key_prefix + database = redis + + schema = DocWithFTS.redisearch_schema() + + # Should have TAG with custom separator AND TEXT field + assert "title TAG SEPARATOR ," in schema + assert "title_fts TEXT" in schema + + +class TestEdgeCases: + """Test edge cases for separator functionality.""" + + def test_separator_with_non_indexed_field(self): + """Separator on non-indexed field should be stored but not affect schema.""" + field = Field(index=False, separator=",") + assert field.separator == "," + + @py_test_mark_asyncio + async def test_multiple_fields_with_different_separators(self, key_prefix, redis): + """Multiple fields can have different separators.""" + + class MultiSepDoc(JsonModel, index=True): + comma_tags: str = Field(index=True, separator=",") + semicolon_tags: str = Field(index=True, separator=";") + default_tags: str = Field(index=True) # Default pipe + + class Meta: + global_key_prefix = key_prefix + database = redis + + schema = MultiSepDoc.redisearch_schema() + + assert "comma_tags TAG SEPARATOR ," in schema + assert "semicolon_tags TAG SEPARATOR ;" in schema + assert "default_tags TAG SEPARATOR |" in schema + + @py_test_mark_asyncio + async def test_primary_key_uses_default_separator(self, key_prefix, redis): + """Primary key fields should use default separator.""" + + class DocWithPK(JsonModel, index=True): + custom_pk: str = Field(primary_key=True) + tags: str = Field(index=True, separator=",") + + class Meta: + global_key_prefix = key_prefix + database = redis + + schema = DocWithPK.redisearch_schema() + + # Primary key should use default separator + assert "custom_pk TAG SEPARATOR |" in schema + # Tags should use custom separator + assert "tags TAG SEPARATOR ," in schema