diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index ec753d95..61931001 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -19,18 +19,17 @@ jobs: fail-fast: false matrix: include: - - {os: windows-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"} - - {os: windows-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} - - {os: ubuntu-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"} - - {os: ubuntu-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} + - {os: windows-latest, python: "3.12", dask-version: "2025.2.0", name: "Dask 2025.2.0"} + - {os: windows-latest, python: "3.13", dask-version: "latest", name: "Dask latest"} + - {os: ubuntu-latest, python: "3.12", dask-version: "latest", name: "Dask latest"} - {os: ubuntu-latest, python: "3.13", dask-version: "latest", name: "Dask latest"} - - {os: macos-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"} - - {os: macos-latest, python: "3.11", dask-version: "latest", name: "Dask latest"} - - {os: macos-latest, python: "3.12", pip-flags: "--pre", name: "Python 3.12 (pre-release)"} + - {os: macos-latest, python: "3.12", dask-version: "latest", name: "Dask latest"} + - {os: macos-latest, python: "3.13", prerelease: "allow", name: "Python 3.13 (pre-release)"} env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python }} DASK_VERSION: ${{ matrix.dask-version }} + PRERELEASE: ${{ matrix.prerelease }} steps: - uses: actions/checkout@v2 @@ -41,7 +40,11 @@ jobs: python-version: ${{ matrix.python }} - name: Install dependencies run: | - uv sync --extra test + if [[ -n "${PRERELEASE}" ]]; then + uv sync --extra test --group simulate-prerelease --prerelease ${PRERELEASE} + else + uv sync --extra test + fi if [[ -n "${DASK_VERSION}" ]]; then if [[ "${DASK_VERSION}" == "latest" ]]; then uv pip install --upgrade dask diff --git a/.readthedocs.yaml b/.readthedocs.yaml index acecf90e..1b41715d 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,7 +3,7 @@ version: 2 build: os: ubuntu-20.04 tools: - python: "3.11" + python: "3.12" sphinx: configuration: docs/conf.py fail_on_warning: true diff --git a/pyproject.toml b/pyproject.toml index d61d6a2c..2f5cb4b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ maintainers = [ urls.Documentation = "https://spatialdata.scverse.org/en/latest" urls.Source = "https://github.com/scverse/spatialdata.git" urls.Home-page = "https://github.com/scverse/spatialdata.git" -requires-python = ">=3.11" +requires-python = ">=3.12" dynamic= [ "version" # allow version to be set by git tags ] @@ -87,6 +87,15 @@ extra = [ "spatialdata-io", ] +[dependency-groups] +simulate-prerelease = [ + # also use --prerelease allow, this is the full call: + # uv sync --extra test --group simulate-prerelease --prerelease allow --upgrade + # uv run --no-sync python + "geopandas @ git+https://github.com/geopandas/geopandas.git@main", + "anndata @ git+https://github.com/scverse/anndata.git@main", +] + [tool.coverage.run] source = ["spatialdata"] omit = [ diff --git a/src/spatialdata/_core/operations/rasterize_bins.py b/src/spatialdata/_core/operations/rasterize_bins.py index 17470812..f61d1758 100644 --- a/src/spatialdata/_core/operations/rasterize_bins.py +++ b/src/spatialdata/_core/operations/rasterize_bins.py @@ -246,7 +246,7 @@ def _get_relabeled_column_name(column_name: str) -> str: def _relabel_labels(table: AnnData, instance_key: str) -> pd.Series: labels_values_count = len(table.obs[instance_key].unique()) - is_not_numeric = not np.issubdtype(table.obs[instance_key].dtype, np.number) + is_not_numeric = not pd.api.types.is_numeric_dtype(table.obs[instance_key].dtype) zero_in_instance_key = 0 in table.obs[instance_key].values has_gaps = not is_not_numeric and labels_values_count != table.obs[instance_key].max() + int(zero_in_instance_key) diff --git a/src/spatialdata/_io/io_shapes.py b/src/spatialdata/_io/io_shapes.py index f204e387..8e6d4a60 100644 --- a/src/spatialdata/_io/io_shapes.py +++ b/src/spatialdata/_io/io_shapes.py @@ -121,7 +121,9 @@ def _write_shapes_v01(shapes: GeoDataFrame, group: zarr.Group, element_format: F """ import numcodecs - geometry, coords, offsets = to_ragged_array(shapes.geometry) + # np.array() creates a writable copy, needed for pandas 3.0 CoW compatibility + # https://github.com/geopandas/geopandas/issues/3697 + geometry, coords, offsets = to_ragged_array(np.array(shapes.geometry)) group.create_array(name="coords", data=coords) for i, o in enumerate(offsets): group.create_array(name=f"offset{i}", data=o) @@ -149,9 +151,16 @@ def _write_shapes_v02_v03(shapes: GeoDataFrame, group: zarr.Group, element_forma element_format The format of the shapes element used to store it. """ + from spatialdata.models._utils import TRANSFORM_KEY + store_root = group.store_path.store.root path = store_root / group.path / "shapes.parquet" + + # Temporarily remove transformations from attrs to avoid serialization issues + transforms = shapes.attrs[TRANSFORM_KEY] + del shapes.attrs[TRANSFORM_KEY] shapes.to_parquet(path) + shapes.attrs[TRANSFORM_KEY] = transforms attrs = element_format.attrs_to_dict(shapes.attrs) attrs["version"] = element_format.spatialdata_format_version diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index bed33ff1..5428c5c2 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -668,7 +668,11 @@ def validate(cls, data: DaskDataFrame) -> None: if ATTRS_KEY in data.attrs and "feature_key" in data.attrs[ATTRS_KEY]: feature_key = data.attrs[ATTRS_KEY][cls.FEATURE_KEY] if feature_key not in data.columns: - warnings.warn(f"Column `{feature_key}` not found." + SUGGESTION, UserWarning, stacklevel=2) + warnings.warn( + f"Column `{feature_key}` not found." + SUGGESTION, + UserWarning, + stacklevel=2, + ) @singledispatchmethod @classmethod @@ -1028,16 +1032,21 @@ def _validate_table_annotation_metadata(self, data: AnnData) -> None: raise ValueError(f"`{attr[self.REGION_KEY_KEY]}` not found in `adata.obs`. Please create the column.") if attr[self.INSTANCE_KEY] not in data.obs: raise ValueError(f"`{attr[self.INSTANCE_KEY]}` not found in `adata.obs`. Please create the column.") - if (dtype := data.obs[attr[self.INSTANCE_KEY]].dtype) not in [ - int, - np.int16, - np.uint16, - np.int32, - np.uint32, - np.int64, - np.uint64, - "O", - ] or (dtype == "O" and (val_dtype := type(data.obs[attr[self.INSTANCE_KEY]].iloc[0])) is not str): + if ( + (dtype := data.obs[attr[self.INSTANCE_KEY]].dtype) + not in [ + int, + np.int16, + np.uint16, + np.int32, + np.uint32, + np.int64, + np.uint64, + "O", + ] + and not pd.api.types.is_string_dtype(data.obs[attr[self.INSTANCE_KEY]]) + or (dtype == "O" and (val_dtype := type(data.obs[attr[self.INSTANCE_KEY]].iloc[0])) is not str) + ): dtype = dtype if dtype != "O" else val_dtype raise TypeError( f"Only int, np.int16, np.int32, np.int64, uint equivalents or string allowed as dtype for " diff --git a/tests/io/test_partial_read.py b/tests/io/test_partial_read.py index e200c1fa..7c5d4784 100644 --- a/tests/io/test_partial_read.py +++ b/tests/io/test_partial_read.py @@ -11,9 +11,9 @@ from pathlib import Path from typing import TYPE_CHECKING +import anndata import py import pytest -import zarr from pyarrow import ArrowInvalid from zarr.errors import ArrayNotFoundError, ZarrUserWarning @@ -397,21 +397,24 @@ def sdata_with_invalid_zarr_json_element_violating_spec(session_tmp_path: Path) ) -@pytest.fixture(scope="module") -def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialReadTestCase: +def _create_sdata_with_table_region_not_found(session_tmp_path: Path, zarr_version: int) -> PartialReadTestCase: + """Helper for table region not found test cases (zarr v2 and v3).""" # table/table/.zarr referring to a region that is not found # This has been emitting just a warning, but does not fail reading the table element. sdata = blobs() - sdata_path = session_tmp_path / "sdata_with_invalid_table_region_not_found_zarrv3.zarr" - sdata.write(sdata_path) + sdata_path = session_tmp_path / f"sdata_with_table_region_not_found_zarrv{zarr_version}.zarr" + if zarr_version == 2: + sdata.write(sdata_path, sdata_formats=SpatialDataContainerFormatV01()) + else: + sdata.write(sdata_path) corrupted = "blobs_labels" # The element data is missing sdata.delete_element_from_disk(corrupted) # But the labels element is referenced as a region in a table - regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r") - arrs = dict(regions.arrays()) - assert corrupted in arrs["categories"][arrs["codes"]] + adata = anndata.read_zarr(sdata_path / "tables" / "table") + assert corrupted in adata.obs["region"].values + not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted] return PartialReadTestCase( @@ -425,30 +428,13 @@ def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialR @pytest.fixture(scope="module") -def sdata_with_table_region_not_found_zarrv2(session_tmp_path: Path) -> PartialReadTestCase: - # table/table/.zarr referring to a region that is not found - # This has been emitting just a warning, but does not fail reading the table element. - sdata = blobs() - sdata_path = session_tmp_path / "sdata_with_invalid_zattrs_table_region_not_found.zarr" - sdata.write(sdata_path, sdata_formats=SpatialDataContainerFormatV01()) +def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialReadTestCase: + return _create_sdata_with_table_region_not_found(session_tmp_path, zarr_version=3) - corrupted = "blobs_labels" - # The element data is missing - sdata.delete_element_from_disk(corrupted) - # But the labels element is referenced as a region in a table - regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r") - arrs = dict(regions.arrays()) - assert corrupted in arrs["categories"][arrs["codes"]] - not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted] - return PartialReadTestCase( - path=sdata_path, - expected_elements=not_corrupted, - expected_exceptions=(), - warnings_patterns=[ - rf"The table is annotating '{re.escape(corrupted)}', which is not present in the SpatialData object" - ], - ) +@pytest.fixture(scope="module") +def sdata_with_table_region_not_found_zarrv2(session_tmp_path: Path) -> PartialReadTestCase: + return _create_sdata_with_table_region_not_found(session_tmp_path, zarr_version=2) @pytest.mark.parametrize(