Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,17 @@ jobs:
fail-fast: false
matrix:
include:
- {os: windows-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"}
- {os: windows-latest, python: "3.11", dask-version: "latest", name: "Dask latest"}
- {os: ubuntu-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"}
- {os: ubuntu-latest, python: "3.11", dask-version: "latest", name: "Dask latest"}
- {os: windows-latest, python: "3.12", dask-version: "2025.2.0", name: "Dask 2025.2.0"}
- {os: windows-latest, python: "3.13", dask-version: "latest", name: "Dask latest"}
- {os: ubuntu-latest, python: "3.12", dask-version: "latest", name: "Dask latest"}
- {os: ubuntu-latest, python: "3.13", dask-version: "latest", name: "Dask latest"}
- {os: macos-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"}
- {os: macos-latest, python: "3.11", dask-version: "latest", name: "Dask latest"}
- {os: macos-latest, python: "3.12", pip-flags: "--pre", name: "Python 3.12 (pre-release)"}
- {os: macos-latest, python: "3.12", dask-version: "latest", name: "Dask latest"}
- {os: macos-latest, python: "3.13", prerelease: "allow", name: "Python 3.13 (pre-release)"}
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python }}
DASK_VERSION: ${{ matrix.dask-version }}
PRERELEASE: ${{ matrix.prerelease }}

steps:
- uses: actions/checkout@v2
Expand All @@ -41,7 +40,11 @@ jobs:
python-version: ${{ matrix.python }}
- name: Install dependencies
run: |
uv sync --extra test
if [[ -n "${PRERELEASE}" ]]; then
uv sync --extra test --group simulate-prerelease --prerelease ${PRERELEASE}
else
uv sync --extra test
fi
if [[ -n "${DASK_VERSION}" ]]; then
if [[ "${DASK_VERSION}" == "latest" ]]; then
uv pip install --upgrade dask
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ version: 2
build:
os: ubuntu-20.04
tools:
python: "3.11"
python: "3.12"
sphinx:
configuration: docs/conf.py
fail_on_warning: true
Expand Down
11 changes: 10 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ maintainers = [
urls.Documentation = "https://spatialdata.scverse.org/en/latest"
urls.Source = "https://github.com/scverse/spatialdata.git"
urls.Home-page = "https://github.com/scverse/spatialdata.git"
requires-python = ">=3.11"
requires-python = ">=3.12"
dynamic= [
"version" # allow version to be set by git tags
]
Expand Down Expand Up @@ -87,6 +87,15 @@ extra = [
"spatialdata-io",
]

[dependency-groups]
simulate-prerelease = [
# also use --prerelease allow, this is the full call:
# uv sync --extra test --group simulate-prerelease --prerelease allow --upgrade
# uv run --no-sync python
"geopandas @ git+https://github.com/geopandas/geopandas.git@main",
"anndata @ git+https://github.com/scverse/anndata.git@main",
]

[tool.coverage.run]
source = ["spatialdata"]
omit = [
Expand Down
2 changes: 1 addition & 1 deletion src/spatialdata/_core/operations/rasterize_bins.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def _get_relabeled_column_name(column_name: str) -> str:
def _relabel_labels(table: AnnData, instance_key: str) -> pd.Series:
labels_values_count = len(table.obs[instance_key].unique())

is_not_numeric = not np.issubdtype(table.obs[instance_key].dtype, np.number)
is_not_numeric = not pd.api.types.is_numeric_dtype(table.obs[instance_key].dtype)
zero_in_instance_key = 0 in table.obs[instance_key].values
has_gaps = not is_not_numeric and labels_values_count != table.obs[instance_key].max() + int(zero_in_instance_key)

Expand Down
11 changes: 10 additions & 1 deletion src/spatialdata/_io/io_shapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,9 @@ def _write_shapes_v01(shapes: GeoDataFrame, group: zarr.Group, element_format: F
"""
import numcodecs

geometry, coords, offsets = to_ragged_array(shapes.geometry)
# np.array() creates a writable copy, needed for pandas 3.0 CoW compatibility
# https://github.com/geopandas/geopandas/issues/3697
geometry, coords, offsets = to_ragged_array(np.array(shapes.geometry))
group.create_array(name="coords", data=coords)
for i, o in enumerate(offsets):
group.create_array(name=f"offset{i}", data=o)
Expand Down Expand Up @@ -149,9 +151,16 @@ def _write_shapes_v02_v03(shapes: GeoDataFrame, group: zarr.Group, element_forma
element_format
The format of the shapes element used to store it.
"""
from spatialdata.models._utils import TRANSFORM_KEY

store_root = group.store_path.store.root
path = store_root / group.path / "shapes.parquet"

# Temporarily remove transformations from attrs to avoid serialization issues
transforms = shapes.attrs[TRANSFORM_KEY]
del shapes.attrs[TRANSFORM_KEY]
shapes.to_parquet(path)
shapes.attrs[TRANSFORM_KEY] = transforms

attrs = element_format.attrs_to_dict(shapes.attrs)
attrs["version"] = element_format.spatialdata_format_version
Expand Down
31 changes: 20 additions & 11 deletions src/spatialdata/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,11 @@ def validate(cls, data: DaskDataFrame) -> None:
if ATTRS_KEY in data.attrs and "feature_key" in data.attrs[ATTRS_KEY]:
feature_key = data.attrs[ATTRS_KEY][cls.FEATURE_KEY]
if feature_key not in data.columns:
warnings.warn(f"Column `{feature_key}` not found." + SUGGESTION, UserWarning, stacklevel=2)
warnings.warn(
f"Column `{feature_key}` not found." + SUGGESTION,
UserWarning,
stacklevel=2,
)

@singledispatchmethod
@classmethod
Expand Down Expand Up @@ -1028,16 +1032,21 @@ def _validate_table_annotation_metadata(self, data: AnnData) -> None:
raise ValueError(f"`{attr[self.REGION_KEY_KEY]}` not found in `adata.obs`. Please create the column.")
if attr[self.INSTANCE_KEY] not in data.obs:
raise ValueError(f"`{attr[self.INSTANCE_KEY]}` not found in `adata.obs`. Please create the column.")
if (dtype := data.obs[attr[self.INSTANCE_KEY]].dtype) not in [
int,
np.int16,
np.uint16,
np.int32,
np.uint32,
np.int64,
np.uint64,
"O",
] or (dtype == "O" and (val_dtype := type(data.obs[attr[self.INSTANCE_KEY]].iloc[0])) is not str):
if (
(dtype := data.obs[attr[self.INSTANCE_KEY]].dtype)
not in [
int,
np.int16,
np.uint16,
np.int32,
np.uint32,
np.int64,
np.uint64,
"O",
]
and not pd.api.types.is_string_dtype(data.obs[attr[self.INSTANCE_KEY]])
or (dtype == "O" and (val_dtype := type(data.obs[attr[self.INSTANCE_KEY]].iloc[0])) is not str)
):
dtype = dtype if dtype != "O" else val_dtype
raise TypeError(
f"Only int, np.int16, np.int32, np.int64, uint equivalents or string allowed as dtype for "
Expand Down
46 changes: 16 additions & 30 deletions tests/io/test_partial_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from pathlib import Path
from typing import TYPE_CHECKING

import anndata
import py
import pytest
import zarr
from pyarrow import ArrowInvalid
from zarr.errors import ArrayNotFoundError, ZarrUserWarning

Expand Down Expand Up @@ -397,21 +397,24 @@ def sdata_with_invalid_zarr_json_element_violating_spec(session_tmp_path: Path)
)


@pytest.fixture(scope="module")
def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialReadTestCase:
def _create_sdata_with_table_region_not_found(session_tmp_path: Path, zarr_version: int) -> PartialReadTestCase:
"""Helper for table region not found test cases (zarr v2 and v3)."""
# table/table/.zarr referring to a region that is not found
# This has been emitting just a warning, but does not fail reading the table element.
sdata = blobs()
sdata_path = session_tmp_path / "sdata_with_invalid_table_region_not_found_zarrv3.zarr"
sdata.write(sdata_path)
sdata_path = session_tmp_path / f"sdata_with_table_region_not_found_zarrv{zarr_version}.zarr"
if zarr_version == 2:
sdata.write(sdata_path, sdata_formats=SpatialDataContainerFormatV01())
else:
sdata.write(sdata_path)

corrupted = "blobs_labels"
# The element data is missing
sdata.delete_element_from_disk(corrupted)
# But the labels element is referenced as a region in a table
regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r")
arrs = dict(regions.arrays())
assert corrupted in arrs["categories"][arrs["codes"]]
adata = anndata.read_zarr(sdata_path / "tables" / "table")
assert corrupted in adata.obs["region"].values

not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]

return PartialReadTestCase(
Expand All @@ -425,30 +428,13 @@ def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialR


@pytest.fixture(scope="module")
def sdata_with_table_region_not_found_zarrv2(session_tmp_path: Path) -> PartialReadTestCase:
# table/table/.zarr referring to a region that is not found
# This has been emitting just a warning, but does not fail reading the table element.
sdata = blobs()
sdata_path = session_tmp_path / "sdata_with_invalid_zattrs_table_region_not_found.zarr"
sdata.write(sdata_path, sdata_formats=SpatialDataContainerFormatV01())
def sdata_with_table_region_not_found_zarrv3(session_tmp_path: Path) -> PartialReadTestCase:
    """Zarr v3 variant of the table-region-not-found partial-read test case.

    Delegates to the shared helper, which writes a blobs SpatialData store,
    deletes the labels element referenced by the table, and returns the
    expected elements/warnings for the partial-read test.
    """
    return _create_sdata_with_table_region_not_found(
        session_tmp_path=session_tmp_path,
        zarr_version=3,
    )

corrupted = "blobs_labels"
# The element data is missing
sdata.delete_element_from_disk(corrupted)
# But the labels element is referenced as a region in a table
regions = zarr.open_group(sdata_path / "tables" / "table" / "obs" / "region", mode="r")
arrs = dict(regions.arrays())
assert corrupted in arrs["categories"][arrs["codes"]]
not_corrupted = [name for _, name, _ in sdata.gen_elements() if name != corrupted]

return PartialReadTestCase(
path=sdata_path,
expected_elements=not_corrupted,
expected_exceptions=(),
warnings_patterns=[
rf"The table is annotating '{re.escape(corrupted)}', which is not present in the SpatialData object"
],
)
@pytest.fixture(scope="module")
def sdata_with_table_region_not_found_zarrv2(session_tmp_path: Path) -> PartialReadTestCase:
    """Zarr v2 variant of the table-region-not-found partial-read test case.

    Delegates to the shared helper, which writes a blobs SpatialData store,
    deletes the labels element referenced by the table, and returns the
    expected elements/warnings for the partial-read test.
    """
    return _create_sdata_with_table_region_not_found(
        session_tmp_path=session_tmp_path,
        zarr_version=2,
    )


@pytest.mark.parametrize(
Expand Down
Loading