From 8267d3a6ae32490dde416a02219f16b336674207 Mon Sep 17 00:00:00 2001
From: Sigfried Gold <sigfried@sigfried.org>
Date: Tue, 17 Mar 2026 14:40:19 -0400
Subject: [PATCH 1/4] Forward schema_path/target_class to linkml's delimited
 file loader
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass through the new schema-aware loading params so that string-ranged
and enum-ranged columns in TSV/CSV files are not coerced to int/float.

Requires linkml >=1.11 (PR linkml/linkml#3289 added schema-awareness to
the underlying _DelimitedFileLoader; released in v1.11.0).

No new imports from `typing` are needed — the new annotations use
PEP 604 `X | Y | None` syntax to match the rest of the file's style.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/linkml_map/loaders/data_loaders.py |  45 +++++++-
 tests/test_loaders/test_data_loader.py | 154 +++++++++++++++++++++++++
 2 files changed, 195 insertions(+), 4 deletions(-)

diff --git a/src/linkml_map/loaders/data_loaders.py b/src/linkml_map/loaders/data_loaders.py
index 42d542e5..63c796dd 100644
--- a/src/linkml_map/loaders/data_loaders.py
+++ b/src/linkml_map/loaders/data_loaders.py
@@ -83,16 +83,25 @@ def __init__(
         self,
         source: str | Path,
         skip_empty_rows: bool = True,
+        schema_path: str | Path | None = None,
+        target_class: str | None = None,
     ) -> None:
         """Initialize TSV loader."""
         super().__init__(source)
         self.skip_empty_rows = skip_empty_rows
+        self.schema_path = schema_path
+        self.target_class = target_class
 
     def iter_instances(self) -> Iterator[dict[str, Any]]:
         """Iterate over rows from the TSV file."""
         from linkml.validator.loaders import TsvLoader
 
-        loader = TsvLoader(str(self.source), skip_empty_rows=self.skip_empty_rows)
+        loader = TsvLoader(
+            str(self.source),
+            skip_empty_rows=self.skip_empty_rows,
+            schema_path=self.schema_path,
+            target_class=self.target_class,
+        )
         yield from loader.iter_instances()
 
 
@@ -103,22 +112,34 @@ def __init__(
         self,
         source: str | Path,
         skip_empty_rows: bool = True,
+        schema_path: str | Path | None = None,
+        target_class: str | None = None,
     ) -> None:
         """Initialize CSV loader."""
         super().__init__(source)
         self.skip_empty_rows = skip_empty_rows
+        self.schema_path = schema_path
+        self.target_class = target_class
 
     def iter_instances(self) -> Iterator[dict[str, Any]]:
         """Iterate over rows from the CSV file."""
         from linkml.validator.loaders import CsvLoader
 
-        loader = CsvLoader(str(self.source), skip_empty_rows=self.skip_empty_rows)
+        loader = CsvLoader(
+            str(self.source),
+            skip_empty_rows=self.skip_empty_rows,
+            schema_path=self.schema_path,
+            target_class=self.target_class,
+        )
         yield from loader.iter_instances()
 
 
 def get_file_loader(
     path: str | Path,
     file_format: FileFormat | None = None,
+    *,
+    schema_path: str | Path | None = None,
+    target_class: str | None = None,
     **kwargs: Any,
 ) -> BaseFileLoader:
     """
@@ -126,6 +147,8 @@ def get_file_loader(
 
     :param path: Path to the file
     :param file_format: Explicit file format (auto-detected from extension if not provided)
+    :param schema_path: Path to the LinkML schema (enables schema-aware type coercion for TSV/CSV)
+    :param target_class: Target class name within the schema
     :param kwargs: Additional arguments passed to the loader
     :return: Appropriate file loader instance
     """
@@ -144,6 +167,10 @@ def get_file_loader(
         msg = f"No loader available for format: {file_format}"
         raise ValueError(msg)
 
+    if file_format in (FileFormat.TSV, FileFormat.CSV):
+        kwargs["schema_path"] = schema_path
+        kwargs["target_class"] = target_class
+
     return loader_class(path, **kwargs)
 
 
@@ -175,6 +202,8 @@ def __init__(
         base_path: str | Path,
         default_format: FileFormat | None = None,
         skip_empty_rows: bool = True,
+        schema_path: str | Path | None = None,
+        target_class: str | None = None,
     ) -> None:
         """
         Initialize the data loader.
@@ -182,6 +211,8 @@ def __init__(
         :param base_path: Base directory containing data files, or a single file path
         :param default_format: Default format to use when extension is ambiguous
         :param skip_empty_rows: Skip empty rows in tabular files (default: True)
+        :param schema_path: Path to the LinkML schema (enables schema-aware type coercion for TSV/CSV)
+        :param target_class: Target class name within the schema
         :raises FileNotFoundError: If the path does not exist
         """
         self.base_path = Path(base_path)
@@ -190,6 +221,8 @@ def __init__(
             raise FileNotFoundError(msg)
         self.default_format = default_format
         self.skip_empty_rows = skip_empty_rows
+        self.schema_path = schema_path
+        self.target_class = target_class
 
     @property
     def is_single_file(self) -> bool:
@@ -282,7 +315,9 @@ def __getitem__(self, identifier: str) -> Iterator[dict[str, Any]]:
         if file_format in (FileFormat.TSV, FileFormat.CSV):
             loader_kwargs["skip_empty_rows"] = self.skip_empty_rows
 
-        loader = get_file_loader(file_path, **loader_kwargs)
+        loader = get_file_loader(
+            file_path, schema_path=self.schema_path, target_class=self.target_class, **loader_kwargs
+        )
         return loader.iter_instances()
 
     def __iter__(self) -> Iterator[dict[str, Any]]:
@@ -296,7 +331,9 @@ def __iter__(self) -> Iterator[dict[str, Any]]:
         if file_format in (FileFormat.TSV, FileFormat.CSV):
             loader_kwargs["skip_empty_rows"] = self.skip_empty_rows
 
-        loader = get_file_loader(self.base_path, **loader_kwargs)
+        loader = get_file_loader(
+            self.base_path, schema_path=self.schema_path, target_class=self.target_class, **loader_kwargs
+        )
         yield from loader.iter_instances()
 
     def get_available_identifiers(self) -> list[str]:
diff --git a/tests/test_loaders/test_data_loader.py b/tests/test_loaders/test_data_loader.py
index 36e79439..0cda0d68 100644
--- a/tests/test_loaders/test_data_loader.py
+++ b/tests/test_loaders/test_data_loader.py
@@ -7,6 +7,34 @@
 import yaml
 
 from linkml_map.loaders import DataLoader, FileFormat, load_data_file
+from linkml_map.loaders.data_loaders import CsvFileLoader, TsvFileLoader, get_file_loader
+
+SCHEMA_WITH_ENUM = {
+    "id": "https://example.org/test",
+    "name": "test",
+    "prefixes": {"linkml": "https://w3id.org/linkml/"},
+    "imports": ["linkml:types"],
+    "default_range": "string",
+    "classes": {
+        "Record": {
+            "attributes": {
+                "id": {"range": "integer", "identifier": True},
+                "zipcode": {"range": "string"},
+                "score": {"range": "score_enum"},
+                "weight": {"range": "float"},
+            }
+        }
+    },
+    "enums": {
+        "score_enum": {
+            "permissible_values": {
+                "1": {"description": "Low"},
+                "2": {"description": "Medium"},
+                "3": {"description": "High"},
+            }
+        }
+    },
+}
 
 
 @pytest.fixture
@@ -313,3 +341,129 @@ def test_skip_empty_rows_false(self, tmp_path: Path) -> None:
         assert rows[1]["id"] == "P:002"
         # Empty string values are not included in the dict by linkml's loader
         assert "name" not in rows[1] or rows[1].get("name") == ""
+
+
+# --- Schema-aware loading tests ---
+# These verify that schema_path/target_class flow through to the underlying
+# linkml loader so that string-ranged and enum-ranged columns are not
+# coerced to int/float.
+
+
+@pytest.fixture()
+def schema_file(tmp_path: Path) -> Path:
+    """Write the test schema to a YAML file and return its path."""
+    path = tmp_path / "schema.yaml"
+    path.write_text(yaml.dump(SCHEMA_WITH_ENUM))
+    return path
+
+
+@pytest.fixture()
+def schema_aware_tsv(tmp_path: Path) -> Path:
+    """TSV with numeric-looking values in string and enum columns."""
+    path = tmp_path / "Record.tsv"
+    path.write_text("id\tzipcode\tscore\tweight\n1\t90210\t2\t3.5\n")
+    return path
+
+
+@pytest.fixture()
+def schema_aware_csv(tmp_path: Path) -> Path:
+    """CSV with numeric-looking values in string and enum columns."""
+    path = tmp_path / "Record.csv"
+    path.write_text("id,zipcode,score,weight\n1,90210,2,3.5\n")
+    return path
+
+
+def _assert_schema_aware_row(row: dict) -> None:
+    """Shared assertions for schema-aware loading: string/enum columns stay strings."""
+    assert row["id"] == 1
+    assert isinstance(row["id"], int)
+    assert row["zipcode"] == "90210"
+    assert isinstance(row["zipcode"], str)
+    assert row["score"] == "2"
+    assert isinstance(row["score"], str)
+    assert row["weight"] == 3.5
+    assert isinstance(row["weight"], float)
+
+
+class TestSchemaAwareTsvFileLoader:
+    """TsvFileLoader preserves string/enum columns when given a schema."""
+
+    def test_with_schema(self, schema_aware_tsv: Path, schema_file: Path) -> None:
+        loader = TsvFileLoader(schema_aware_tsv, schema_path=schema_file, target_class="Record")
+        row = next(loader.iter_instances())
+        _assert_schema_aware_row(row)
+
+    def test_without_schema_coerces(self, schema_aware_tsv: Path) -> None:
+        loader = TsvFileLoader(schema_aware_tsv)
+        row = next(loader.iter_instances())
+        assert isinstance(row["zipcode"], int)
+        assert isinstance(row["score"], int)
+
+
+class TestSchemaAwareCsvFileLoader:
+    """CsvFileLoader preserves string/enum columns when given a schema."""
+
+    def test_with_schema(self, schema_aware_csv: Path, schema_file: Path) -> None:
+        loader = CsvFileLoader(schema_aware_csv, schema_path=schema_file, target_class="Record")
+        row = next(loader.iter_instances())
+        _assert_schema_aware_row(row)
+
+    def test_without_schema_coerces(self, schema_aware_csv: Path) -> None:
+        loader = CsvFileLoader(schema_aware_csv)
+        row = next(loader.iter_instances())
+        assert isinstance(row["zipcode"], int)
+        assert isinstance(row["score"], int)
+
+
+class TestSchemaAwareGetFileLoader:
+    """get_file_loader forwards schema params to TSV/CSV loaders."""
+
+    @pytest.mark.parametrize("fixture_name", ["schema_aware_tsv", "schema_aware_csv"])
+    def test_with_schema(self, fixture_name: str, schema_file: Path, request: pytest.FixtureRequest) -> None:
+        data_file = request.getfixturevalue(fixture_name)
+        loader = get_file_loader(data_file, schema_path=schema_file, target_class="Record")
+        row = next(loader.iter_instances())
+        _assert_schema_aware_row(row)
+
+    def test_ignored_for_yaml(self, tmp_path: Path, schema_file: Path) -> None:
+        """schema_path/target_class are accepted but ignored for non-tabular formats."""
+        yaml_path = tmp_path / "data.yaml"
+        yaml_path.write_text(yaml.dump({"id": 1, "zipcode": "90210"}))
+        loader = get_file_loader(yaml_path, schema_path=schema_file, target_class="Record")
+        row = next(loader.iter_instances())
+        assert row["id"] == 1
+
+
+class TestSchemaAwareDataLoader:
+    """DataLoader forwards schema params through to underlying loaders."""
+
+    def test_single_file_with_schema(
+        self, schema_aware_tsv: Path, schema_file: Path
+    ) -> None:
+        loader = DataLoader(schema_aware_tsv, schema_path=schema_file, target_class="Record")
+        row = next(iter(loader))
+        _assert_schema_aware_row(row)
+
+    def test_directory_with_schema(self, tmp_path: Path, schema_file: Path) -> None:
+        tsv_path = tmp_path / "Record.tsv"
+        tsv_path.write_text("id\tzipcode\tscore\tweight\n1\t90210\t2\t3.5\n")
+        loader = DataLoader(tmp_path, schema_path=schema_file, target_class="Record")
+        row = next(loader["Record"])
+        _assert_schema_aware_row(row)
+
+    def test_directory_without_schema_coerces(self, tmp_path: Path) -> None:
+        tsv_path = tmp_path / "Record.tsv"
+        tsv_path.write_text("id\tzipcode\tscore\tweight\n1\t90210\t2\t3.5\n")
+        loader = DataLoader(tmp_path)
+        row = next(loader["Record"])
+        assert isinstance(row["zipcode"], int)
+
+    def test_iter_sources_with_schema(
+        self, schema_aware_tsv: Path, schema_file: Path
+    ) -> None:
+        loader = DataLoader(schema_aware_tsv, schema_path=schema_file, target_class="Record")
+        sources = list(loader.iter_sources())
+        assert len(sources) == 1
+        _, rows = sources[0]
+        row = next(rows)
+        _assert_schema_aware_row(row)

From ba1d4d422b25cb54989313c934f1b7f88154485a Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Fri, 22 May 2026 15:00:22 -0500
Subject: [PATCH 2/4] Simplify schema-param wiring in get_file_loader

Drop the keyword-only schema_path/target_class parameters from
get_file_loader and let them flow through **kwargs to the loader class.
The DataLoader call sites add them in the same conditional block that
already gates skip_empty_rows, so YAML/JSON loaders never receive them.
Passing them to a non-tabular loader now raises TypeError instead of
being silently swallowed.
---
 src/linkml_map/loaders/data_loaders.py | 23 +++++++----------------
 tests/test_loaders/test_data_loader.py | 17 ++++++-----------
 2 files changed, 13 insertions(+), 27 deletions(-)

diff --git a/src/linkml_map/loaders/data_loaders.py b/src/linkml_map/loaders/data_loaders.py
index 63c796dd..a5423bb7 100644
--- a/src/linkml_map/loaders/data_loaders.py
+++ b/src/linkml_map/loaders/data_loaders.py
@@ -137,9 +137,6 @@ def iter_instances(self) -> Iterator[dict[str, Any]]:
 def get_file_loader(
     path: str | Path,
     file_format: FileFormat | None = None,
-    *,
-    schema_path: str | Path | None = None,
-    target_class: str | None = None,
     **kwargs: Any,
 ) -> BaseFileLoader:
     """
@@ -147,9 +144,7 @@ def get_file_loader(
 
     :param path: Path to the file
     :param file_format: Explicit file format (auto-detected from extension if not provided)
-    :param schema_path: Path to the LinkML schema (enables schema-aware type coercion for TSV/CSV)
-    :param target_class: Target class name within the schema
-    :param kwargs: Additional arguments passed to the loader
+    :param kwargs: Additional arguments passed to the loader class
     :return: Appropriate file loader instance
     """
     if file_format is None:
@@ -167,10 +162,6 @@ def get_file_loader(
         msg = f"No loader available for format: {file_format}"
         raise ValueError(msg)
 
-    if file_format in (FileFormat.TSV, FileFormat.CSV):
-        kwargs["schema_path"] = schema_path
-        kwargs["target_class"] = target_class
-
     return loader_class(path, **kwargs)
 
 
@@ -314,10 +305,10 @@ def __getitem__(self, identifier: str) -> Iterator[dict[str, Any]]:
         file_format = FileFormat.from_extension(file_path)
         if file_format in (FileFormat.TSV, FileFormat.CSV):
             loader_kwargs["skip_empty_rows"] = self.skip_empty_rows
+            loader_kwargs["schema_path"] = self.schema_path
+            loader_kwargs["target_class"] = self.target_class
 
-        loader = get_file_loader(
-            file_path, schema_path=self.schema_path, target_class=self.target_class, **loader_kwargs
-        )
+        loader = get_file_loader(file_path, **loader_kwargs)
         return loader.iter_instances()
 
     def __iter__(self) -> Iterator[dict[str, Any]]:
@@ -330,10 +321,10 @@ def __iter__(self) -> Iterator[dict[str, Any]]:
         file_format = FileFormat.from_extension(self.base_path)
         if file_format in (FileFormat.TSV, FileFormat.CSV):
             loader_kwargs["skip_empty_rows"] = self.skip_empty_rows
+            loader_kwargs["schema_path"] = self.schema_path
+            loader_kwargs["target_class"] = self.target_class
 
-        loader = get_file_loader(
-            self.base_path, schema_path=self.schema_path, target_class=self.target_class, **loader_kwargs
-        )
+        loader = get_file_loader(self.base_path, **loader_kwargs)
         yield from loader.iter_instances()
 
     def get_available_identifiers(self) -> list[str]:
diff --git a/tests/test_loaders/test_data_loader.py b/tests/test_loaders/test_data_loader.py
index 0cda0d68..8e05eab2 100644
--- a/tests/test_loaders/test_data_loader.py
+++ b/tests/test_loaders/test_data_loader.py
@@ -425,21 +425,18 @@ def test_with_schema(self, fixture_name: str, schema_file: Path, request: pytest
         row = next(loader.iter_instances())
         _assert_schema_aware_row(row)
 
-    def test_ignored_for_yaml(self, tmp_path: Path, schema_file: Path) -> None:
-        """schema_path/target_class are accepted but ignored for non-tabular formats."""
+    def test_rejected_for_yaml(self, tmp_path: Path, schema_file: Path) -> None:
+        """schema_path/target_class are not valid kwargs for non-tabular loaders."""
         yaml_path = tmp_path / "data.yaml"
         yaml_path.write_text(yaml.dump({"id": 1, "zipcode": "90210"}))
-        loader = get_file_loader(yaml_path, schema_path=schema_file, target_class="Record")
-        row = next(loader.iter_instances())
-        assert row["id"] == 1
+        with pytest.raises(TypeError):
+            get_file_loader(yaml_path, schema_path=schema_file, target_class="Record")
 
 
 class TestSchemaAwareDataLoader:
     """DataLoader forwards schema params through to underlying loaders."""
 
-    def test_single_file_with_schema(
-        self, schema_aware_tsv: Path, schema_file: Path
-    ) -> None:
+    def test_single_file_with_schema(self, schema_aware_tsv: Path, schema_file: Path) -> None:
         loader = DataLoader(schema_aware_tsv, schema_path=schema_file, target_class="Record")
         row = next(iter(loader))
         _assert_schema_aware_row(row)
@@ -458,9 +455,7 @@ def test_directory_without_schema_coerces(self, tmp_path: Path) -> None:
         row = next(loader["Record"])
         assert isinstance(row["zipcode"], int)
 
-    def test_iter_sources_with_schema(
-        self, schema_aware_tsv: Path, schema_file: Path
-    ) -> None:
+    def test_iter_sources_with_schema(self, schema_aware_tsv: Path, schema_file: Path) -> None:
         loader = DataLoader(schema_aware_tsv, schema_path=schema_file, target_class="Record")
         sources = list(loader.iter_sources())
         assert len(sources) == 1

From 6785bbb4355d081d84595670a9d4ac4c7e59ce57 Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Thu, 4 Jun 2026 17:34:17 -0500
Subject: [PATCH 3/4] Reshape schema-aware loading around SchemaView

DataLoader takes a schemaview instead of schema_path/target_class.
Target class is derived from each file's identifier (directory mode) or
stem (single-file mode), so it composes with multi-file loading. A single
bridge method maps the SchemaView to linkml's path-based delimited loader;
that's the only spot to change when linkml accepts a SchemaView directly.
Leaf loaders keep their path-based schema_path/target_class parameters
to mirror linkml's API.
---
 src/linkml_map/cli/cli.py              |  4 +--
 src/linkml_map/loaders/data_loaders.py | 39 ++++++++++++++++++--------
 tests/test_cli/test_cli_tabular.py     | 35 +++++++++++++++++++++++
 tests/test_loaders/test_data_loader.py | 35 +++++++++++++++++++++--
 4 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/src/linkml_map/cli/cli.py b/src/linkml_map/cli/cli.py
index b118c99a..599c3750 100644
--- a/src/linkml_map/cli/cli.py
+++ b/src/linkml_map/cli/cli.py
@@ -395,8 +395,8 @@ def _map_data_streaming(
     if emit_spec:
         _emit_spec_to_file(tr, emit_spec)
 
-    # Initialize data loader
-    data_loader = DataLoader(input_path)
+    # Initialize data loader (schema enables type-preserving coercion for TSV/CSV)
+    data_loader = DataLoader(input_path, schemaview=tr.source_schemaview)
 
     # Set up error collection when continue-on-error is enabled
     errors: list[TransformationError] = []
diff --git a/src/linkml_map/loaders/data_loaders.py b/src/linkml_map/loaders/data_loaders.py
index a5423bb7..ac24f830 100644
--- a/src/linkml_map/loaders/data_loaders.py
+++ b/src/linkml_map/loaders/data_loaders.py
@@ -8,6 +8,7 @@
 from typing import Any
 
 import yaml
+from linkml_runtime import SchemaView
 
 
 class FileFormat(str, Enum):
@@ -144,7 +145,7 @@ def get_file_loader(
 
     :param path: Path to the file
     :param file_format: Explicit file format (auto-detected from extension if not provided)
-    :param kwargs: Additional arguments passed to the loader class
+    :param kwargs: Additional arguments passed to the loader
     :return: Appropriate file loader instance
     """
     if file_format is None:
@@ -193,8 +194,7 @@ def __init__(
         base_path: str | Path,
         default_format: FileFormat | None = None,
         skip_empty_rows: bool = True,
-        schema_path: str | Path | None = None,
-        target_class: str | None = None,
+        schemaview: SchemaView | None = None,
     ) -> None:
         """
         Initialize the data loader.
@@ -202,8 +202,8 @@ def __init__(
         :param base_path: Base directory containing data files, or a single file path
         :param default_format: Default format to use when extension is ambiguous
         :param skip_empty_rows: Skip empty rows in tabular files (default: True)
-        :param schema_path: Path to the LinkML schema (enables schema-aware type coercion for TSV/CSV)
-        :param target_class: Target class name within the schema
+        :param schemaview: Source schema (enables schema-aware type coercion for TSV/CSV).
+            The target class is derived from each file's identifier.
         :raises FileNotFoundError: If the path does not exist
         """
         self.base_path = Path(base_path)
@@ -212,8 +212,26 @@ def __init__(
             raise FileNotFoundError(msg)
         self.default_format = default_format
         self.skip_empty_rows = skip_empty_rows
-        self.schema_path = schema_path
-        self.target_class = target_class
+        self.schemaview = schemaview
+
+    def _schema_loader_kwargs(self, identifier: str) -> dict[str, Any]:
+        """
+        Build schema-aware kwargs for a TSV/CSV leaf loader.
+
+        linkml's delimited loader currently takes a ``schema_path``, so we bridge
+        the in-scope :class:`SchemaView` to its source file. When that loader gains
+        native ``SchemaView`` support, this is the single spot that changes.
+
+        :param identifier: Names the source class the file's rows conform to.
+        :return: ``schema_path``/``target_class`` kwargs, or empty if no schema is
+            available (in-memory schemas with no source file degrade to no coercion).
+        """
+        if self.schemaview is None:
+            return {}
+        schema_path = self.schemaview.schema.source_file
+        if schema_path is None:
+            return {}
+        return {"schema_path": schema_path, "target_class": identifier}
 
     @property
     def is_single_file(self) -> bool:
@@ -305,8 +323,7 @@ def __getitem__(self, identifier: str) -> Iterator[dict[str, Any]]:
         file_format = FileFormat.from_extension(file_path)
         if file_format in (FileFormat.TSV, FileFormat.CSV):
             loader_kwargs["skip_empty_rows"] = self.skip_empty_rows
-            loader_kwargs["schema_path"] = self.schema_path
-            loader_kwargs["target_class"] = self.target_class
+            loader_kwargs.update(self._schema_loader_kwargs(identifier))
 
         loader = get_file_loader(file_path, **loader_kwargs)
         return loader.iter_instances()
@@ -321,8 +338,8 @@ def __iter__(self) -> Iterator[dict[str, Any]]:
         file_format = FileFormat.from_extension(self.base_path)
         if file_format in (FileFormat.TSV, FileFormat.CSV):
             loader_kwargs["skip_empty_rows"] = self.skip_empty_rows
-            loader_kwargs["schema_path"] = self.schema_path
-            loader_kwargs["target_class"] = self.target_class
+            # Single-file mode: the file stem names the source class.
+            loader_kwargs.update(self._schema_loader_kwargs(self.base_path.stem))
 
         loader = get_file_loader(self.base_path, **loader_kwargs)
         yield from loader.iter_instances()
diff --git a/tests/test_cli/test_cli_tabular.py b/tests/test_cli/test_cli_tabular.py
index 6dcf42fb..4537aa96 100644
--- a/tests/test_cli/test_cli_tabular.py
+++ b/tests/test_cli/test_cli_tabular.py
@@ -152,6 +152,41 @@ def test_tsv_input_jsonl_output(
             assert "id" in obj
             assert "label" in obj
 
+    def test_tsv_string_id_not_numerically_coerced(
+        self,
+        runner: CliRunner,
+        tmp_path: Path,
+        sample_schema: Path,
+        sample_transform: Path,
+    ) -> None:
+        """A numeric-looking string id keeps its leading zeros end-to-end.
+
+        Without schema-aware loading, pandas-style inference coerces "00123"
+        to the int 123, losing the leading zeros and breaking downstream lookups.
+        """
+        tsv_path = tmp_path / "Person.tsv"
+        tsv_path.write_text(
+            "id\tname\tprimary_email\tage_in_years\tgender\n00123\tAlice\talice@example.com\t30\tcisgender woman\n"
+        )
+        result = runner.invoke(
+            main,
+            [
+                "map-data",
+                "-T",
+                str(sample_transform),
+                "-s",
+                str(sample_schema),
+                "--source-type",
+                "Person",
+                "-f",
+                "jsonl",
+                str(tsv_path),
+            ],
+        )
+        assert result.exit_code == 0
+        obj = json.loads(result.stdout.strip())
+        assert obj["id"] == "00123"
+
     def test_tsv_input_tsv_output(
         self,
         runner: CliRunner,
diff --git a/tests/test_loaders/test_data_loader.py b/tests/test_loaders/test_data_loader.py
index 8e05eab2..3ea8cf82 100644
--- a/tests/test_loaders/test_data_loader.py
+++ b/tests/test_loaders/test_data_loader.py
@@ -5,6 +5,7 @@
 
 import pytest
 import yaml
+from linkml_runtime import SchemaView
 
 from linkml_map.loaders import DataLoader, FileFormat, load_data_file
 from linkml_map.loaders.data_loaders import CsvFileLoader, TsvFileLoader, get_file_loader
@@ -437,14 +438,16 @@ class TestSchemaAwareDataLoader:
     """DataLoader forwards schema params through to underlying loaders."""
 
     def test_single_file_with_schema(self, schema_aware_tsv: Path, schema_file: Path) -> None:
-        loader = DataLoader(schema_aware_tsv, schema_path=schema_file, target_class="Record")
+        # Single-file mode derives target_class from the file stem ("Record").
+        loader = DataLoader(schema_aware_tsv, schemaview=SchemaView(str(schema_file)))
         row = next(iter(loader))
         _assert_schema_aware_row(row)
 
     def test_directory_with_schema(self, tmp_path: Path, schema_file: Path) -> None:
         tsv_path = tmp_path / "Record.tsv"
         tsv_path.write_text("id\tzipcode\tscore\tweight\n1\t90210\t2\t3.5\n")
-        loader = DataLoader(tmp_path, schema_path=schema_file, target_class="Record")
+        # Directory mode derives target_class from the identifier ("Record").
+        loader = DataLoader(tmp_path, schemaview=SchemaView(str(schema_file)))
         row = next(loader["Record"])
         _assert_schema_aware_row(row)
 
@@ -456,9 +459,35 @@ def test_directory_without_schema_coerces(self, tmp_path: Path) -> None:
         assert isinstance(row["zipcode"], int)
 
     def test_iter_sources_with_schema(self, schema_aware_tsv: Path, schema_file: Path) -> None:
-        loader = DataLoader(schema_aware_tsv, schema_path=schema_file, target_class="Record")
+        loader = DataLoader(schema_aware_tsv, schemaview=SchemaView(str(schema_file)))
         sources = list(loader.iter_sources())
         assert len(sources) == 1
         _, rows = sources[0]
         row = next(rows)
         _assert_schema_aware_row(row)
+
+    def test_directory_derives_target_class_per_identifier(self, tmp_path: Path) -> None:
+        """Each file's target_class is derived from its identifier, so the same
+        column name is coerced differently depending on its class's schema range."""
+        schema = {
+            "id": "https://example.org/multi",
+            "name": "multi",
+            "prefixes": {"linkml": "https://w3id.org/linkml/"},
+            "imports": ["linkml:types"],
+            "default_range": "string",
+            "classes": {
+                "Coded": {"attributes": {"code": {"range": "string"}}},
+                "Numbered": {"attributes": {"code": {"range": "integer"}}},
+            },
+        }
+        schema_path = tmp_path / "multi.yaml"
+        schema_path.write_text(yaml.dump(schema))
+        (tmp_path / "Coded.tsv").write_text("code\n007\n")
+        (tmp_path / "Numbered.tsv").write_text("code\n007\n")
+
+        loader = DataLoader(tmp_path, schemaview=SchemaView(str(schema_path)))
+        coded = next(loader["Coded"])
+        numbered = next(loader["Numbered"])
+
+        assert coded["code"] == "007"  # string range preserved
+        assert numbered["code"] == 7  # integer range coerced

From d6343a4fec4af65e9ebd001c6b69d554adfd6969 Mon Sep 17 00:00:00 2001
From: amc-corey-cox <69321580+amc-corey-cox@users.noreply.github.com>
Date: Fri, 5 Jun 2026 10:05:45 -0500
Subject: [PATCH 4/4] Address PR review feedback

Pin the get_file_loader TypeError test to the kwarg-rejection message
so an unrelated TypeError can't pass it.
---
 tests/test_loaders/test_data_loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_loaders/test_data_loader.py b/tests/test_loaders/test_data_loader.py
index 3ea8cf82..9c18ab51 100644
--- a/tests/test_loaders/test_data_loader.py
+++ b/tests/test_loaders/test_data_loader.py
@@ -430,7 +430,7 @@ def test_rejected_for_yaml(self, tmp_path: Path, schema_file: Path) -> None:
         """schema_path/target_class are not valid kwargs for non-tabular loaders."""
         yaml_path = tmp_path / "data.yaml"
         yaml_path.write_text(yaml.dump({"id": 1, "zipcode": "90210"}))
-        with pytest.raises(TypeError):
+        with pytest.raises(TypeError, match="unexpected keyword argument"):
             get_file_loader(yaml_path, schema_path=schema_file, target_class="Record")