Skip to content

Commit 8c9f6a2

Browse files
committed
cleanup
1 parent fa75f25 commit 8c9f6a2

File tree

8 files changed

+27
-90
lines changed

8 files changed

+27
-90
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,3 @@ src/substrait/_version.py
142142
.directory
143143
.gdb_history
144144
.DS_Store
145-
/.envrc

src/substrait/bimap.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Bidirectional map for URI <-> URN conversion during the migration period.
33
44
This module provides a UriUrnBiDiMap class that maintains a bidirectional mapping
5-
between URIs and URNs, ensuring consistency and detecting conflicts.
5+
between URIs and URNs.
66
77
NOTE: This file is temporary and can be removed once the URI -> URN migration
88
is complete across all Substrait implementations. At that point, only URN-based
@@ -34,8 +34,8 @@ def put(self, uri: str, urn: str) -> None:
3434
ValueError: If the URI or URN already exists with a different mapping
3535
"""
3636
# Check for conflicts
37-
if uri in self._uri_to_urn:
38-
existing_urn = self._uri_to_urn[uri]
37+
if self.contains_uri(uri):
38+
existing_urn = self.get_urn(uri)
3939
if existing_urn != urn:
4040
raise ValueError(
4141
f"URI '{uri}' is already mapped to URN '{existing_urn}', "
@@ -44,8 +44,8 @@ def put(self, uri: str, urn: str) -> None:
4444
# Already have this exact mapping, nothing to do
4545
return
4646

47-
if urn in self._urn_to_uri:
48-
existing_uri = self._urn_to_uri[urn]
47+
if self.contains_urn(urn):
48+
existing_uri = self.get_uri(urn)
4949
if existing_uri != uri:
5050
raise ValueError(
5151
f"URN '{urn}' is already mapped to URI '{existing_uri}', "
@@ -54,7 +54,6 @@ def put(self, uri: str, urn: str) -> None:
5454
# Already have this exact mapping, nothing to do
5555
return
5656

57-
# Add the bidirectional mapping
5857
self._uri_to_urn[uri] = urn
5958
self._urn_to_uri[urn] = uri
6059

@@ -101,11 +100,3 @@ def contains_urn(self, urn: str) -> bool:
101100
True if the URN is in the map, False otherwise
102101
"""
103102
return urn in self._urn_to_uri
104-
105-
def __len__(self) -> int:
106-
"""Return the number of mappings in the bimap."""
107-
return len(self._uri_to_urn)
108-
109-
def __repr__(self) -> str:
110-
"""Return a string representation of the bimap."""
111-
return f"UriUrnBiDiMap({len(self)} mappings)"

src/substrait/builders/extended_expression.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,14 @@ def resolve(
230230
if not func:
231231
raise Exception(f"Unknown function {function} for {signature}")
232232

233-
# Create URN extension
234233
func_extension_urns = [
235234
ste.SimpleExtensionURN(
236235
extension_urn_anchor=registry.lookup_urn(urn), urn=urn
237236
)
238237
]
239238

240-
# Create URI extension (convert URN to URI via bimap)
241-
uri = registry.urn_to_uri(urn)
239+
# Create URI extension for backwards compatibility during URI -> URN migration
240+
uri = registry._uri_urn_bimap.get_uri(urn)
242241
func_extension_uris = []
243242
if uri:
244243
uri_anchor = registry.lookup_uri_anchor(uri)
@@ -247,7 +246,6 @@ def resolve(
247246
ste.SimpleExtensionURI(extension_uri_anchor=uri_anchor, uri=uri)
248247
]
249248

250-
# Create extension function declaration with both URI and URN references
251249
func_extensions = [
252250
ste.SimpleExtensionDeclaration(
253251
extension_function=ste.SimpleExtensionDeclaration.ExtensionFunction(
@@ -259,7 +257,6 @@ def resolve(
259257
)
260258
]
261259

262-
# Merge extensions from all expressions
263260
extension_urns = merge_extension_urns(
264261
func_extension_urns, *[b.extension_urns for b in bound_expressions]
265262
)
@@ -329,15 +326,14 @@ def resolve(
329326
if not func:
330327
raise Exception(f"Unknown function {function} for {signature}")
331328

332-
# Create URN extension
333329
func_extension_urns = [
334330
ste.SimpleExtensionURN(
335331
extension_urn_anchor=registry.lookup_urn(urn), urn=urn
336332
)
337333
]
338334

339-
# Create URI extension (convert URN to URI via bimap)
340-
uri = registry.urn_to_uri(urn)
335+
# Create URI extension for backwards compatibility during URI -> URN migration
336+
uri = registry._uri_urn_bimap.get_uri(urn)
341337
func_extension_uris = []
342338
if uri:
343339
uri_anchor = registry.lookup_uri_anchor(uri)
@@ -346,7 +342,6 @@ def resolve(
346342
ste.SimpleExtensionURI(extension_uri_anchor=uri_anchor, uri=uri)
347343
]
348344

349-
# Create extension function declaration with both URI and URN references
350345
func_extensions = [
351346
ste.SimpleExtensionDeclaration(
352347
extension_function=ste.SimpleExtensionDeclaration.ExtensionFunction(
@@ -358,7 +353,6 @@ def resolve(
358353
)
359354
]
360355

361-
# Merge extensions from all expressions
362356
extension_urns = merge_extension_urns(
363357
func_extension_urns, *[b.extension_urns for b in bound_expressions]
364358
)
@@ -430,15 +424,14 @@ def resolve(
430424
if not func:
431425
raise Exception(f"Unknown function {function} for {signature}")
432426

433-
# Create URN extension
434427
func_extension_urns = [
435428
ste.SimpleExtensionURN(
436429
extension_urn_anchor=registry.lookup_urn(urn), urn=urn
437430
)
438431
]
439432

440-
# Create URI extension (convert URN to URI via bimap)
441-
uri = registry.urn_to_uri(urn)
433+
# Create URI extension for backwards compatibility during URI -> URN migration
434+
uri = registry._uri_urn_bimap.get_uri(urn)
442435
func_extension_uris = []
443436
if uri:
444437
uri_anchor = registry.lookup_uri_anchor(uri)
@@ -447,7 +440,6 @@ def resolve(
447440
ste.SimpleExtensionURI(extension_uri_anchor=uri_anchor, uri=uri)
448441
]
449442

450-
# Create extension function declaration with both URI and URN references
451443
func_extensions = [
452444
ste.SimpleExtensionDeclaration(
453445
extension_function=ste.SimpleExtensionDeclaration.ExtensionFunction(
@@ -459,7 +451,7 @@ def resolve(
459451
)
460452
]
461453

462-
# Merge extensions from all expressions
454+
463455
extension_urns = merge_extension_urns(
464456
func_extension_urns,
465457
*[b.extension_urns for b in bound_expressions],

src/substrait/builders/plan.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
def _merge_extensions(*objs):
2929
"""Merge extension URIs, URNs, and declarations from multiple plan/expression objects.
3030
31-
During the URI/URN migration period, we maintain both URI and URN references
32-
for maximum compatibility.
31+
During the URI -> URN migration period, we maintain both URI and URN references
32+
for backwards compatibility.
3333
"""
3434
return {
3535
"extension_uris": merge_extension_uris(*[b.extension_uris for b in objs if b]),

src/substrait/extension_registry.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ def satisfies_signature(self, signature: tuple) -> Optional[str]:
251251
class ExtensionRegistry:
252252
def __init__(self, load_default_extensions=True) -> None:
253253
self._urn_mapping: dict = defaultdict(dict) # URN -> anchor ID
254+
# NOTE: during the URI -> URN migration, we only need an id generator for URN. We can use the same anchor for plan construction for URIs.
254255
self._urn_id_generator = itertools.count(1)
255256

256257
self._function_mapping: dict = defaultdict(dict)
@@ -278,7 +279,7 @@ def register_extension_yaml(
278279
279280
Args:
280281
fname: Path to the YAML file
281-
uri: URI for the extension (for URI/URN bimap)
282+
uri: URI for the extension (this is required during the URI -> URN migration)
282283
"""
283284
fname = Path(fname)
284285
with open(fname) as f: # type: ignore
@@ -297,13 +298,10 @@ def register_extension_dict(self, definitions: dict, uri: str) -> None:
297298
if not urn:
298299
raise ValueError("Extension definitions must contain a 'urn' field")
299300

300-
# Validate URN format
301301
self._validate_urn_format(urn)
302302

303-
# Assign anchor to URN (URI will use the same anchor during output)
304303
self._urn_mapping[urn] = next(self._urn_id_generator)
305304

306-
# Store URI <-> URN mapping for output generation
307305
self._uri_urn_bimap.put(uri, urn)
308306

309307
simple_extensions = build_simple_extensions(definitions)
@@ -369,28 +367,6 @@ def lookup_uri_anchor(self, uri: str) -> Optional[int]:
369367
return self._urn_mapping.get(urn)
370368
return None
371369

372-
def uri_to_urn(self, uri: str) -> Optional[str]:
373-
"""Convert a URI to its corresponding URN using the bimap.
374-
375-
Args:
376-
uri: The extension URI
377-
378-
Returns:
379-
The corresponding URN, or None if not in the bimap
380-
"""
381-
return self._uri_urn_bimap.get_urn(uri)
382-
383-
def urn_to_uri(self, urn: str) -> Optional[str]:
384-
"""Convert a URN to its corresponding URI using the bimap.
385-
386-
Args:
387-
urn: The extension URN
388-
389-
Returns:
390-
The corresponding URI, or None if not in the bimap
391-
"""
392-
return self._uri_urn_bimap.get_uri(urn)
393-
394370
def _validate_urn_format(self, urn: str) -> None:
395371
"""Validate that a URN follows the expected format.
396372

tests/test_bimap.py

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,3 @@ def test_contains(self):
7878

7979
assert bimap.contains_uri(uri)
8080
assert bimap.contains_urn(urn)
81-
82-
def test_len(self):
83-
"""Test the length of the bimap."""
84-
bimap = UriUrnBiDiMap()
85-
86-
assert len(bimap) == 0
87-
88-
bimap.put("uri1", "urn1")
89-
assert len(bimap) == 1
90-
91-
bimap.put("uri2", "urn2")
92-
assert len(bimap) == 2
93-
94-
def test_repr(self):
95-
"""Test the string representation of the bimap."""
96-
bimap = UriUrnBiDiMap()
97-
98-
assert repr(bimap) == "UriUrnBiDiMap(0 mappings)"
99-
100-
bimap.put("uri1", "urn1")
101-
assert repr(bimap) == "UriUrnBiDiMap(1 mappings)"

tests/test_extension_registry.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def test_registry_uri_to_urn_conversion():
363363
registry.register_extension_dict(yaml.safe_load(content_with_urn), uri=uri)
364364

365365
# Test URI to URN conversion
366-
assert registry.uri_to_urn(uri) == "extension:test:bimap"
366+
assert registry._uri_urn_bimap.get_urn(uri) == "extension:test:bimap"
367367

368368

369369
def test_registry_urn_to_uri_conversion():
@@ -384,7 +384,7 @@ def test_registry_urn_to_uri_conversion():
384384
registry.register_extension_dict(yaml.safe_load(content_with_urn), uri=uri)
385385

386386
# Test URN to URI conversion
387-
assert registry.urn_to_uri("extension:test:bimap2") == uri
387+
assert registry._uri_urn_bimap.get_uri("extension:test:bimap2") == uri
388388

389389

390390
def test_registry_uri_anchor_lookup():
@@ -408,8 +408,8 @@ def test_registry_nonexistent_uri_urn_returns_none():
408408
"""Test that looking up non-existent URI/URN returns None."""
409409
registry = ExtensionRegistry(load_default_extensions=False)
410410

411-
assert registry.uri_to_urn("https://nonexistent.com/test.yaml") is None
412-
assert registry.urn_to_uri("extension:nonexistent:test") is None
411+
assert registry._uri_urn_bimap.get_urn("https://nonexistent.com/test.yaml") is None
412+
assert registry._uri_urn_bimap.get_uri("extension:nonexistent:test") is None
413413
assert registry.lookup_uri_anchor("https://nonexistent.com/test.yaml") is None
414414

415415

@@ -419,14 +419,14 @@ def test_registry_default_extensions_have_uri_mappings():
419419

420420
# Check that at least one default extension has a URI mapping
421421
urn = "extension:io.substrait:functions_comparison"
422-
uri = registry.urn_to_uri(urn)
422+
uri = registry._uri_urn_bimap.get_uri(urn)
423423

424424
assert uri is not None
425425
assert "https://github.com/substrait-io/substrait/blob/main/extensions" in uri
426426
assert "functions_comparison.yaml" in uri
427427

428428
# Verify reverse mapping works
429-
assert registry.uri_to_urn(uri) == urn
429+
assert registry._uri_urn_bimap.get_urn(uri) == urn
430430

431431

432432
# ============================================================================

tests/test_uri_urn_migration.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ def test_register_with_uri(self):
5353
assert registry.lookup_uri_anchor(uri) is not None
5454

5555
# Test bimap conversions
56-
assert registry.uri_to_urn(uri) == "extension:example:test"
57-
assert registry.urn_to_uri("extension:example:test") == uri
56+
assert registry._uri_urn_bimap.get_urn(uri) == "extension:example:test"
57+
assert registry._uri_urn_bimap.get_uri("extension:example:test") == uri
5858

5959
def test_register_requires_uri(self):
6060
"""Test that registering an extension requires a URI during migration."""
@@ -75,7 +75,7 @@ def test_default_extensions_have_uris(self):
7575

7676
# Check one of the default extensions
7777
urn = "extension:io.substrait:functions_comparison"
78-
uri_from_bimap = registry.urn_to_uri(urn)
78+
uri_from_bimap = registry._uri_urn_bimap.get_uri(urn)
7979

8080
# Should have a URI derived from DEFAULT_URN_PREFIX
8181
assert uri_from_bimap is not None
@@ -165,8 +165,8 @@ def test_uri_and_urn_always_paired(self):
165165
assert registry.lookup_uri_anchor(uri) is not None
166166

167167
# Verify bimap has both directions
168-
assert registry.uri_to_urn(uri) == "extension:test:functions_paired"
169-
assert registry.urn_to_uri("extension:test:functions_paired") == uri
168+
assert registry._uri_urn_bimap.get_urn(uri) == "extension:test:functions_paired"
169+
assert registry._uri_urn_bimap.get_uri("extension:test:functions_paired") == uri
170170

171171

172172
# ============================================================================

0 commit comments

Comments
 (0)