Skip to content

Commit f29eb39

Browse files
authored
feat: major internal refactoring for extensions (#45)
BREAKING CHANGE: various substrait_validator::output types relating to extensions have changed. Name resolution diagnostic messages and codes also changed, for as far as these were implemented before.
1 parent 377a634 commit f29eb39

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+2287
-1136
lines changed

c/src/lib.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,39 @@ pub extern "C" fn substrait_validator_config_uri_resolver(
572572
true
573573
}
574574

575+
/// Sets the maximum recursion depth for URI resolution, in the presence of
576+
/// transitive dependencies. Setting this to a negative number disables the
577+
/// limit, setting this to zero disables URI resolution entirely.
578+
///
579+
/// Returns whether the function was successful. If false is returned, retrieve
580+
/// the error message with substrait_validator_get_last_error().
581+
#[no_mangle]
582+
pub extern "C" fn substrait_validator_config_max_uri_resolution_depth(
583+
config: *mut ConfigHandle,
584+
max_depth: i64,
585+
) -> bool {
586+
// Check for nulls.
587+
if config.is_null() {
588+
set_last_error("received null configuration handle");
589+
return false;
590+
}
591+
592+
// UNSAFE: unpack configuration handle. Assumes that the pointer was
593+
// created by substrait_validator_config_new(), or behavior is undefined.
594+
let config = unsafe { &mut (*config).config };
595+
596+
// Update configuration and return success.
597+
if max_depth < 0 {
598+
config.set_max_uri_resolution_depth(None);
599+
} else if let Ok(max_depth) = usize::try_from(max_depth) {
600+
config.set_max_uri_resolution_depth(Some(max_depth));
601+
} else {
602+
set_last_error("specified depth is out of range");
603+
return false;
604+
}
605+
true
606+
}
607+
575608
/// Parse/validation result handle.
576609
pub struct ResultHandle {
577610
pub result: substrait_validator::ParseResult,

proto/substrait/validator/validator.proto

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,6 @@ message Node {
5757
// elsewhere in the plan.
5858
NodeReference node_reference = 4;
5959

60-
// Special case for proto_primitive for references to YAML files via a URI.
61-
// If resolved, the keys in the toplevel YAML map are represented using
62-
// Field messages in data.
63-
YamlReference yaml_reference = 5;
64-
6560
// This node represents a YAML map/object. The keys are represented using
6661
// Field messages in data.
6762
google.protobuf.Empty yaml_map = 6;
@@ -72,6 +67,14 @@ message Node {
7267

7368
// This node represents a YAML primitive.
7469
PrimitiveData yaml_primitive = 8;
70+
71+
// Special case for string primitives that were interpreted and resolved as
72+
// a URI. These nodes will have a single child node with path `data` that
73+
// represents the parse result of the referred file.
74+
string resolved_uri = 9;
75+
76+
// No longer used. The more generic ResolvedUri type is used instead.
77+
YamlReference yaml_reference = 5 [deprecated = true];
7578
}
7679

7780
// Semantic classification of this node.
@@ -178,6 +181,8 @@ message Node {
178181

179182
// Information about a reference to a YAML file.
180183
message YamlReference {
184+
option deprecated = true;
185+
181186
// URI to the YAML file.
182187
string uri = 1;
183188
}

py/src/lib.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ impl Config {
124124
})
125125
})
126126
}
127+
128+
/// Sets the maximum recursion depth for URI resolution, in the presence of
129+
/// transitive dependencies. Setting this to None disables the limit,
130+
/// setting this to zero disables URI resolution entirely.
131+
pub fn set_max_uri_resolution_depth(&mut self, depth: Option<usize>) {
132+
self.config.set_max_uri_resolution_depth(depth);
133+
}
127134
}
128135

129136
/// Represents a Substrait plan parse tree, as parsed by the validator.

py/substrait_validator/__init__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,17 @@ def substrait_version() -> str:
405405
"application of any --override-uri options)."
406406
),
407407
)
408+
@click.option(
409+
"--uri-depth",
410+
type=int,
411+
default=None,
412+
help=(
413+
"Sets the maximum recursion depth for resolving transitive "
414+
"dependencies. You can specify a negative number to disable "
415+
"the limit, or set this to zero to disable URI resolution "
416+
"entirely."
417+
),
418+
)
408419
@click.option(
409420
"--help-diagnostics",
410421
is_flag=True,
@@ -437,6 +448,7 @@ def cli( # noqa: C901
437448
diagnostic_level,
438449
override_uri,
439450
use_urllib,
451+
uri_depth,
440452
help_diagnostics,
441453
print_version,
442454
print_substrait_version,
@@ -758,6 +770,11 @@ def print_diag(diag, first_prefix="", next_prefix=""):
758770
fatal(e)
759771
if use_urllib:
760772
config.add_urllib_resolver()
773+
if uri_depth is not None:
774+
if uri_depth < 0:
775+
config.set_max_uri_resolution_depth(None)
776+
else:
777+
config.set_max_uri_resolution_depth(uri_depth)
761778

762779
# Run the parser/validator.
763780
result = plan_to_result_handle(in_plan, config)

py/tests/test_api.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ def resolver(s):
127127
# Disable missing root relation error, so we don't have to supply one.
128128
config.override_diagnostic_level(5001, "info", "info")
129129

130+
# Explicitly disable the resolution depth limit, to opt in to URI
131+
# resolution.
132+
config.set_max_uri_resolution_depth(None)
133+
130134
# Add the resolver.
131135
config.add_uri_resolver(resolver)
132136

py/tests/test_cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,8 @@ def x(*args):
252252
"0",
253253
"info",
254254
"info", # all other diagnostics -> info
255+
"--uri-depth",
256+
"-1", # opt in to URI resolution, this will be the default
255257
*args
256258
).exit_code
257259

rs/src/export/html/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -514,11 +514,11 @@ fn format_node_tree(
514514
&target.path,
515515
None,
516516
),
517-
tree::NodeType::YamlReference(yaml) => {
517+
tree::NodeType::ResolvedUri(uri) => {
518518
format!(
519519
"= {} {brief} {}",
520-
format_span("value", &yaml.uri),
521-
format_span("type", "string, resolved to YAML")
520+
format_span("value", &uri),
521+
format_span("type", "string, resolved as URI")
522522
)
523523
}
524524
tree::NodeType::YamlMap => format!("{brief} {}", format_span("type", "YAML map")),

rs/src/export/proto.rs

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -182,16 +182,12 @@ impl From<&tree::NodeType> for validator::node::NodeType {
182182
path: Some((&node.path).into()),
183183
})
184184
}
185-
tree::NodeType::YamlReference(info) => {
186-
validator::node::NodeType::YamlReference(validator::node::YamlReference {
187-
uri: info.uri.name().unwrap_or_default().to_string(),
188-
})
189-
}
190185
tree::NodeType::YamlMap => validator::node::NodeType::YamlMap(()),
191186
tree::NodeType::YamlArray => validator::node::NodeType::YamlArray(()),
192187
tree::NodeType::YamlPrimitive(data) => {
193188
validator::node::NodeType::YamlPrimitive(data.into())
194189
}
190+
tree::NodeType::ResolvedUri(uri) => validator::node::NodeType::ResolvedUri(uri.clone()),
195191
}
196192
}
197193
}
@@ -292,7 +288,7 @@ impl From<&data::Class> for validator::data_type::Class {
292288
validator::data_type::class::Kind::Compound(compound.into())
293289
}
294290
data::Class::UserDefined(user_defined) => {
295-
validator::data_type::class::Kind::UserDefinedType(user_defined.as_ref().into())
291+
validator::data_type::class::Kind::UserDefinedType(user_defined.into())
296292
}
297293
data::Class::Unresolved => validator::data_type::class::Kind::UnresolvedType(()),
298294
}),
@@ -340,10 +336,8 @@ impl From<&data::class::Compound> for i32 {
340336
}
341337
}
342338

343-
impl From<&extension::Reference<data::class::UserDefinedDefinition>>
344-
for validator::data_type::UserDefinedType
345-
{
346-
fn from(node: &extension::Reference<data::class::UserDefinedDefinition>) -> Self {
339+
impl From<&extension::simple::type_class::Reference> for validator::data_type::UserDefinedType {
340+
fn from(node: &extension::simple::type_class::Reference) -> Self {
347341
Self {
348342
uri: node.uri.name().unwrap_or_default().to_string(),
349343
name: node.name.name().unwrap_or_default().to_string(),
@@ -352,10 +346,10 @@ impl From<&extension::Reference<data::class::UserDefinedDefinition>>
352346
}
353347
}
354348

355-
impl From<&data::class::UserDefinedDefinition>
349+
impl From<&extension::simple::type_class::Definition>
356350
for validator::data_type::user_defined_type::Definition
357351
{
358-
fn from(node: &data::class::UserDefinedDefinition) -> Self {
352+
fn from(node: &extension::simple::type_class::Definition) -> Self {
359353
Self {
360354
structure: node
361355
.structure
@@ -394,24 +388,24 @@ impl From<&data::Variation> for validator::data_type::Variation {
394388
}
395389
}
396390

397-
impl From<&data::variation::UserDefinedDefinition>
391+
impl From<&extension::simple::type_variation::Definition>
398392
for validator::data_type::user_defined_variation::Definition
399393
{
400-
fn from(node: &data::variation::UserDefinedDefinition) -> Self {
394+
fn from(node: &extension::simple::type_variation::Definition) -> Self {
401395
Self {
402396
base_type: None,
403397
function_behavior: (&node.function_behavior).into(),
404398
}
405399
}
406400
}
407401

408-
impl From<&data::variation::FunctionBehavior> for i32 {
409-
fn from(node: &data::variation::FunctionBehavior) -> Self {
402+
impl From<&extension::simple::type_variation::FunctionBehavior> for i32 {
403+
fn from(node: &extension::simple::type_variation::FunctionBehavior) -> Self {
410404
match node {
411-
data::variation::FunctionBehavior::Inherits => {
405+
extension::simple::type_variation::FunctionBehavior::Inherits => {
412406
validator::data_type::user_defined_variation::FunctionBehavior::Inherits
413407
}
414-
data::variation::FunctionBehavior::Separate => {
408+
extension::simple::type_variation::FunctionBehavior::Separate => {
415409
validator::data_type::user_defined_variation::FunctionBehavior::Separate
416410
}
417411
}

rs/src/input/config.rs

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ fn resolve_with_curl(uri: &str) -> Result<Vec<u8>, curl::Error> {
3939
}
4040

4141
/// Configuration structure.
42-
#[derive(Default)]
4342
pub struct Config {
4443
/// When set, do not generate warnings for unknown protobuf fields that are
4544
/// set to their protobuf-defined default value.
@@ -80,6 +79,26 @@ pub struct Config {
8079
/// error. If no downloader is specified, only file:// URLs with an
8180
/// absolute path are supported.
8281
pub uri_resolver: Option<UriResolver>,
82+
83+
/// Optional URI resolution depth. If specified, dependencies are only
84+
/// resolved this many levels deep. Setting this to zero effectively
85+
/// disables extension URI resolution altogether.
86+
pub max_uri_resolution_depth: Option<usize>,
87+
}
88+
89+
// TODO: enable URI resolution by default once all that works. Then this can
90+
// be derived again. Also still need to expose the depth option in extensions.
91+
impl Default for Config {
92+
fn default() -> Self {
93+
Self {
94+
ignore_unknown_fields: Default::default(),
95+
allowed_proto_any_urls: Default::default(),
96+
diagnostic_level_overrides: Default::default(),
97+
uri_overrides: Default::default(),
98+
uri_resolver: Default::default(),
99+
max_uri_resolution_depth: Some(0),
100+
}
101+
}
83102
}
84103

85104
impl Config {
@@ -152,4 +171,11 @@ impl Config {
152171
pub fn add_curl_uri_resolver(&mut self) {
153172
self.add_uri_resolver(resolve_with_curl)
154173
}
174+
175+
/// Sets the maximum recursion depth for URI resolution, in the presence of
176+
/// transitive dependencies. Setting this to None disables the limit,
177+
/// setting this to zero disables URI resolution entirely.
178+
pub fn set_max_uri_resolution_depth(&mut self, depth: Option<usize>) {
179+
self.max_uri_resolution_depth = depth;
180+
}
155181
}

rs/src/output/diagnostic.rs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -218,28 +218,34 @@ pub enum Classification {
218218
#[strum(props(Description = "invalid YAML value type"))]
219219
YamlInvalidType = 2008,
220220

221+
#[strum(props(Description = "cyclic dependency"))]
222+
YamlCyclicDependency = 2009,
223+
221224
// Link resolution diagnostics (group 3).
222225
#[strum(props(HiddenDescription = "link resolution diagnostic"))]
223226
Link = 3000,
224227

225228
#[strum(props(Description = "failed to resolve anchor"))]
226229
LinkMissingAnchor = 3001,
227230

228-
#[strum(props(Description = "failed to resolve function name"))]
229-
LinkMissingFunctionName = 3002,
230-
231-
#[strum(props(Description = "failed to resolve type name"))]
232-
LinkMissingTypeName = 3003,
233-
234-
#[strum(props(Description = "failed to resolve type variation name"))]
235-
LinkMissingTypeVariationName = 3004,
236-
237231
#[strum(props(HiddenDescription = "use of anchor zero"))]
238232
LinkAnchorZero = 3005,
239233

240234
#[strum(props(Description = "failed to resolve type variation name & class pair"))]
241235
LinkMissingTypeVariationNameAndClass = 3006,
242236

237+
#[strum(props(Description = "unresolved name lookup"))]
238+
LinkUnresolvedName = 3007,
239+
240+
#[strum(props(Description = "ambiguous name lookup"))]
241+
LinkAmbiguousName = 3008,
242+
243+
#[strum(props(Description = "duplicate definition"))]
244+
LinkDuplicateDefinition = 3009,
245+
246+
#[strum(props(HiddenDescription = "invalid compound vs. simple function name usage"))]
247+
LinkCompoundVsSimpleFunctionName = 3010,
248+
243249
// Type-related diagnostics (group 4).
244250
#[strum(props(HiddenDescription = "type-related diagnostics"))]
245251
Type = 4000,

0 commit comments

Comments
 (0)