Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,27 @@ endfunction()

################################################################################

# ankerl/unordered_dense — Robin Hood open-addressing hash map (MIT)
# Prefer a local vendored copy (external/ankerl/unordered_dense.h) over
# fetching from the network so offline builds work.
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/external/ankerl/unordered_dense.h")
	# Vendored copy found: expose external/ as an include root so that
	# `#include <ankerl/unordered_dense.h>` resolves without network access.
	message(STATUS "Using vendored ankerl/unordered_dense from external/")
	add_library(unordered_dense INTERFACE)
	# BUILD_INTERFACE keeps the absolute source-tree path out of any
	# installed/exported usage requirements; CMake rejects source-dir paths
	# in the INTERFACE_INCLUDE_DIRECTORIES of an exported target.
	target_include_directories(unordered_dense INTERFACE
		"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/external>")
	# Provide the namespaced name as well, so consumers can link either
	# `unordered_dense` or `unordered_dense::unordered_dense` regardless of
	# which branch of this if() supplied the target.
	add_library(unordered_dense::unordered_dense ALIAS unordered_dense)
else()
	# No vendored header: fetch a pinned release at configure time.
	include(FetchContent)
	FetchContent_Declare(
		unordered_dense
		GIT_REPOSITORY https://github.com/martinus/unordered-dense.git
		GIT_TAG v4.4.0
		GIT_SHALLOW TRUE
	)
	FetchContent_MakeAvailable(unordered_dense)
endif()

################################################################################

include(version_license)
version_license(
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/version_license.h.template"
Expand Down
5 changes: 4 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ if(TAU_PARSER_BUILD_HEADER_ONLY)
)
target_compile_definitions(tauparser INTERFACE TAU_PARSER_BUILD_HEADER_ONLY)
tauparser_setup(tauparser INTERFACE "")
target_link_libraries(tauparser INTERFACE unordered_dense)
else()
if(TAU_PARSER_BUILD_STATIC_LIBRARY)
add_library(tauparser_static STATIC ${TAU_PARSER_SOURCES})
Expand All @@ -55,8 +56,9 @@ else()
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
tauparser_setup(tauparser_static PUBLIC "")
target_link_libraries(tauparser_static PUBLIC unordered_dense)
endif()

if(TAU_PARSER_BUILD_SHARED_LIBRARY)
add_library(tauparser_shared SHARED ${TAU_PARSER_SOURCES})
set_target_properties(tauparser_shared PROPERTIES
Expand All @@ -67,6 +69,7 @@ else()
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
)
tauparser_setup(tauparser_shared PUBLIC "")
target_link_libraries(tauparser_shared PUBLIC unordered_dense)
endif()

# Create an alias target for backward compatibility
Expand Down
49 changes: 49 additions & 0 deletions src/grammar.tmpl.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// To view the license please visit
// https://github.com/IDNI/parser/blob/main/LICENSE.md

#include <deque>
#include <queue>
#include "parser.h"

Expand Down Expand Up @@ -833,4 +834,52 @@ std::ostream& print_dictmap(std::ostream& os,
}
#endif // DEBUG

/// Computes the unit-rule closure of a set of seed facts.
///
/// A fact is a pair (literal, {begin, end}). Every seed (l, pos) becomes the
/// fact (l, {pos, pos + 1}). Whenever a fact (B, span) is known and the
/// grammar holds a production A -> B whose body is exactly one conjunction
/// made of exactly one literal, the fact (A, span) is added as well. The
/// process repeats until no new fact appears.
///
/// @param seeds (literal, position) pairs to start from
/// @return every seed fact together with all facts derived from them
template <typename C, typename T>
std::set<std::pair<lit<C,T>, std::array<size_t,2>>>
	grammar<C,T>::derive_all(
		const std::vector<std::pair<lit<C,T>, size_t>>& seeds) const
{
	using span_t = std::array<size_t, 2>;
	using fact   = std::pair<lit<C,T>, span_t>;
	std::set<fact>   derived;  // every fact established so far
	std::deque<fact> worklist; // facts whose consequences are still pending

	// Records a fact; it is scheduled for processing only the first time
	// it is seen, which is what makes the loop below terminate.
	auto record = [&derived, &worklist](const fact& candidate) {
		if (derived.insert(candidate).second)
			worklist.push_back(candidate);
	};

	for (const auto& seed : seeds)
		record(fact{ seed.first, { seed.second, seed.second + 1 } });

	while (!worklist.empty()) {
		// Copy before popping: pop_front() destroys the front element.
		const fact current = worklist.front();
		worklist.pop_front();

		for (size_t i = 0; i != G.size(); ++i) {
			const std::vector<lits<C,T>>& body = G[i].second;
			// Only unit rules fire: one conjunction holding one literal
			// that equals the literal of the fact being processed.
			// NOTE(review): only size and literal equality are inspected
			// here; confirm no other property of the conjunction should
			// prevent the rule from firing.
			if (body.size() != 1) continue;
			if (body[0].size() != 1) continue;
			if (!(body[0][0] == current.first)) continue;
			record(fact{ G[i].first, current.second });
		}

		// TODO: binary-rule closure (A -> B C where B covers [i,j] and
		// C covers [j,k] -> A covers [i,k]). Requires a span join over
		// the derived set and is not implemented; only unit-rule
		// propagation is performed.
	}
	return derived;
}

} // idni namespace
5 changes: 5 additions & 0 deletions src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,11 @@ struct grammar {
*/
lit<C, T> nt(const std::basic_string<C>& s);
const lit<C, T>& get_start() const;
// Bottom-up derivation fixpoint over unit rules.
// seeds: (literal, position) pairs representing known memberships; each
//        seed (l, pos) is recorded as the fact (l, {pos, pos + 1}).
// Returns: the seed facts plus every (head, span) pair reachable from them
//        by repeatedly applying productions whose body is exactly one
//        conjunction holding exactly one literal (unit-rule closure).
//        Spans are carried over unchanged; no binary-rule joining is done.
std::set<std::pair<lit<C,T>, std::array<size_t,2>>>
derive_all(const std::vector<std::pair<lit<C,T>, size_t>>& seeds) const;
private:
bool all_nulls(const lits<C, T>& a) const;
nonterminals<C, T>& nts;
Expand Down
82 changes: 82 additions & 0 deletions src/utility/annotated_forest.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// annotated_forest.h — per-node mutable labels on top of forest<NodeT>
// Enables EL ABox concept labels, fragment flags, and arbitrary per-node metadata.

#ifndef __IDNI__UTILITY__ANNOTATED_FOREST_H__
#define __IDNI__UTILITY__ANNOTATED_FOREST_H__

#include <map>
#include "forest.h"

namespace idni {

template <typename NodeT, typename LabelT>
struct annotated_forest {
using node = typename forest<NodeT>::node;
using nodes = typename forest<NodeT>::nodes;
using nodes_set = typename forest<NodeT>::nodes_set;
using enter_t = typename forest<NodeT>::enter_t;
using exit_t = typename forest<NodeT>::exit_t;
using revisit_t = typename forest<NodeT>::revisit_t;
using ambig_t = typename forest<NodeT>::ambig_t;

forest<NodeT>& f;
std::map<node, LabelT> labels;

explicit annotated_forest(forest<NodeT>& f_) : f(f_) {}

LabelT& label(const node& n) { return labels[n]; }
const LabelT& label(const node& n) const { return labels.at(n); }
bool has_label(const node& n) const { return labels.count(n) > 0; }

// For each edge (parent -> child), call update_fn(label(parent), label(child))
// and store result at child. Returns true if any label changed.
template <typename UpdateFn>
bool propagate(UpdateFn&& update_fn) {
bool changed = false;
f.traverse(f.root(),
[](const node&) {}, // enter: no-op
[&](const node& n, const nodes_set& csets) {
if (!has_label(n)) return;
for (auto& cset : csets)
for (auto& child : cset) {
LabelT old_lbl = has_label(child) ? label(child) : LabelT{};
LabelT new_lbl = update_fn(label(n), old_lbl);
if (!(new_lbl == old_lbl)) {
labels[child] = std::move(new_lbl);
changed = true;
}
}
},
[](const node&) { return false; }, // no revisit
[](const node&, const nodes_set& ns) { return ns; } // no ambig filter
);
return changed;
}

// Forward traversal — delegates to forest::traverse.
// Use the concrete std::function aliases so callers can pass generic lambdas.
bool traverse(const node& root,
enter_t cb_enter,
exit_t cb_exit = [](const node&, const nodes_set&){},
revisit_t cb_revisit = [](const node&){ return false; },
ambig_t cb_ambig = [](const node&, const nodes_set& ns){ return ns; }) {
return f.traverse(root, cb_enter, cb_exit, cb_revisit, cb_ambig);
}

// Backward traversal — requires build_reverse_index() to have been called.
template <typename cb_enter_t, typename cb_revisit_t>
void traverse_backward(const nodes_set& starts,
cb_enter_t cb_enter,
cb_revisit_t cb_revisit) {
f.traverse_backward(starts, cb_enter, cb_revisit);
}

template <typename cb_enter_t>
void traverse_backward(const nodes_set& starts, cb_enter_t cb_enter) {
f.traverse_backward(starts, cb_enter);
}
};

} // idni namespace

#endif // __IDNI__UTILITY__ANNOTATED_FOREST_H__
28 changes: 28 additions & 0 deletions src/utility/forest.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,31 @@ struct forest {
cb_exit_t cb_exit = NO_EXIT,
cb_revisit_t cb_revisit = NO_REVISIT,
cb_ambig_t cb_ambig = NO_AMBIG) const;

// Build reverse adjacency index: child -> parent nodes.
// Rebuilds the index from scratch with one pass over every edge of the
// graph and marks it valid. Must be called before
// traverse_backward/predecessors, both of which assert that the index is
// valid.
void build_reverse_index();

// Invalidate reverse index (call after any structural modification).
// Clears the stored index and marks it as not valid.
void invalidate_reverse_index();

// Visit all direct predecessors of node n via cb(predecessor_node).
// A node with no recorded parent produces no callbacks.
template <typename cb_t>
void predecessors(const node& n, cb_t&& cb) const;

// Backward BFS traversal from a set of start nodes.
// starts: packs of nodes; every node of every pack is a start node.
// cb_enter(node) called on each visited node.
// cb_revisit(node)->bool: consulted for a parent that has already been
// seen; intended to request a re-visit of that node.
template <typename cb_enter_t, typename cb_revisit_t>
void traverse_backward(const nodes_set& starts,
cb_enter_t cb_enter,
cb_revisit_t cb_revisit) const;

// Backward traversal without revisit predicate (no-revisit by default):
// forwards to the overload above with a predicate that always returns false.
template <typename cb_enter_t>
void traverse_backward(const nodes_set& starts, cb_enter_t cb_enter) const {
traverse_backward(starts, cb_enter, [](const node&){ return false; });
}
/// Replace each node with its immediate children,
/// assuming it has only one pack (unambiguous);
/// the caller must ensure the right order to avoid cyclic
Expand All @@ -256,6 +281,9 @@ struct forest {
std::ostream& print_data(std::ostream& os) const;
#endif
private:
// Reverse adjacency: child node -> its parent nodes. Filled by
// build_reverse_index() and emptied by invalidate_reverse_index().
std::map<node, nodes> reverse_index;
// True from a completed build_reverse_index() until the next
// invalidate_reverse_index(); starts out false.
bool reverse_index_valid = false;

template <typename cb_enter_t, typename cb_exit_t,
typename cb_revisit_t, typename cb_ambig_t>
bool _traverse(const node_graph& g, const node& root,
Expand Down
51 changes: 51 additions & 0 deletions src/utility/forest.tmpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,57 @@ bool forest<NodeT>::replace_node(graph& g, const node& torepl,
return gchange;
}

/// Rebuilds the child -> parents index from the forest graph and marks it
/// valid. Any previously stored index is discarded first.
///
/// Each parent is recorded at most once per child, even when the parent
/// reaches that child through several packs or several times within one
/// pack, so predecessors() reports every direct predecessor exactly once.
template <typename NodeT>
void forest<NodeT>::build_reverse_index() {
	reverse_index.clear();
	for (const auto& [parent, packs] : g)
		for (const auto& pack : packs)
			for (const auto& child : pack) {
				auto& parents = reverse_index[child];
				// All edges of one parent are processed consecutively, so
				// a repeated (parent, child) edge can only duplicate the
				// entry appended last. Equivalence is tested with
				// operator< only, the ordering the map key already needs.
				if (parents.empty()
					|| parents.back() < parent
					|| parent < parents.back())
					parents.push_back(parent);
			}
	reverse_index_valid = true;
}

/// Discards the reverse index and marks it as not valid, so that
/// predecessors()/traverse_backward() assert until it is rebuilt.
template <typename NodeT>
void forest<NodeT>::invalidate_reverse_index() {
	// The two steps are independent of each other.
	reverse_index_valid = false;
	reverse_index.clear();
}

/// Calls cb(parent) for every parent stored for node n in the reverse
/// index. Asserts that the index is valid; a node without an entry yields
/// no callbacks.
template <typename NodeT>
template <typename cb_t>
void forest<NodeT>::predecessors(const node& n, cb_t&& cb) const {
	assert(reverse_index_valid);
	const auto entry = reverse_index.find(n);
	if (entry == reverse_index.end()) return; // n has no recorded parent
	for (auto& pred : entry->second) cb(pred);
}

/// Breadth-first walk from the start nodes towards their ancestors, using
/// the reverse index (asserts that the index is valid).
///
/// @param starts     packs of nodes; every node of every pack is a start
///                   node, and each distinct start node is queued once
/// @param cb_enter   called with each node as it is taken off the queue
/// @param cb_revisit called for a parent that has already been seen;
///                   returning true queues that parent again, so it is
///                   entered once more. A predicate that keeps returning
///                   true on a cyclic graph makes the walk non-terminating,
///                   so it must eventually return false.
template <typename NodeT>
template <typename cb_enter_t, typename cb_revisit_t>
void forest<NodeT>::traverse_backward(const nodes_set& starts,
	cb_enter_t cb_enter, cb_revisit_t cb_revisit) const
{
	assert(reverse_index_valid);
	std::set<node> visited;
	std::deque<node> queue;
	for (auto& pack : starts)
		for (auto& n : pack)
			if (visited.insert(n).second)
				queue.push_back(n);
	while (!queue.empty()) {
		// Copy before popping: pop_front() destroys the front element.
		node n = queue.front();
		queue.pop_front();
		cb_enter(n);
		auto it = reverse_index.find(n);
		if (it == reverse_index.end()) continue; // no recorded parent
		for (auto& parent : it->second) {
			// Queue a parent on first sight, or when the caller asks for
			// a re-visit. The previous code gated the push on a second
			// insert into `visited`, which always failed for a seen node,
			// so cb_revisit could never take effect.
			const bool first_visit = visited.insert(parent).second;
			if (first_visit || cb_revisit(parent))
				queue.push_back(parent);
		}
	}
}

#ifdef DEBUG
template <typename NodeT>
std::ostream& forest<NodeT>::print_data(std::ostream& os) const {
Expand Down
Loading