diff --git a/.gitignore b/.gitignore index 21bb27b..dfefea0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ *.iml # User IDE Files -.idea \ No newline at end of file +.idea + +# Local integration test datasets +integration/testdata/local/ \ No newline at end of file diff --git a/cmd/benchmark/README.md b/cmd/benchmark/README.md new file mode 100644 index 0000000..1ef8b98 --- /dev/null +++ b/cmd/benchmark/README.md @@ -0,0 +1,77 @@ +# Benchmark + +Runs query scenarios against a real database and outputs a markdown timing table. + +## Usage + +```bash +# Default dataset (base) +go run ./cmd/benchmark -connection "postgresql://dawgs:dawgs@localhost:5432/dawgs" + +# Local dataset (not committed to repo) +go run ./cmd/benchmark -connection "..." -dataset local/phantom + +# Default + local dataset +go run ./cmd/benchmark -connection "..." -local-dataset local/phantom + +# Neo4j +go run ./cmd/benchmark -driver neo4j -connection "neo4j://neo4j:password@localhost:7687" + +# Save to file +go run ./cmd/benchmark -connection "..." 
-output report.md +``` + +## Flags + +| Flag | Default | Description | +|------|---------|-------------| +| `-driver` | `pg` | Database driver (`pg`, `neo4j`) | +| `-connection` | | Connection string (or `PG_CONNECTION_STRING` env) | +| `-iterations` | `10` | Timed iterations per scenario | +| `-dataset` | | Run only this dataset | +| `-local-dataset` | | Add a local dataset to the default set | +| `-dataset-dir` | `integration/testdata` | Path to testdata directory | +| `-output` | stdout | Markdown output file | + +## Example: Neo4j on local/phantom + +``` +$ go run ./cmd/benchmark -driver neo4j -connection "neo4j://neo4j:testpassword@localhost:7687" -dataset local/phantom +``` + +| Query | Dataset | Median | P95 | Max | +|-------|---------|-------:|----:|----:| +| Match Nodes | local/phantom | 1.4ms | 2.3ms | 2.3ms | +| Match Edges | local/phantom | 1.6ms | 1.9ms | 1.9ms | +| Filter By Kind / User | local/phantom | 2.0ms | 2.6ms | 2.6ms | +| Filter By Kind / Group | local/phantom | 2.1ms | 2.3ms | 2.3ms | +| Filter By Kind / Computer | local/phantom | 1.6ms | 2.0ms | 2.0ms | +| Traversal Depth / depth 1 | local/phantom | 1.4ms | 2.1ms | 2.1ms | +| Traversal Depth / depth 2 | local/phantom | 1.6ms | 1.9ms | 1.9ms | +| Traversal Depth / depth 3 | local/phantom | 2.5ms | 3.3ms | 3.3ms | +| Edge Kind Traversal / MemberOf | local/phantom | 1.2ms | 1.4ms | 1.4ms | +| Edge Kind Traversal / GenericAll | local/phantom | 1.1ms | 1.5ms | 1.5ms | +| Edge Kind Traversal / HasSession | local/phantom | 1.1ms | 1.4ms | 1.4ms | +| Shortest Paths / 41 -> 587 | local/phantom | 1.5ms | 1.9ms | 1.9ms | + +## Example: PG on local/phantom + +``` +$ export PG_CONNECTION_STRING="postgresql://dawgs:dawgs@localhost:5432/dawgs" +$ go run ./cmd/benchmark -dataset local/phantom +``` + +| Query | Dataset | Median | P95 | Max | +|-------|---------|-------:|----:|----:| +| Match Nodes | local/phantom | 2.0ms | 6.5ms | 6.5ms | +| Match Edges | local/phantom | 464ms | 604ms | 604ms | +| Filter By 
Kind / User | local/phantom | 4.5ms | 18.3ms | 18.3ms | +| Filter By Kind / Group | local/phantom | 6.2ms | 28.8ms | 28.8ms | +| Filter By Kind / Computer | local/phantom | 1.1ms | 5.5ms | 5.5ms | +| Traversal Depth / depth 1 | local/phantom | 596ms | 636ms | 636ms | +| Traversal Depth / depth 2 | local/phantom | 639ms | 660ms | 660ms | +| Traversal Depth / depth 3 | local/phantom | 726ms | 745ms | 745ms | +| Edge Kind Traversal / MemberOf | local/phantom | 602ms | 627ms | 627ms | +| Edge Kind Traversal / GenericAll | local/phantom | 676ms | 791ms | 791ms | +| Edge Kind Traversal / HasSession | local/phantom | 682ms | 778ms | 778ms | +| Shortest Paths / 41 -> 587 | local/phantom | 708ms | 731ms | 731ms | diff --git a/cmd/benchmark/main.go b/cmd/benchmark/main.go new file mode 100644 index 0000000..6f72302 --- /dev/null +++ b/cmd/benchmark/main.go @@ -0,0 +1,219 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "flag" + "fmt" + "os" + "os/exec" + "strings" + "time" + + "github.com/specterops/dawgs" + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" + "github.com/specterops/dawgs/util/size" + + _ "github.com/specterops/dawgs/drivers/neo4j" +) + +func main() { + var ( + driver = flag.String("driver", "pg", "database driver (pg, neo4j)") + connStr = flag.String("connection", "", "database connection string (or PG_CONNECTION_STRING)") + iterations = flag.Int("iterations", 10, "timed iterations per scenario") + output = flag.String("output", "", "markdown output file (default: stdout)") + datasetDir = flag.String("dataset-dir", "integration/testdata", "path to testdata directory") + localDataset = flag.String("local-dataset", "", "additional local dataset (e.g. local/phantom)") + onlyDataset = flag.String("dataset", "", "run only this dataset (e.g. 
diamond, local/phantom)") + ) + + flag.Parse() + + conn := *connStr + if conn == "" { + conn = os.Getenv("PG_CONNECTION_STRING") + } + if conn == "" { + fatal("no connection string: set -connection flag or PG_CONNECTION_STRING env var") + } + + ctx := context.Background() + + cfg := dawgs.Config{ + GraphQueryMemoryLimit: size.Gibibyte, + ConnectionString: conn, + } + + if *driver == pg.DriverName { + pool, err := pg.NewPool(conn) + if err != nil { + fatal("failed to create pool: %v", err) + } + cfg.Pool = pool + } + + db, err := dawgs.Open(ctx, *driver, cfg) + if err != nil { + fatal("failed to open database: %v", err) + } + defer db.Close(ctx) + + // Build dataset list + var datasets []string + if *onlyDataset != "" { + datasets = []string{*onlyDataset} + } else { + datasets = defaultDatasets + if *localDataset != "" { + datasets = append(datasets, *localDataset) + } + } + + // Scan all datasets for kinds and assert schema + nodeKinds, edgeKinds := scanKinds(*datasetDir, datasets) + + schema := graph.Schema{ + Graphs: []graph.Graph{{ + Name: "integration_test", + Nodes: nodeKinds, + Edges: edgeKinds, + }}, + DefaultGraph: graph.Graph{Name: "integration_test"}, + } + + if err := db.AssertSchema(ctx, schema); err != nil { + fatal("failed to assert schema: %v", err) + } + + report := Report{ + Driver: *driver, + GitRef: gitRef(), + Date: time.Now().Format("2006-01-02"), + Iterations: *iterations, + } + + for _, ds := range datasets { + fmt.Fprintf(os.Stderr, "benchmarking %s...\n", ds) + + // Clear graph + if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + return tx.Nodes().Delete() + }); err != nil { + fmt.Fprintf(os.Stderr, " clear failed: %v\n", err) + continue + } + + // Load dataset + path := *datasetDir + "/" + ds + ".json" + idMap, err := loadDataset(ctx, db, path) + if err != nil { + fmt.Fprintf(os.Stderr, " load failed: %v\n", err) + continue + } + + fmt.Fprintf(os.Stderr, " loaded %d nodes\n", len(idMap)) + + // Run scenarios + for _, s 
:= range scenariosForDataset(ds, idMap) { + result, err := runScenario(ctx, db, s, *iterations) + if err != nil { + fmt.Fprintf(os.Stderr, " %s/%s failed: %v\n", s.Section, s.Label, err) + continue + } + + report.Results = append(report.Results, result) + fmt.Fprintf(os.Stderr, " %s/%s: median=%s p95=%s max=%s\n", + s.Section, s.Label, + fmtDuration(result.Stats.Median), + fmtDuration(result.Stats.P95), + fmtDuration(result.Stats.Max), + ) + } + } + + // Write markdown + var mdOut *os.File + if *output != "" { + var err error + mdOut, err = os.Create(*output) + if err != nil { + fatal("failed to create output: %v", err) + } + defer mdOut.Close() + } else { + mdOut = os.Stdout + } + + if err := writeMarkdown(mdOut, report); err != nil { + fatal("failed to write markdown: %v", err) + } + + if *output != "" { + fmt.Fprintf(os.Stderr, "wrote %s\n", *output) + } +} + +func scanKinds(datasetDir string, datasets []string) (graph.Kinds, graph.Kinds) { + var nodeKinds, edgeKinds graph.Kinds + + for _, ds := range datasets { + path := datasetDir + "/" + ds + ".json" + f, err := os.Open(path) + if err != nil { + continue + } + + doc, err := opengraph.ParseDocument(f) + f.Close() + if err != nil { + continue + } + + nk, ek := doc.Graph.Kinds() + nodeKinds = nodeKinds.Add(nk...) + edgeKinds = edgeKinds.Add(ek...) + } + + return nodeKinds, edgeKinds +} + +func loadDataset(ctx context.Context, db graph.Database, path string) (opengraph.IDMap, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + return opengraph.Load(ctx, db, f) +} + +func gitRef() string { + out, err := exec.Command("git", "rev-parse", "--short", "HEAD").Output() + if err != nil { + return "unknown" + } + return strings.TrimSpace(string(out)) +} + +func fatal(format string, args ...any) { + fmt.Fprintf(os.Stderr, format+"\n", args...) 
+ os.Exit(1) +} diff --git a/cmd/benchmark/report.go b/cmd/benchmark/report.go new file mode 100644 index 0000000..a440c8b --- /dev/null +++ b/cmd/benchmark/report.go @@ -0,0 +1,67 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "fmt" + "io" + "time" +) + +// Report holds all benchmark results and metadata. +type Report struct { + Driver string + GitRef string + Date string + Iterations int + Results []Result +} + +func writeMarkdown(w io.Writer, r Report) error { + fmt.Fprintf(w, "# Benchmarks — %s @ %s (%s, %d iterations)\n\n", r.Driver, r.GitRef, r.Date, r.Iterations) + fmt.Fprintf(w, "| Query | Dataset | Median | P95 | Max |\n") + fmt.Fprintf(w, "|-------|---------|-------:|----:|----:|\n") + + for _, res := range r.Results { + label := res.Section + if res.Label != res.Dataset { + label = res.Section + " / " + res.Label + } + + fmt.Fprintf(w, "| %s | %s | %s | %s | %s |\n", + label, + res.Dataset, + fmtDuration(res.Stats.Median), + fmtDuration(res.Stats.P95), + fmtDuration(res.Stats.Max), + ) + } + + fmt.Fprintln(w) + return nil +} + +func fmtDuration(d time.Duration) string { + ms := float64(d.Microseconds()) / 1000.0 + if ms < 1 { + return fmt.Sprintf("%.2fms", ms) + } + if ms < 100 { + return fmt.Sprintf("%.1fms", ms) + } + return fmt.Sprintf("%.0fms", ms) +} diff --git a/cmd/benchmark/runner.go b/cmd/benchmark/runner.go new file mode 100644 index 
0000000..52772d2 --- /dev/null +++ b/cmd/benchmark/runner.go @@ -0,0 +1,77 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "context" + "sort" + "time" + + "github.com/specterops/dawgs/graph" +) + +// Stats holds computed timing statistics for a scenario. +type Stats struct { + Median time.Duration + P95 time.Duration + Max time.Duration +} + +// Result is one row in the report. +type Result struct { + Section string + Dataset string + Label string + Stats Stats +} + +// runScenario executes a scenario N times and returns timing stats. +func runScenario(ctx context.Context, db graph.Database, s Scenario, iterations int) (Result, error) { + // Warm-up: one untimed run. 
+ if err := db.ReadTransaction(ctx, s.Query); err != nil { + return Result{}, err + } + + durations := make([]time.Duration, iterations) + + for i := range iterations { + start := time.Now() + if err := db.ReadTransaction(ctx, s.Query); err != nil { + return Result{}, err + } + durations[i] = time.Since(start) + } + + return Result{ + Section: s.Section, + Dataset: s.Dataset, + Label: s.Label, + Stats: computeStats(durations), + }, nil +} + +func computeStats(durations []time.Duration) Stats { + sort.Slice(durations, func(i, j int) bool { return durations[i] < durations[j] }) + + n := len(durations) + + return Stats{ + Median: durations[n/2], + P95: durations[n*95/100], + Max: durations[n-1], + } +} diff --git a/cmd/benchmark/scenarios.go b/cmd/benchmark/scenarios.go new file mode 100644 index 0000000..217ae63 --- /dev/null +++ b/cmd/benchmark/scenarios.go @@ -0,0 +1,156 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package main + +import ( + "fmt" + + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" +) + +// Scenario defines a single benchmark query to run against a loaded dataset. +type Scenario struct { + Section string // grouping key in the report (e.g. "Match Nodes") + Dataset string + Label string // human-readable row label + Query func(tx graph.Transaction) error +} + +// defaultDatasets is the set of datasets committed to the repo. 
+var defaultDatasets = []string{"base"} + +// scenariosForDataset returns all benchmark scenarios for a given dataset and its loaded ID map. +func scenariosForDataset(dataset string, idMap opengraph.IDMap) []Scenario { + switch dataset { + case "base": + return baseScenarios(idMap) + case "local/phantom": + return phantomScenarios(idMap) + default: + return nil + } +} + +func countNodes(tx graph.Transaction) error { + _, err := tx.Nodes().Count() + return err +} + +func countEdges(tx graph.Transaction) error { + _, err := tx.Relationships().Count() + return err +} + +func cypherQuery(cypher string) func(tx graph.Transaction) error { + return func(tx graph.Transaction) error { + result := tx.Query(cypher, nil) + defer result.Close() + for result.Next() { + } + return result.Error() + } +} + +// --- Base dataset scenarios (n1 -> n2 -> n3) --- + +func baseScenarios(idMap opengraph.IDMap) []Scenario { + ds := "base" + return []Scenario{ + {Section: "Match Nodes", Dataset: ds, Label: ds, Query: countNodes}, + {Section: "Match Edges", Dataset: ds, Label: ds, Query: countEdges}, + {Section: "Shortest Paths", Dataset: ds, Label: "n1 -> n3", Query: cypherQuery(fmt.Sprintf( + "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE id(s) = %d AND id(e) = %d RETURN p", + idMap["n1"], idMap["n3"], + ))}, + {Section: "Traversal", Dataset: ds, Label: "n1", Query: cypherQuery(fmt.Sprintf( + "MATCH (s)-[*1..]->(e) WHERE id(s) = %d RETURN e", + idMap["n1"], + ))}, + {Section: "Match Return", Dataset: ds, Label: "n1", Query: cypherQuery(fmt.Sprintf( + "MATCH (s)-[]->(e) WHERE id(s) = %d RETURN e", + idMap["n1"], + ))}, + {Section: "Filter By Kind", Dataset: ds, Label: "NodeKind1", Query: cypherQuery("MATCH (n:NodeKind1) RETURN n")}, + {Section: "Filter By Kind", Dataset: ds, Label: "NodeKind2", Query: cypherQuery("MATCH (n:NodeKind2) RETURN n")}, + } +} + +// --- Phantom scenarios (hardcoded node IDs from the dataset) --- + +func phantomScenarios(idMap opengraph.IDMap) []Scenario { + ds 
:= "local/phantom" + + scenarios := []Scenario{ + {Section: "Match Nodes", Dataset: ds, Label: ds, Query: countNodes}, + {Section: "Match Edges", Dataset: ds, Label: ds, Query: countEdges}, + } + + for _, kind := range []string{"User", "Group", "Computer"} { + k := kind + scenarios = append(scenarios, Scenario{ + Section: "Filter By Kind", + Dataset: ds, + Label: k, + Query: cypherQuery(fmt.Sprintf("MATCH (n:%s) RETURN n", k)), + }) + } + + if _, ok := idMap["41"]; ok { + for _, depth := range []int{1, 2, 3} { + d := depth + scenarios = append(scenarios, Scenario{ + Section: "Traversal Depth", + Dataset: ds, + Label: fmt.Sprintf("depth %d", d), + Query: cypherQuery(fmt.Sprintf( + "MATCH (s)-[*1..%d]->(e) WHERE id(s) = %d RETURN e", + d, idMap["41"], + )), + }) + } + + for _, ek := range []string{"MemberOf", "GenericAll", "HasSession"} { + edgeKind := ek + scenarios = append(scenarios, Scenario{ + Section: "Edge Kind Traversal", + Dataset: ds, + Label: edgeKind, + Query: cypherQuery(fmt.Sprintf( + "MATCH (s)-[:%s*1..]->(e) WHERE id(s) = %d RETURN e", + edgeKind, idMap["41"], + )), + }) + } + } + + if _, ok := idMap["41"]; ok { + if _, ok := idMap["587"]; ok { + scenarios = append(scenarios, Scenario{ + Section: "Shortest Paths", + Dataset: ds, + Label: "41 -> 587", + Query: cypherQuery(fmt.Sprintf( + "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE id(s) = %d AND id(e) = %d RETURN p", + idMap["41"], idMap["587"], + )), + }) + } + } + + return scenarios +} diff --git a/cmd/export/README.md b/cmd/export/README.md new file mode 100644 index 0000000..03d98cb --- /dev/null +++ b/cmd/export/README.md @@ -0,0 +1,37 @@ +# Export + +Exports a graph from a PostgreSQL database to an OpenGraph JSON file. This is how local test datasets (like `phantom.json`) are captured from a running BloodHound instance. 
+ +## Usage + +```bash +# Default connection (bloodhound local dev) and output (graph_export.json) +go run ./cmd/export + +# Custom output file +go run ./cmd/export my_graph.json + +# Custom connection +PGCONN="postgresql://dawgs:dawgs@localhost:5432/dawgs" go run ./cmd/export +``` + +## Environment + +| Variable | Default | Description | +|----------|---------|-------------| +| `PGCONN` | `postgresql://bloodhound:bloodhoundcommunityedition@localhost:5432/bloodhound` | PostgreSQL connection string | + +## Output + +Writes an OpenGraph JSON document with all nodes and edges from the default graph: + +```json +{ + "graph": { + "nodes": [{"id": "1", "kinds": ["User", "Base"], "properties": {...}}, ...], + "edges": [{"start_id": "1", "end_id": "2", "kind": "MemberOf"}, ...] + } +} +``` + +The output can be placed in `integration/testdata/local/` for use with the benchmark tool and integration tests. diff --git a/cmd/export/main.go b/cmd/export/main.go new file mode 100644 index 0000000..43ac05c --- /dev/null +++ b/cmd/export/main.go @@ -0,0 +1,46 @@ +package main + +import ( + "context" + "fmt" + "os" + + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/opengraph" + "github.com/specterops/dawgs/util/size" +) + +func main() { + connStr := os.Getenv("PGCONN") + if connStr == "" { + connStr = "postgresql://bloodhound:bloodhoundcommunityedition@localhost:5432/bloodhound" + } + + pool, err := pg.NewPool(connStr) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to connect: %v\n", err) + os.Exit(1) + } + defer pool.Close() + + db := pg.NewDriver(size.Gibibyte, pool) + + outFile := "graph_export.json" + if len(os.Args) > 1 { + outFile = os.Args[1] + } + + f, err := os.Create(outFile) + if err != nil { + fmt.Fprintf(os.Stderr, "failed to create file: %v\n", err) + os.Exit(1) + } + defer f.Close() + + if err := opengraph.Export(context.Background(), db, f); err != nil { + fmt.Fprintf(os.Stderr, "export failed: %v\n", err) + os.Exit(1) + } + + 
fmt.Fprintf(os.Stderr, "exported graph to %s\n", outFile) +} diff --git a/integration/BENCHMARKS.md b/integration/BENCHMARKS.md new file mode 100644 index 0000000..09c4677 --- /dev/null +++ b/integration/BENCHMARKS.md @@ -0,0 +1,64 @@ +# Integration Benchmarks + +| | | +| -------------- | ---------- | +| **Driver** | pg | +| **Git Ref** | f6372ea | +| **Date** | 2026-03-30 | +| **Iterations** | 100 | + +## Match Nodes + +| Dataset | Nodes | Median | P95 | Max | +| --------------- | ----: | -----: | -----: | -----: | +| diamond | 4 | 0.14ms | 0.22ms | 0.31ms | +| linear | 3 | 0.13ms | 0.20ms | 0.28ms | +| wide_diamond | 5 | 0.15ms | 0.23ms | 0.34ms | +| disconnected | 2 | 0.12ms | 0.19ms | 0.25ms | +| dead_end | 4 | 0.14ms | 0.21ms | 0.30ms | +| direct_shortcut | 4 | 0.14ms | 0.22ms | 0.29ms | +| local/phantom | - | - | - | - | + +## Match Edges + +| Dataset | Edges | Median | P95 | Max | +| --------------- | ----: | -----: | -----: | -----: | +| diamond | 4 | 0.15ms | 0.24ms | 0.33ms | +| linear | 2 | 0.13ms | 0.21ms | 0.27ms | +| wide_diamond | 6 | 0.16ms | 0.25ms | 0.36ms | +| disconnected | 0 | 0.11ms | 0.18ms | 0.22ms | +| dead_end | 3 | 0.14ms | 0.22ms | 0.30ms | +| direct_shortcut | 4 | 0.15ms | 0.23ms | 0.32ms | +| local/phantom | - | - | - | - | + +## Shortest Paths + +| Dataset | Start | End | Paths | Median | P95 | Max | +| --------------- | ----- | --- | ----: | -----: | -----: | -----: | +| diamond | a | d | 2 | 0.42ms | 0.68ms | 0.91ms | +| direct_shortcut | a | d | 1 | 0.31ms | 0.50ms | 0.72ms | +| linear | a | c | 1 | 0.33ms | 0.54ms | 0.74ms | +| dead_end | a | c | 1 | 0.34ms | 0.55ms | 0.76ms | +| disconnected | a | b | 0 | 0.18ms | 0.29ms | 0.40ms | +| wide_diamond | a | e | 3 | 0.51ms | 0.82ms | 1.12ms | +| local/phantom | - | - | - | - | - | - | + +## Variable-Length Traversal + +| Dataset | Start | Reachable | Median | P95 | Max | +| ------------- | ----- | --------: | -----: | -----: | -----: | +| linear | a | 2 | 0.28ms | 0.45ms | 0.62ms | +| 
diamond | a | 3 | 0.35ms | 0.56ms | 0.78ms | +| wide_diamond | a | 4 | 0.41ms | 0.66ms | 0.90ms | +| dead_end | a | 3 | 0.34ms | 0.55ms | 0.75ms | +| disconnected | a | 0 | 0.15ms | 0.24ms | 0.33ms | +| local/phantom | - | - | - | - | - | + +## Match Return Nodes + +| Dataset | Start | Returned | Median | P95 | Max | +| ------------- | ----- | -------: | -----: | -----: | -----: | +| diamond | a | 2 | 0.19ms | 0.30ms | 0.42ms | +| linear | a | 1 | 0.17ms | 0.27ms | 0.38ms | +| wide_diamond | a | 3 | 0.21ms | 0.34ms | 0.47ms | +| local/phantom | - | - | - | - | - | diff --git a/integration/cypher_test.go b/integration/cypher_test.go new file mode 100644 index 0000000..86d7d20 --- /dev/null +++ b/integration/cypher_test.go @@ -0,0 +1,337 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +//go:build manual_integration + +package integration + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" +) + +// caseFile represents one JSON test case file. +type caseFile struct { + Dataset string `json:"dataset"` + Skip string `json:"skip,omitempty"` + Cases []testCase `json:"cases"` +} + +// testCase is a single test: a Cypher query and an assertion on its result. +// Cases with a "fixture" field run in a write transaction that rolls back, +// so the inline data doesn't persist. 
+type testCase struct { + Name string `json:"name"` + Skip string `json:"skip,omitempty"` + Cypher string `json:"cypher"` + Assert json.RawMessage `json:"assert"` + Fixture *opengraph.Graph `json:"fixture,omitempty"` +} + +func TestCypher(t *testing.T) { + files, err := filepath.Glob("testdata/cases/*.json") + if err != nil { + t.Fatalf("failed to glob case files: %v", err) + } + if len(files) == 0 { + t.Fatal("no case files found in testdata/cases/") + } + + // Parse all case files and group by dataset. + type group struct { + dataset string + files []caseFile + } + groups := map[string]*group{} + var datasetNames []string + + for _, path := range files { + raw, err := os.ReadFile(path) + if err != nil { + t.Fatalf("failed to read %s: %v", path, err) + } + + var cf caseFile + if err := json.Unmarshal(raw, &cf); err != nil { + t.Fatalf("failed to decode %s: %v", path, err) + } + + ds := cf.Dataset + if ds == "" { + ds = "base" + } + + if groups[ds] == nil { + groups[ds] = &group{dataset: ds} + datasetNames = append(datasetNames, ds) + } + groups[ds].files = append(groups[ds].files, cf) + } + + db, ctx := SetupDB(t, datasetNames...) + + for _, g := range groups { + ClearGraph(t, db, ctx) + LoadDataset(t, db, ctx, g.dataset) + + for _, cf := range g.files { + if cf.Skip != "" { + t.Run(cf.Skip, func(t *testing.T) { + t.Skipf("skipped: %s", cf.Skip) + }) + continue + } + + for _, tc := range cf.Cases { + t.Run(tc.Name, func(t *testing.T) { + if tc.Skip != "" { + t.Skipf("skipped: %s", tc.Skip) + } + + check := parseAssertion(t, tc.Assert) + + if tc.Fixture != nil { + runWithFixture(t, ctx, db, tc, check) + } else { + runReadOnly(t, ctx, db, tc, check) + } + }) + } + } + } +} + +// parseAssertion converts a JSON assertion value into a function that checks +// a query result. 
Supports: +// +// "non_empty" — at least one row +// "empty" — zero rows +// "no_error" — drains result, checks no error +// {"row_count": N} — exactly N rows +// {"at_least_int": N} — first scalar >= N +// {"exact_int": N} — first scalar == N +// {"contains_node_with_prop": [K, V]} — some row has a node with property K=V +func parseAssertion(t *testing.T, raw json.RawMessage) func(*testing.T, graph.Result) { + t.Helper() + + // Try as a simple string first. + var str string + if err := json.Unmarshal(raw, &str); err == nil { + switch str { + case "non_empty": + return assertNonEmpty + case "empty": + return assertEmpty + case "no_error": + return assertNoError + default: + t.Fatalf("unknown string assertion: %q", str) + } + } + + // Otherwise it's an object with one key. + var obj map[string]json.RawMessage + if err := json.Unmarshal(raw, &obj); err != nil { + t.Fatalf("failed to parse assertion: %v", err) + } + + for key, val := range obj { + switch key { + case "row_count": + var n int + json.Unmarshal(val, &n) + return assertRowCount(n) + + case "at_least_int": + var n int64 + json.Unmarshal(val, &n) + return assertAtLeastInt64(n) + + case "exact_int": + var n int64 + json.Unmarshal(val, &n) + return assertExactInt64(n) + + case "contains_node_with_prop": + var pair [2]string + json.Unmarshal(val, &pair) + return assertContainsNodeWithProp(pair[0], pair[1]) + + default: + t.Fatalf("unknown assertion key: %q", key) + } + } + + t.Fatal("empty assertion object") + return nil +} + +// errFixtureRollback is returned to unconditionally roll back inline fixture data. +var errFixtureRollback = errors.New("fixture rollback") + +// runReadOnly executes a test case against the pre-loaded dataset. 
+func runReadOnly(t *testing.T, ctx context.Context, db graph.Database, tc testCase, check func(*testing.T, graph.Result)) { + t.Helper() + + err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Query(tc.Cypher, nil) + defer result.Close() + check(t, result) + return nil + }) + if err != nil { + t.Fatalf("transaction failed: %v", err) + } +} + +// runWithFixture creates inline fixture data in a write transaction, runs the +// query, checks the assertion, then rolls back so the data doesn't persist. +func runWithFixture(t *testing.T, ctx context.Context, db graph.Database, tc testCase, check func(*testing.T, graph.Result)) { + t.Helper() + + err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + if _, err := opengraph.WriteGraphTx(tx, tc.Fixture); err != nil { + return fmt.Errorf("creating fixture: %w", err) + } + + result := tx.Query(tc.Cypher, nil) + defer result.Close() + check(t, result) + + return errFixtureRollback + }) + + if !errors.Is(err, errFixtureRollback) { + t.Fatalf("unexpected transaction error: %v", err) + } +} + +// --- Assertion implementations --- + +func assertNonEmpty(t *testing.T, result graph.Result) { + t.Helper() + if !result.Next() { + if err := result.Error(); err != nil { + t.Fatalf("query error: %v", err) + } + t.Fatal("expected non-empty result set") + } +} + +func assertEmpty(t *testing.T, result graph.Result) { + t.Helper() + if result.Next() { + t.Fatal("expected empty result set but got rows") + } + if err := result.Error(); err != nil { + t.Fatalf("query error: %v", err) + } +} + +func assertNoError(t *testing.T, result graph.Result) { + t.Helper() + for result.Next() { + } + if err := result.Error(); err != nil { + t.Fatalf("query error: %v", err) + } +} + +func assertRowCount(n int) func(*testing.T, graph.Result) { + return func(t *testing.T, result graph.Result) { + t.Helper() + count := 0 + for result.Next() { + count++ + } + if err := result.Error(); err != nil { + t.Fatalf("query 
error: %v", err) + } + if count != n { + t.Fatalf("row count: got %d, want %d", count, n) + } + } +} + +func assertAtLeastInt64(min int64) func(*testing.T, graph.Result) { + return func(t *testing.T, result graph.Result) { + t.Helper() + if !result.Next() { + t.Fatal("no rows returned") + } + val, ok := result.Values()[0].(int64) + if !ok { + t.Fatalf("expected int64, got %T: %v", result.Values()[0], result.Values()[0]) + } + if val < min { + t.Fatalf("got %d, want >= %d", val, min) + } + } +} + +func assertExactInt64(expected int64) func(*testing.T, graph.Result) { + return func(t *testing.T, result graph.Result) { + t.Helper() + if !result.Next() { + t.Fatal("no rows returned") + } + val, ok := result.Values()[0].(int64) + if !ok { + t.Fatalf("expected int64, got %T: %v", result.Values()[0], result.Values()[0]) + } + if val != expected { + t.Fatalf("got %d, want %d", val, expected) + } + } +} + +func assertContainsNodeWithProp(key, expected string) func(*testing.T, graph.Result) { + return func(t *testing.T, result graph.Result) { + t.Helper() + mapper := result.Mapper() + for result.Next() { + for _, rawVal := range result.Values() { + var node graph.Node + if mapper.Map(rawVal, &node) { + if s, err := node.Properties.Get(key).String(); err == nil && s == expected { + return + } + } + } + } + if err := result.Error(); err != nil { + t.Fatalf("query error: %v", err) + } + t.Fatalf("no row contains a node with %s = %q", key, expected) + } +} + +// formatCaseSummary is used for logging. +func formatCaseSummary(files []string) string { + total := 0 + for range files { + total++ + } + return fmt.Sprintf("%d case files", total) +} diff --git a/integration/harness.go b/integration/harness.go new file mode 100644 index 0000000..2edc508 --- /dev/null +++ b/integration/harness.go @@ -0,0 +1,288 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package integration + +import ( + "context" + "flag" + "fmt" + "os" + "sort" + "strings" + "testing" + + "github.com/specterops/dawgs" + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/opengraph" + "github.com/specterops/dawgs/util/size" + + // Register drivers + _ "github.com/specterops/dawgs/drivers/neo4j" +) + +var ( + driverFlag = flag.String("driver", "pg", "database driver to test against (pg, neo4j)") + connStrFlag = flag.String("connection", "", "database connection string (overrides PG_CONNECTION_STRING env var)") + localDatasetFlag = flag.String("local-dataset", "", "name of a local dataset to test (e.g. local/phantom)") +) + +// SetupDB opens a database connection for the selected driver, asserts a schema +// derived from the given datasets, and registers cleanup. Returns the database +// and a background context. 
+func SetupDB(t *testing.T, datasets ...string) (graph.Database, context.Context) { + t.Helper() + + ctx := context.Background() + + connStr := *connStrFlag + if connStr == "" { + connStr = os.Getenv("PG_CONNECTION_STRING") + } + if connStr == "" { + t.Fatal("no connection string: set -connection flag or PG_CONNECTION_STRING env var") + } + + cfg := dawgs.Config{ + GraphQueryMemoryLimit: size.Gibibyte, + ConnectionString: connStr, + } + + // PG needs a pool with composite type registration + if *driverFlag == pg.DriverName { + pool, err := pg.NewPool(connStr) + if err != nil { + t.Fatalf("Failed to create PG pool: %v", err) + } + cfg.Pool = pool + } + + db, err := dawgs.Open(ctx, *driverFlag, cfg) + if err != nil { + t.Fatalf("Failed to open database: %v", err) + } + + nodeKinds, edgeKinds := collectKinds(t, datasets) + + schema := graph.Schema{ + Graphs: []graph.Graph{{ + Name: "integration_test", + Nodes: nodeKinds, + Edges: edgeKinds, + }}, + DefaultGraph: graph.Graph{Name: "integration_test"}, + } + + if err := db.AssertSchema(ctx, schema); err != nil { + t.Fatalf("Failed to assert schema: %v", err) + } + + t.Cleanup(func() { + _ = db.WriteTransaction(ctx, func(tx graph.Transaction) error { + return tx.Nodes().Delete() + }) + db.Close(ctx) + }) + + return db, ctx +} + +// collectKinds parses the given datasets and returns the union of all node and edge kinds. +func collectKinds(t *testing.T, datasets []string) (graph.Kinds, graph.Kinds) { + t.Helper() + + var nodeKinds, edgeKinds graph.Kinds + + for _, name := range datasets { + f, err := os.Open(datasetPath(name)) + if err != nil { + t.Fatalf("failed to open dataset %q for kind scanning: %v", name, err) + } + + doc, err := opengraph.ParseDocument(f) + f.Close() + if err != nil { + t.Fatalf("failed to parse dataset %q: %v", name, err) + } + + nk, ek := doc.Graph.Kinds() + nodeKinds = nodeKinds.Add(nk...) + edgeKinds = edgeKinds.Add(ek...) 
+ } + + return nodeKinds, edgeKinds +} + +// ClearGraph deletes all nodes (and cascading edges) from the database. +func ClearGraph(t *testing.T, db graph.Database, ctx context.Context) { + t.Helper() + + if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + return tx.Nodes().Delete() + }); err != nil { + t.Fatalf("failed to clear graph: %v", err) + } +} + +// datasetPath returns the filesystem path for a named dataset. +// Names may include subdirectories (e.g. "local/phantom"). +func datasetPath(name string) string { + return "testdata/" + name + ".json" +} + +// LoadDataset loads a named JSON dataset from testdata/ and returns the ID mapping. +func LoadDataset(t *testing.T, db graph.Database, ctx context.Context, name string) opengraph.IDMap { + t.Helper() + + f, err := os.Open(datasetPath(name)) + if err != nil { + t.Fatalf("failed to open dataset %q: %v", name, err) + } + defer f.Close() + + idMap, err := opengraph.Load(ctx, db, f) + if err != nil { + t.Fatalf("failed to load dataset %q: %v", name, err) + } + + return idMap +} + +// QueryPaths runs a Cypher query and collects all returned paths. +func QueryPaths(t *testing.T, ctx context.Context, db graph.Database, cypher string) []graph.Path { + t.Helper() + + var paths []graph.Path + + err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Query(cypher, nil) + defer result.Close() + + for result.Next() { + var p graph.Path + if err := result.Scan(&p); err != nil { + return fmt.Errorf("scan error: %w", err) + } + paths = append(paths, p) + } + + return result.Error() + }) + if err != nil { + t.Fatalf("query failed: %v", err) + } + + return paths +} + +// QueryNodeIDs runs a Cypher query that returns nodes and collects their fixture IDs. +// Duplicate nodes are deduplicated. 
+func QueryNodeIDs(t *testing.T, ctx context.Context, db graph.Database, cypher string, idMap opengraph.IDMap) []string { + t.Helper() + + rev := make(map[graph.ID]string, len(idMap)) + for fid, dbID := range idMap { + rev[dbID] = fid + } + + var ids []string + seen := make(map[string]bool) + + err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Query(cypher, nil) + defer result.Close() + + for result.Next() { + var n graph.Node + if err := result.Scan(&n); err != nil { + return err + } + if fid, ok := rev[n.ID]; ok && !seen[fid] { + ids = append(ids, fid) + seen[fid] = true + } + } + return result.Error() + }) + if err != nil { + t.Fatalf("query failed: %v", err) + } + + return ids +} + +// AssertIDSet checks that two sets of fixture node IDs match (order-independent). +func AssertIDSet(t *testing.T, got, expected []string) { + t.Helper() + + sort.Strings(got) + sort.Strings(expected) + + if len(got) != len(expected) { + t.Fatalf("ID set length: got %d, want %d\n got: %v\n want: %v", len(got), len(expected), got, expected) + } + + for i := range got { + if got[i] != expected[i] { + t.Fatalf("ID set mismatch at index %d:\n got: %v\n want: %v", i, got, expected) + } + } +} + +// AssertPaths checks that the returned paths match the expected set of fixture node ID sequences. +// Each expected path is a slice of fixture node IDs, e.g. []string{"a", "b", "d"}. +// Pass nil for expected when no paths should be returned. 
+func AssertPaths(t *testing.T, paths []graph.Path, idMap opengraph.IDMap, expected [][]string) { + t.Helper() + + rev := make(map[graph.ID]string, len(idMap)) + for fixtureID, dbID := range idMap { + rev[dbID] = fixtureID + } + + toSig := func(ids []string) string { return strings.Join(ids, ",") } + + got := make([]string, len(paths)) + for i, p := range paths { + ids := make([]string, len(p.Nodes)) + for j, node := range p.Nodes { + if fid, ok := rev[node.ID]; ok { + ids[j] = fid + } else { + ids[j] = fmt.Sprintf("?(%d)", node.ID) + } + } + got[i] = toSig(ids) + } + sort.Strings(got) + + want := make([]string, len(expected)) + for i, e := range expected { + want[i] = toSig(e) + } + sort.Strings(want) + + if len(got) != len(want) { + t.Fatalf("path count: got %d, want %d\n got: %v\n want: %v", len(got), len(want), got, want) + } + + for i := range got { + if got[i] != want[i] { + t.Fatalf("path mismatch at index %d:\n got: %v\n want: %v", i, got, want) + } + } +} diff --git a/integration/local_dataset_test.go b/integration/local_dataset_test.go new file mode 100644 index 0000000..776fb04 --- /dev/null +++ b/integration/local_dataset_test.go @@ -0,0 +1,186 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
// SPDX-License-Identifier: Apache-2.0

//go:build manual_integration

package integration

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/specterops/dawgs/graph"
	"github.com/specterops/dawgs/opengraph"
)

// TestLocalDataset loads the dataset named by the -local-dataset flag and runs
// a series of timed queries against it, logging the elapsed time of each. The
// test is skipped when the flag is empty. Only the CountNodes subtest asserts
// a value (node count equals the number of mapped fixture IDs); the other
// subtests fail only when a query returns an error.
func TestLocalDataset(t *testing.T) {
	if *localDatasetFlag == "" {
		t.Skip("no -local-dataset flag provided")
	}

	dataset := *localDatasetFlag
	// The timer starts before SetupDB, so the logged "load" duration covers
	// connection and schema setup as well as the dataset load.
	loadStart := time.Now()

	db, ctx := SetupDB(t, dataset)
	idMap := LoadDataset(t, db, ctx, dataset)

	t.Logf("load: %d nodes mapped in %s", len(idMap), time.Since(loadStart))

	// CountNodes checks that the database holds exactly one node per fixture ID.
	t.Run("CountNodes", func(t *testing.T) {
		start := time.Now()

		var count int64
		err := db.ReadTransaction(ctx, func(tx graph.Transaction) error {
			var countErr error
			count, countErr = tx.Nodes().Count()
			return countErr
		})
		if err != nil {
			t.Fatalf("query failed: %v", err)
		}

		if int(count) != len(idMap) {
			t.Fatalf("node count: got %d, want %d", count, len(idMap))
		}

		t.Logf("count(*) nodes = %d [%s]", count, time.Since(start))
	})

	// CountEdges only logs the relationship count; there is no expected value.
	t.Run("CountEdges", func(t *testing.T) {
		start := time.Now()

		var count int64
		err := db.ReadTransaction(ctx, func(tx graph.Transaction) error {
			var countErr error
			count, countErr = tx.Relationships().Count()
			return countErr
		})
		if err != nil {
			t.Fatalf("query failed: %v", err)
		}

		t.Logf("count(*) edges = %d [%s]", count, time.Since(start))
	})

	// FilterNodesByKind times a label-filtered node match for each listed kind.
	t.Run("FilterNodesByKind", func(t *testing.T) {
		for _, kind := range []string{"User", "Group", "Computer"} {
			t.Run(kind, func(t *testing.T) {
				start := time.Now()

				cypher := fmt.Sprintf("MATCH (n:%s) RETURN n", kind)
				got := QueryNodeIDs(t, ctx, db, cypher, idMap)

				t.Logf("MATCH (n:%s) = %d nodes [%s]", kind, len(got), time.Since(start))
			})
		}
	})

	// TraversalDepth times variable-length expansions of depth 1..3 from one
	// User node; skipped when no User node maps to a fixture ID.
	t.Run("TraversalDepth", func(t *testing.T) {
		startID := pickNodeByKind(t, ctx, db, idMap, "User")
		if startID == "" {
			t.Skip("no User node found")
		}

		for _, depth := range []int{1, 2, 3} {
			t.Run(fmt.Sprintf("depth_%d", depth), func(t *testing.T) {
				start := time.Now()

				// idMap[startID] is the database ID matched by id(s).
				cypher := fmt.Sprintf(
					"MATCH (s)-[*1..%d]->(e) WHERE id(s) = %d RETURN e",
					depth, idMap[startID],
				)

				got := QueryNodeIDs(t, ctx, db, cypher, idMap)
				t.Logf("node %s depth %d: %d reachable [%s]", startID, depth, len(got), time.Since(start))
			})
		}
	})

	// ShortestPath times allShortestPaths between one User and one Domain
	// node; skipped when either endpoint cannot be found.
	t.Run("ShortestPath", func(t *testing.T) {
		startID := pickNodeByKind(t, ctx, db, idMap, "User")
		endID := pickNodeByKind(t, ctx, db, idMap, "Domain")
		if startID == "" || endID == "" {
			t.Skip("could not find User and Domain nodes")
		}

		start := time.Now()

		cypher := fmt.Sprintf(
			"MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE id(s) = %d AND id(e) = %d RETURN p",
			idMap[startID], idMap[endID],
		)

		paths := QueryPaths(t, ctx, db, cypher)
		t.Logf("shortest paths %s -> %s: %d paths [%s]", startID, endID, len(paths), time.Since(start))
	})

	// EdgeTraversalByKind times unbounded expansions from one User node that
	// are restricted to a single edge kind at a time.
	t.Run("EdgeTraversalByKind", func(t *testing.T) {
		startID := pickNodeByKind(t, ctx, db, idMap, "User")
		if startID == "" {
			t.Skip("no User node found")
		}

		for _, edgeKind := range []string{"MemberOf", "GenericAll", "HasSession"} {
			t.Run(edgeKind, func(t *testing.T) {
				start := time.Now()

				cypher := fmt.Sprintf(
					"MATCH (s)-[:%s*1..]->(e) WHERE id(s) = %d RETURN e",
					edgeKind, idMap[startID],
				)

				got := QueryNodeIDs(t, ctx, db, cypher, idMap)
				t.Logf("node %s via %s: %d reachable [%s]", startID, edgeKind, len(got), time.Since(start))
			})
		}
	})
}

// pickNodeByKind finds the first node in the database with the given kind and returns its fixture ID.
+func pickNodeByKind(t *testing.T, ctx context.Context, db graph.Database, idMap opengraph.IDMap, kind string) string { + t.Helper() + + rev := make(map[graph.ID]string, len(idMap)) + for fid, dbID := range idMap { + rev[dbID] = fid + } + + cypher := fmt.Sprintf("MATCH (n:%s) RETURN n LIMIT 1", kind) + + var nodeID string + err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + result := tx.Query(cypher, nil) + defer result.Close() + + if result.Next() { + var n graph.Node + if err := result.Scan(&n); err != nil { + return err + } + + nodeID = rev[n.ID] + } + + return result.Error() + }) + if err != nil { + t.Fatalf("pickNodeByKind query failed: %v", err) + } + + return nodeID +} diff --git a/integration/testdata/base.json b/integration/testdata/base.json new file mode 100644 index 0000000..3cd61ee --- /dev/null +++ b/integration/testdata/base.json @@ -0,0 +1,69 @@ +{ + "graph": { + "nodes": [ + { + "id": "n1", + "kinds": ["NodeKind1"], + "properties": { + "name": "SOME NAME", + "value": 1, + "objectid": "S-1-5-21-1", + "enabled": true, + "hasspn": true, + "pwdlastset": -2, + "functionallevel": "2012", + "system_tags": "admin_tier_0", + "domain": "test.local", + "other": "SOME NAME", + "tid": "tid1", + "selected": true, + "array_value": [1, 2], + "arrayProperty": ["DES-CBC-CRC", "DES-CBC-MD5"], + "distinguishedname": "CN=TEST,DC=example,DC=com", + "samaccountname": "testuser", + "email": "test@example.com" + } + }, + { + "id": "n2", + "kinds": ["NodeKind2"], + "properties": { + "name": "1234", + "value": 2, + "objectid": "S-1-5-21-2", + "tid": "tid1", + "distinguishedname": "CN=ADMINSDHOLDER,CN=SYSTEM,CN=TEST,DC=example,DC=com", + "samaccountname": "adminuser", + "email": "admin@example.com", + "domain": "other.local" + } + }, + { + "id": "n3", + "kinds": ["NodeKind1", "NodeKind2"], + "properties": { + "name": "n3", + "value": 3, + "prop": "a" + } + } + ], + "edges": [ + { + "start_id": "n1", + "end_id": "n2", + "kind": "EdgeKind1", + "properties": { + 
"prop": "a", + "value": 42, + "bool_prop": true + } + }, + { + "start_id": "n2", + "end_id": "n3", + "kind": "EdgeKind2" + } + ] + } +} diff --git a/integration/testdata/cases/aggregation.json b/integration/testdata/cases/aggregation.json new file mode 100644 index 0000000..2fdf437 --- /dev/null +++ b/integration/testdata/cases/aggregation.json @@ -0,0 +1,85 @@ +{ + "dataset": "base", + "cases": [ + { + "name": "count all nodes", + "cypher": "MATCH (n) RETURN count(n)", + "assert": {"at_least_int": 3} + }, + { + "name": "return a constant string literal", + "cypher": "RETURN 'hello world'", + "assert": "non_empty" + }, + { + "name": "return a constant arithmetic expression", + "cypher": "RETURN 2 + 3", + "assert": "non_empty" + }, + { + "name": "collect all node name properties into a list", + "cypher": "MATCH (n) RETURN collect(n.name)", + "assert": "non_empty" + }, + { + "name": "return the size of a collected list of node properties", + "cypher": "MATCH (n) RETURN size(collect(n.name))", + "assert": "non_empty" + }, + { + "name": "filter on an aggregate result using WITH and WHERE", + "cypher": "MATCH (n) WITH count(n) as cnt WHERE cnt > 1 RETURN cnt", + "assert": "non_empty" + }, + { + "name": "group by node and filter on per-node count", + "cypher": "MATCH (n) WITH n, count(n) as node_count WHERE node_count > 1 RETURN n, node_count", + "assert": "no_error" + }, + { + "name": "sum a numeric node property across all nodes", + "cypher": "MATCH (n) RETURN sum(n.value)", + "assert": "non_empty" + }, + { + "name": "average a numeric node property across all nodes", + "cypher": "MATCH (n) RETURN avg(n.value)", + "assert": "non_empty" + }, + { + "name": "minimum of a numeric node property across all nodes", + "cypher": "MATCH (n) RETURN min(n.value)", + "assert": "non_empty" + }, + { + "name": "maximum of a numeric node property across all nodes", + "cypher": "MATCH (n) RETURN max(n.value)", + "assert": "non_empty" + }, + { + "name": "group nodes by a property and 
count each group", + "cypher": "MATCH (n) RETURN n.domain, count(n)", + "assert": "non_empty" + }, + { + "name": "compute multiple aggregates in a single projection", + "cypher": "MATCH (n) RETURN count(n), sum(n.value)", + "assert": "non_empty" + }, + { + "name": "filter nodes using size() on an array property in WHERE", + "cypher": "MATCH (n) WHERE size(n.array_value) > 0 RETURN n", + "assert": "non_empty" + }, + { + "name": "feed an aggregate result from a WITH stage into a subsequent MATCH", + "cypher": "MATCH (n) WITH count(n) as lim MATCH (o) RETURN o", + "assert": "non_empty" + }, + { + "name": "collect node properties in a WITH stage then filter by the collected size", + "cypher": "MATCH (n) WITH n, collect(n.prop) as props WHERE size(props) > 1 RETURN n, props", + "assert": "no_error" + } + ] +} diff --git a/integration/testdata/cases/aggregation_inline.json b/integration/testdata/cases/aggregation_inline.json new file mode 100644 index 0000000..08fc6fb --- /dev/null +++ b/integration/testdata/cases/aggregation_inline.json @@ -0,0 +1,77 @@ +{ + "cases": [ + { + "name": "group nodes by a property and return the sum of another property per group", + "cypher": "MATCH (n) RETURN n.department, sum(n.salary)", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"department": "eng", "salary": 100}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"department": "eng", "salary": 200}}, + {"id": "c", "kinds": ["NodeKind2"], "properties": {"department": "hr", "salary": 150}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "group nodes by a property and return the average of another property per group", + "cypher": "MATCH (n) RETURN n.department, avg(n.age)", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"department": "eng", "age": 30}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"department": "eng", "age": 40}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + 
"name": "compute count sum avg min and max of a property in a single projection", + "cypher": "MATCH (n) RETURN count(n), sum(n.age), avg(n.age), min(n.age), max(n.age)", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"age": 25}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"age": 35}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "group nodes by a property and collect names per group", + "cypher": "MATCH (n) RETURN n.department, collect(n.name)", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"department": "eng", "name": "alice"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"department": "eng", "name": "bob"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "group nodes by a property and return both a collected list and a count", + "cypher": "MATCH (n) RETURN n.department, collect(n.name), count(n)", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"department": "ops", "name": "carol"}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"department": "ops", "name": "dave"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "compute a ratio by dividing two aggregate results in a WITH stage", + "cypher": "MATCH (n) WITH sum(n.age) as total_age, count(n) as total_count RETURN total_age / total_count as avg_age", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"age": 30}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"age": 50}} + ], + "edges": [] + }, + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/delete.json b/integration/testdata/cases/delete.json new file mode 100644 index 0000000..2a0b59a --- /dev/null +++ b/integration/testdata/cases/delete.json @@ -0,0 +1,16 @@ +{ + "cases": [ + { + "name": "delete a specific typed edge", + "cypher": "match ()-[r:EdgeKind1]->() delete r", + "fixture": { + "nodes": [ + {"id": "a", 
"kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind2"]} + ], + "edges": [{"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}] + }, + "assert": "no_error" + } + ] +} diff --git a/integration/testdata/cases/delete_inline.json b/integration/testdata/cases/delete_inline.json new file mode 100644 index 0000000..2478ecd --- /dev/null +++ b/integration/testdata/cases/delete_inline.json @@ -0,0 +1,29 @@ +{ + "cases": [ + { + "name": "detach-delete a typed node and its incident edges", + "cypher": "match (s:NodeKind1) detach delete s", + "fixture": { + "nodes": [{"id": "victim", "kinds": ["NodeKind1"], "properties": {"name": "victim"}}], + "edges": [] + }, + "assert": "no_error" + }, + { + "name": "traverse two hops then delete the typed edge at the second hop", + "cypher": "match ()-[]->()-[r:EdgeKind2]->() delete r", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind2"]}, + {"id": "c", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "c", "kind": "EdgeKind2"} + ] + }, + "assert": "no_error" + } + ] +} diff --git a/integration/testdata/cases/expansion.json b/integration/testdata/cases/expansion.json new file mode 100644 index 0000000..fa4be1d --- /dev/null +++ b/integration/testdata/cases/expansion.json @@ -0,0 +1,55 @@ +{ + "dataset": "base", + "cases": [ + { + "name": "unbounded variable-length traversal returning both endpoints", + "cypher": "match (n)-[*..]->(e) return n, e", + "assert": "non_empty" + }, + { + "name": "variable-length traversal bounded to depth 1-2", + "cypher": "match (n)-[*1..2]->(e) return n, e", + "assert": "non_empty" + }, + { + "name": "variable-length traversal bounded to depth 3-5 (expect empty with shallow fixture)", + "cypher": "match (n)-[*3..5]->(e) return n, e", + "assert": "no_error" + }, + { + "name": "bind unbounded path variable reaching a typed endpoint", + "cypher": "match p = 
(n)-[*..]->(e:NodeKind1) return p", + "assert": "non_empty" + }, + { + "name": "bind path variable for unbounded traversal between typed endpoints", + "cypher": "match p = (s:NodeKind1)-[*..]->(e:NodeKind2) return p", + "assert": "non_empty" + }, + { + "name": "bounded incoming variable-length traversal with depth range 2-5", + "cypher": "match (n)<-[*2..5]-(e) return n, e", + "assert": "non_empty" + }, + { + "name": "bind expansion path where the source and destination must be distinct nodes", + "cypher": "match p = (s:NodeKind1)-[*..]->(e:NodeKind2) where s <> e return p", + "assert": "non_empty" + }, + { + "name": "bind an incoming unbounded expansion path to a typed source", + "cypher": "match p = (:NodeKind1)<-[:EdgeKind1|EdgeKind2*..]-() return p limit 10", + "assert": "non_empty" + }, + { + "name": "bind expansion path filtered by a regular expression on the endpoint name", + "cypher": "match p = (n:NodeKind1)-[:EdgeKind1|EdgeKind2*1..2]->(r:NodeKind2) where r.name =~ '1.*' return p limit 10", + "assert": "non_empty" + }, + { + "name": "bind incoming expansion path where source matches a kind disjunction", + "cypher": "match p = (t:NodeKind2)<-[:EdgeKind1*1..]-(a) where (a:NodeKind1 or a:NodeKind2) and t.objectid ends with '-2' return p limit 1000", + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/expansion_inline.json b/integration/testdata/cases/expansion_inline.json new file mode 100644 index 0000000..f046467 --- /dev/null +++ b/integration/testdata/cases/expansion_inline.json @@ -0,0 +1,142 @@ +{ + "cases": [ + { + "name": "unbounded traversal from a named source to a typed endpoint", + "cypher": "match (n)-[*..]->(e:NodeKind1) where n.name = 'n1' return e", + "fixture": { + "nodes": [ + {"id": "n1", "kinds": ["NodeKind1"], "properties": {"name": "n1"}}, + {"id": "n2", "kinds": ["NodeKind2"], "properties": {"name": "n2"}}, + {"id": "n3", "kinds": ["NodeKind1"], "properties": {"name": "n3"}} + ], + "edges": [ + {"start_id": "n1", 
"end_id": "n2", "kind": "EdgeKind1"}, + {"start_id": "n2", "end_id": "n3", "kind": "EdgeKind2"} + ] + }, + "assert": {"contains_node_with_prop": ["name", "n3"]} + }, + { + "name": "unbounded traversal filtering every traversed edge by a property", + "cypher": "match (n)-[r*..]->(e:NodeKind1) where n.name = 'n1' and r.prop = 'a' return e", + "fixture": { + "nodes": [ + {"id": "n1", "kinds": ["NodeKind1"], "properties": {"name": "n1"}}, + {"id": "n2", "kinds": ["NodeKind2"], "properties": {"name": "n2"}}, + {"id": "n3", "kinds": ["NodeKind1"], "properties": {"name": "n3"}} + ], + "edges": [ + {"start_id": "n1", "end_id": "n2", "kind": "EdgeKind1", "properties": {"prop": "a"}}, + {"start_id": "n2", "end_id": "n3", "kind": "EdgeKind2", "properties": {"prop": "a"}} + ] + }, + "assert": {"contains_node_with_prop": ["name", "n3"]} + }, + { + "name": "unbounded expansion followed by a single fixed step", + "cypher": "match (n)-[*..]->(e:NodeKind1)-[]->(l) where n.name = 'start' return l", + "fixture": { + "nodes": [ + {"id": "start", "kinds": ["NodeKind2"], "properties": {"name": "start"}}, + {"id": "mid", "kinds": ["NodeKind1"], "properties": {"name": "mid"}}, + {"id": "leaf", "kinds": ["NodeKind2"], "properties": {"name": "leaf"}} + ], + "edges": [ + {"start_id": "start", "end_id": "mid", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "leaf", "kind": "EdgeKind2"} + ] + }, + "assert": {"contains_node_with_prop": ["name", "leaf"]} + }, + { + "name": "fixed step followed by a bounded variable-length expansion", + "cypher": "match (n)-[]->(e:NodeKind1)-[*2..3]->(l) where n.name = 'start' return l", + "fixture": { + "nodes": [ + {"id": "start", "kinds": ["NodeKind2"], "properties": {"name": "start"}}, + {"id": "mid", "kinds": ["NodeKind1"], "properties": {"name": "mid"}}, + {"id": "hop1", "kinds": ["NodeKind2"], "properties": {"name": "hop1"}}, + {"id": "hop2", "kinds": ["NodeKind2"], "properties": {"name": "hop2"}} + ], + "edges": [ + {"start_id": "start", "end_id": 
"mid", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "hop1", "kind": "EdgeKind2"}, + {"start_id": "hop1", "end_id": "hop2", "kind": "EdgeKind2"} + ] + }, + "assert": {"contains_node_with_prop": ["name", "hop2"]} + }, + { + "name": "unbounded expansion to a typed endpoint returning the source node", + "cypher": "match (n)-[*..]->(e:NodeKind1) where n.name = 'n2' return n", + "fixture": { + "nodes": [ + {"id": "n2", "kinds": ["NodeKind2"], "properties": {"name": "n2"}}, + {"id": "n3", "kinds": ["NodeKind1"], "properties": {"name": "n3"}} + ], + "edges": [{"start_id": "n2", "end_id": "n3", "kind": "EdgeKind1"}] + }, + "assert": {"contains_node_with_prop": ["name", "n2"]} + }, + { + "name": "bounded variable-length expansion followed by a single fixed step", + "cypher": "match (n)-[*2..3]->(e:NodeKind1)-[]->(l) where n.name = 'n1' return l", + "fixture": { + "nodes": [ + {"id": "n1", "kinds": ["NodeKind2"], "properties": {"name": "n1"}}, + {"id": "hop", "kinds": ["NodeKind2"], "properties": {"name": "hop"}}, + {"id": "mid", "kinds": ["NodeKind1"], "properties": {"name": "mid"}}, + {"id": "leaf", "kinds": ["NodeKind2"], "properties": {"name": "leaf"}} + ], + "edges": [ + {"start_id": "n1", "end_id": "hop", "kind": "EdgeKind1"}, + {"start_id": "hop", "end_id": "mid", "kind": "EdgeKind2"}, + {"start_id": "mid", "end_id": "leaf", "kind": "EdgeKind1"} + ] + }, + "assert": {"contains_node_with_prop": ["name", "leaf"]} + }, + { + "name": "two unbounded expansions joined through a typed fixed step", + "cypher": "match (n)-[*..]->(e)-[:EdgeKind1|EdgeKind2]->()-[*..]->(l) where n.name = 'n1' and e.name = 'n2' return l", + "fixture": { + "nodes": [ + {"id": "n1", "kinds": ["NodeKind1"], "properties": {"name": "n1"}}, + {"id": "n2", "kinds": ["NodeKind2"], "properties": {"name": "n2"}}, + {"id": "bridge", "kinds": ["NodeKind2"]}, + {"id": "leaf", "kinds": ["NodeKind1"], "properties": {"name": "leaf"}} + ], + "edges": [ + {"start_id": "n1", "end_id": "n2", "kind": 
"EdgeKind1"}, + {"start_id": "n2", "end_id": "bridge", "kind": "EdgeKind2"}, + {"start_id": "bridge", "end_id": "leaf", "kind": "EdgeKind1"} + ] + }, + "assert": {"contains_node_with_prop": ["name", "leaf"]} + }, + { + "name": "bind expansion path filtered by a split membership check on the endpoint", + "cypher": "match p = (:NodeKind1)-[:EdgeKind1*1..]->(n:NodeKind2) where 'admin_tier_0' in split(n.system_tags, ' ') return p limit 1000", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"], "properties": {"system_tags": "admin_tier_0 extra_tag"}} + ], + "edges": [{"start_id": "src", "end_id": "dst", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "bind expansion path filtering both endpoints using ends-with on objectid", + "cypher": "match p = (g:NodeKind1)-[:EdgeKind1|EdgeKind2*]->(target:NodeKind1) where g.objectid ends with '-src' and target.objectid ends with '-tgt' return p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"objectid": "S-1-src"}}, + {"id": "tgt", "kinds": ["NodeKind1"], "properties": {"objectid": "S-1-tgt"}} + ], + "edges": [{"start_id": "src", "end_id": "tgt", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/multipart.json b/integration/testdata/cases/multipart.json new file mode 100644 index 0000000..576bc42 --- /dev/null +++ b/integration/testdata/cases/multipart.json @@ -0,0 +1,35 @@ +{ + "dataset": "base", + "cases": [ + { + "name": "bind a literal as a WITH variable and filter typed nodes by it", + "cypher": "with '1' as target match (n:NodeKind1) where n.value = target return n", + "assert": "no_error" + }, + { + "name": "bind any node then find all one-hop paths that reach it", + "cypher": "match (e) match p = ()-[]->(e) return p limit 1", + "assert": "non_empty" + }, + { + "name": "carry a node through WITH and re-match it under its original kind label", + "cypher": 
"match (u:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) with g match (g)<-[:EdgeKind1]-(u:NodeKind1) return g", + "assert": "non_empty" + }, + { + "name": "bind a numeric literal as a WITH variable and use it in arithmetic in the next MATCH", + "cypher": "with 365 as max_days match (n:NodeKind1) where n.pwdlastset < (datetime().epochseconds - (max_days * 86400)) and not n.pwdlastset IN [-1.0, 0.0] return n limit 100", + "assert": "non_empty" + }, + { + "name": "match a typed node then bind its variable-length expansion to a path", + "cypher": "match (n:NodeKind1) where n.objectid = 'S-1-5-21-1' match p = (n)-[:EdgeKind1|EdgeKind2*1..]->(c:NodeKind2) return p", + "assert": "non_empty" + }, + { + "name": "match two paths that share a common middle node and return both", + "cypher": "match p = (a)-[]->() match q = ()-[]->(a) return p, q", + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/multipart_inline.json b/integration/testdata/cases/multipart_inline.json new file mode 100644 index 0000000..658fe29 --- /dev/null +++ b/integration/testdata/cases/multipart_inline.json @@ -0,0 +1,80 @@ +{ + "cases": [ + { + "name": "carry a node through WITH and re-match it by ID", + "cypher": "match (n:NodeKind1) where n.value = 1 with n match (b) where id(b) = id(n) return b", + "fixture": { + "nodes": [ + {"id": "n", "kinds": ["NodeKind1"], "properties": {"value": 1}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "exclude second-stage results using a collected list from the first stage", + "cypher": "match (g1:NodeKind1) where g1.name starts with 'test' with collect(g1.domain) as excludes match (d:NodeKind2) where d.name starts with 'other' and not d.name in excludes return d", + "fixture": { + "nodes": [ + {"id": "g1", "kinds": ["NodeKind1"], "properties": {"name": "testnode", "domain": "test.local"}}, + {"id": "d1", "kinds": ["NodeKind2"], "properties": {"name": "othernode"}}, + {"id": "d2", "kinds": ["NodeKind2"], "properties": {"name": 
"othertest"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "three-stage pipeline carrying nodes through successive WITH clauses", + "cypher": "match (n:NodeKind1) where n.value = 1 with n match (f) where f.name = 'me' with f match (b) where id(b) = id(f) return b", + "fixture": { + "nodes": [ + {"id": "n", "kinds": ["NodeKind1"], "properties": {"value": 1}}, + {"id": "f", "kinds": ["NodeKind2"], "properties": {"name": "me"}} + ], + "edges": [] + }, + "assert": {"contains_node_with_prop": ["name", "me"]} + }, + { + "name": "filter a carried node using a per-group count in a WITH stage", + "cypher": "match (n:NodeKind1)<-[:EdgeKind1]-(:NodeKind2) where n.objectid ends with '-516' with n, count(n) as dc_count where dc_count = 1 return n", + "fixture": { + "nodes": [ + {"id": "dst", "kinds": ["NodeKind1"], "properties": {"objectid": "S-1-5-21-516"}}, + {"id": "src", "kinds": ["NodeKind2"]} + ], + "edges": [{"start_id": "src", "end_id": "dst", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "filter typed nodes by a regular expression and carry collected results to the next stage", + "cypher": "match (cg:NodeKind1) where cg.name =~ \".*TT\" with collect(cg.name) as names return names", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "SCOTT"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "admin"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "expand from a typed node count reachable typed targets and order by that count", + "cypher": "match (n:NodeKind1) where n.hasspn = true match (n)-[:EdgeKind1|EdgeKind2*1..]->(c:NodeKind2) with distinct n, count(c) as adminCount return n order by adminCount desc limit 100", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"], "properties": {"hasspn": true}}, + {"id": "c1", "kinds": ["NodeKind2"]}, + {"id": "c2", "kinds": ["NodeKind2"]} + ], + "edges": [ + {"start_id": "src", "end_id": "c1", "kind": 
"EdgeKind1"}, + {"start_id": "src", "end_id": "c2", "kind": "EdgeKind2"} + ] + }, + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/nodes.json b/integration/testdata/cases/nodes.json new file mode 100644 index 0000000..6a3ae37 --- /dev/null +++ b/integration/testdata/cases/nodes.json @@ -0,0 +1,360 @@ +{ + "dataset": "base", + "cases": [ + { + "name": "return kind labels for all nodes", + "cypher": "match (n) return labels(n)", + "assert": "non_empty" + }, + { + "name": "filter any node by string property equality", + "cypher": "match (n) where n.name = '1234' return n", + "assert": {"contains_node_with_prop": ["name", "1234"]} + }, + { + "name": "filter a typed node using an inline property map", + "cypher": "match (n:NodeKind1 {name: \"SOME NAME\"}) return n", + "assert": {"contains_node_with_prop": ["name", "SOME NAME"]} + }, + { + "name": "return all nodes", + "cypher": "match (s) return s", + "assert": "non_empty" + }, + { + "name": "filter nodes matching a kind disjunction", + "cypher": "match (s) where (s:NodeKind1 or s:NodeKind2) return s", + "assert": "non_empty" + }, + { + "name": "cross-product filter where two nodes share a property value", + "cypher": "match (n:NodeKind1), (e) where n.name = e.name return n", + "assert": "non_empty" + }, + { + "name": "filter any node by string property equality (s binding)", + "cypher": "match (s) where s.name = '1234' return s", + "assert": {"contains_node_with_prop": ["name", "1234"]} + }, + { + "name": "filter node by string starts-with prefix", + "cypher": "match (s) where s.name starts with '123' return s", + "assert": {"contains_node_with_prop": ["name", "1234"]} + }, + { + "name": "filter node by string ends-with suffix", + "cypher": "match (s) where s.name ends with 'NAME' return s", + "assert": {"contains_node_with_prop": ["name", "SOME NAME"]} + }, + { + "name": "filter node where a property is not null", + "cypher": "match (n) where n.system_tags is not null return n", + "assert": 
{"contains_node_with_prop": ["system_tags", "admin_tier_0"]} + }, + { + "name": "filter typed node using coalesce with contains predicate", + "cypher": "match (n:NodeKind1) where coalesce(n.system_tags, '') contains 'admin_tier_0' return n", + "assert": {"contains_node_with_prop": ["system_tags", "admin_tier_0"]} + }, + { + "name": "filter typed node by array property size", + "cypher": "match (n:NodeKind1) where size(n.array_value) > 0 return n", + "assert": "non_empty" + }, + { + "name": "filter typed node where array property overlaps a literal list", + "cypher": "match (n:NodeKind1) where ['DES-CBC-CRC', 'DES-CBC-MD5', 'RC4-HMAC-MD5'] in n.arrayProperty return n", + "assert": "non_empty" + }, + { + "name": "filter typed node where array property contains one of several scalar values", + "cypher": "match (u:NodeKind1) where 'DES-CBC-CRC' in u.arrayProperty or 'DES-CBC-MD5' in u.arrayProperty return u", + "assert": "non_empty" + }, + { + "name": "filter node carrying two kind labels simultaneously", + "cypher": "match (s) where s:NodeKind1 and s:NodeKind2 return s", + "assert": {"contains_node_with_prop": ["name", "n3"]} + }, + { + "name": "paginate results using SKIP and LIMIT", + "cypher": "match (n) return n skip 5 limit 10", + "assert": "no_error" + }, + { + "name": "order results by node ID descending", + "cypher": "match (s) return s order by id(s) desc", + "assert": "non_empty" + }, + { + "name": "filter isolated nodes with no adjacent edges", + "cypher": "match (s) where not (s)-[]-() return s", + "assert": "no_error" + }, + { + "name": "filter nodes where node ID appears in another node's array property", + "cypher": "match (s), (e) where id(s) in e.captured_ids return s, e", + "assert": "no_error" + }, + { + "name": "filter typed node with starts-with using a function call as the prefix", + "cypher": "match (n:NodeKind1) where n.distinguishedname starts with toUpper('admin') return n", + "assert": "empty" + }, + { + "name": "optional match returns 
results even when the pattern may be absent", + "cypher": "optional match (n:NodeKind1) return n", + "assert": "non_empty" + }, + { + "name": "double-negation filter selects nodes where a property is null", + "cypher": "match (n) where not n.property is not null return n", + "assert": "non_empty" + }, + { + "name": "filter nodes where array property equals an empty array literal", + "cypher": "match (s) where s.prop = [] return s", + "assert": "no_error" + }, + { + "name": "filter node by string contains predicate", + "cypher": "match (s) where s.name contains '123' return s", + "assert": {"contains_node_with_prop": ["name", "1234"]} + }, + { + "name": "filter node using negated starts-with predicate", + "cypher": "match (s) where not s.name starts with 'XYZ' return s", + "assert": "non_empty" + }, + { + "name": "filter node using negated contains predicate", + "cypher": "match (s) where not s.name contains 'XYZ' return s", + "assert": "non_empty" + }, + { + "name": "filter node using negated ends-with predicate", + "cypher": "match (s) where not s.name ends with 'XYZ' return s", + "assert": "non_empty" + }, + { + "name": "filter node where string property starts with another property", + "cypher": "match (s) where s.name starts with s.other return s", + "assert": "non_empty" + }, + { + "name": "filter node where string property contains another property", + "cypher": "match (s) where s.name contains s.other return s", + "assert": "non_empty" + }, + { + "name": "filter node where string property ends with another property", + "cypher": "match (s) where s.name ends with s.other return s", + "assert": "non_empty" + }, + { + "name": "filter nodes where a datetime property is null", + "cypher": "match (s) where s.created_at is null return s", + "assert": "non_empty" + }, + { + "name": "project an arithmetic expression on a node property", + "cypher": "match (s) return s.value + 1", + "assert": "non_empty" + }, + { + "name": "filter typed node using datetime arithmetic 
against epoch seconds", + "cypher": "match (u:NodeKind1) where u.pwdlastset < (datetime().epochseconds - (365 * 86400)) and not u.pwdlastset IN [-1.0, 0.0] return u limit 100", + "assert": "non_empty" + }, + { + "name": "filter node using coalesce equality on a named property", + "cypher": "match (n) where coalesce(n.name, '') = '1234' return n", + "assert": {"contains_node_with_prop": ["name", "1234"]} + }, + { + "name": "filter typed node using three-argument coalesce equality against an integer", + "cypher": "match (n:NodeKind1) where coalesce(n.a, n.b, 1) = 1 return n", + "assert": "non_empty" + }, + { + "name": "filter typed node using two-property coalesce that resolves to null", + "cypher": "match (n:NodeKind1) where coalesce(n.a, n.b) = 1 return n", + "assert": "no_error" + }, + { + "name": "filter typed node with coalesce on the right-hand side of equality", + "cypher": "match (n:NodeKind1) where 1 = coalesce(n.a, n.b) return n", + "assert": "no_error" + }, + { + "name": "project a compound arithmetic expression dividing a shifted property", + "cypher": "match (s) return (s.value + 1) / 3", + "assert": "non_empty" + }, + { + "name": "filter node using toLower equality and return distinct results", + "cypher": "match (s) where toLower(s.name) = '1234' return distinct s", + "assert": {"contains_node_with_prop": ["name", "1234"]} + }, + { + "name": "filter typed node where a property contains a toUpper result", + "cypher": "match (n:NodeKind1) where n.distinguishedname contains toUpper('test') return n", + "assert": "non_empty" + }, + { + "name": "filter typed node where a property equals a toUpper result (no match)", + "cypher": "match (n:NodeKind1) where n.distinguishedname = toUpper('admin') return n", + "assert": "empty" + }, + { + "name": "filter typed node where a property ends with a toUpper result (no match)", + "cypher": "match (n:NodeKind1) where n.distinguishedname ends with toUpper('com') return n", + "assert": "empty" + }, + { + "name": "filter 
typed node where toString of a property appears in a literal list", + "cypher": "match (n:NodeKind1) where toString(n.functionallevel) in ['2008 R2', '2012', '2008', '2003'] return n", + "assert": "non_empty" + }, + { + "name": "filter typed node where toInt of a property appears in a literal integer list", + "cypher": "match (n:NodeKind1) where toInt(n.value) in [1, 2, 3, 4] return n", + "assert": "non_empty" + }, + { + "name": "filter typed node using datetime arithmetic against epoch milliseconds", + "cypher": "match (u:NodeKind1) where u.pwdlastset < (datetime().epochmillis - 86400000) and not u.pwdlastset IN [-1.0, 0.0] return u limit 100", + "assert": "non_empty" + }, + { + "name": "filter node where a datetime property equals the current date", + "cypher": "match (s) where s.created_at = date() return s", + "assert": "no_error" + }, + { + "name": "filter node where a property equals date minus a duration", + "cypher": "match (s) where s.created_at = date() - duration('P1D') return s", + "assert": "no_error" + }, + { + "name": "filter node where a property equals date plus a duration string", + "cypher": "match (s) where s.created_at = date() + duration('4 hours') return s", + "assert": "no_error" + }, + { + "name": "filter node where a property equals a literal date value", + "cypher": "match (s) where s.created_at = date('2023-4-4') return s", + "assert": "no_error" + }, + { + "name": "filter node where a datetime property equals the current datetime", + "cypher": "match (s) where s.created_at = datetime() return s", + "assert": "no_error" + }, + { + "name": "filter node where a property equals a literal datetime value", + "cypher": "match (s) where s.created_at = datetime('2019-06-01T18:40:32.142+0100') return s", + "assert": "no_error" + }, + { + "name": "filter node where a property equals the current local datetime", + "cypher": "match (s) where s.created_at = localdatetime() return s", + "assert": "no_error" + }, + { + "name": "filter node where a 
property equals a literal local datetime value", + "cypher": "match (s) where s.created_at = localdatetime('2019-06-01T18:40:32.142') return s", + "assert": "no_error" + }, + { + "name": "filter node where a property equals the current local time", + "cypher": "match (s) where s.created_at = localtime() return s", + "assert": "no_error" + }, + { + "name": "filter node where a property equals a literal local time value", + "cypher": "match (s) where s.created_at = localtime('4:4:4') return s", + "assert": "no_error" + }, + { + "name": "filter node using a negated parenthesized equality predicate", + "cypher": "match (s) where not (s.name = '123') return s", + "assert": "non_empty" + }, + { + "name": "filter node using negated 2-hop path existence", + "cypher": "match (s) where not (s)-[]->()-[]->() return s", + "assert": "no_error" + }, + { + "name": "filter node using negated directed edge pattern with property constraints", + "cypher": "match (s) where not (s)-[{prop: 'a'}]->({name: 'n3'}) return s", + "assert": "non_empty" + }, + { + "name": "filter node using negated incoming edge pattern with property constraints", + "cypher": "match (s) where not (s)<-[{prop: 'a'}]-({name: 'n3'}) return s", + "assert": "non_empty" + }, + { + "name": "return id of isolated nodes with no adjacent edges", + "cypher": "match (s) where not (s)-[]-() return id(s)", + "assert": "no_error" + }, + { + "name": "filter node where id appears in a literal integer list", + "cypher": "match (s) where id(s) in [1, 2, 3, 4] return s", + "assert": "no_error" + }, + { + "name": "filter typed node where array property contains one of three scalar values", + "cypher": "match (u:NodeKind1) where 'DES-CBC-CRC' in u.arrayProperty or 'DES-CBC-MD5' in u.arrayProperty or 'RC4-HMAC-MD5' in u.arrayProperty return u", + "assert": "non_empty" + }, + { + "name": "filter node where an empty array literal equals a property (reversed operands)", + "cypher": "match (s) where [] = s.prop return s", 
+ "assert": "no_error" + }, + { + "name": "filter node where a property is not equal to an empty array", + "cypher": "match (s) where s.prop <> [] return s", + "assert": "non_empty" + }, + { + "name": "filter node using negated equality to an empty array", + "cypher": "match (s) where not s.prop = [] return s", + "assert": "non_empty" + }, + { + "name": "filter typed node using property equality with literal-then-property concatenation", + "cypher": "match (n:NodeKind1) match (m:NodeKind2) where m.distinguishedname = 'CN=ADMINSDHOLDER,CN=SYSTEM,' + n.distinguishedname return m", + "assert": "non_empty" + }, + { + "name": "filter typed node using property equality with property-then-literal concatenation", + "cypher": "match (n:NodeKind1) match (m:NodeKind2) where m.distinguishedname = n.distinguishedname + 'CN=ADMINSDHOLDER,CN=SYSTEM,' return m", + "assert": "no_error" + }, + { + "name": "filter typed node using property equality with two literal strings concatenated", + "cypher": "match (n:NodeKind1) match (m:NodeKind2) where m.distinguishedname = '1' + '2' return m", + "assert": "no_error" + }, + { + "name": "order results by two properties with mixed sort directions", + "cypher": "match (s) return s order by s.name, s.other_prop desc", + "assert": "non_empty" + }, + { + "name": "two sequential optional matches where only the anchor node is required", + "cypher": "match (n:NodeKind1) optional match (m:NodeKind2) where m.distinguishedname starts with n.distinguishedname optional match (o:NodeKind2) where o.distinguishedname <> n.distinguishedname return n, m, o", + "assert": "non_empty" + }, + { + "name": "filter typed node with compound hasspn enabled and not-ends-with checks", + "cypher": "match (u:NodeKind1) where u.hasspn = true and u.enabled = true and not '-502' ends with u.objectid and not coalesce(u.gmsa, false) = true and not coalesce(u.msa, false) = true return u limit 10", + "assert": "non_empty" + } + ] +} diff --git 
a/integration/testdata/cases/nodes_inline.json b/integration/testdata/cases/nodes_inline.json new file mode 100644 index 0000000..9d47894 --- /dev/null +++ b/integration/testdata/cases/nodes_inline.json @@ -0,0 +1,123 @@ +{ + "cases": [ + { + "name": "filter node where property value appears in a literal list", + "cypher": "match (s) where s.name in ['option 1', 'option 2'] return s", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "option 1"}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"name": "option 2"}}, + {"id": "c", "kinds": ["NodeKind1"], "properties": {"name": "option 3"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "cross-product filter where two nodes match different typed properties", + "skip": "PG type inference bug: mixed string/int cross-product properties (see zinic/testing fork)", + "cypher": "match (s), (e) where s.name = '1234' and e.other = 1234 return s", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "1234"}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"other": 1234}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "filter node where a scalar value appears in an array property", + "cypher": "match (n) where 1 in n.array return n", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"array": [1, 2, 3]}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "filter typed node with coalesce equality on both sides", + "cypher": "match (n:NodeKind1) where coalesce(n.name, '') = coalesce(n.migrated_name, '') return n", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "mirror", "migrated_name": "mirror"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "differ"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "filter node using an arithmetic expression in the WHERE clause", + "cypher": "match 
(s) where s.value + 2 / 3 > 10 return s", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"value": 20}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"value": 1}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "filter typed node using toLower contains with a compound AND predicate", + "cypher": "match (n:NodeKind1) where n:NodeKind1 and toLower(n.tenantid) contains 'myid' and n.system_tags contains 'tag' return n", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"tenantid": "MyID-Corp", "system_tags": "tag_admin"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "filter typed node where a scalar appears in an array property concatenated with a literal list", + "cypher": "match (n:NodeKind1) where '1' in n.array_prop + ['1', '2'] return n", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"array_prop": ["x", "y"]}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "return source and an aliased property from an unrelated node in a cross-product", + "cypher": "match (s), (e) where s.name = 'n1' return s, e.name as othername", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "n1"}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"name": "n2"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "filter cross-product where either node satisfies a different property predicate", + "skip": "PG type inference bug: mixed string/int cross-product properties (see zinic/testing fork)", + "cypher": "match (s), (e) where s.name = '1234' or e.other = 1234 return s", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "1234"}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"other": 1234}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "return a size of an array property in the projection", + 
"cypher": "MATCH (n) RETURN size(n.tags)", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"tags": ["admin", "user"]}} + ], + "edges": [] + }, + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/pattern_binding.json b/integration/testdata/cases/pattern_binding.json new file mode 100644 index 0000000..67ba7de --- /dev/null +++ b/integration/testdata/cases/pattern_binding.json @@ -0,0 +1,20 @@ +{ + "dataset": "base", + "cases": [ + { + "name": "bind a single typed node to a path variable", + "cypher": "match p = (:NodeKind1) return p", + "assert": "non_empty" + }, + { + "name": "bind a one-hop traversal to a path variable", + "cypher": "match p = ()-[]->() return p", + "assert": "non_empty" + }, + { + "name": "bind an unbounded variable-length path to a path variable", + "cypher": "match p = ()-[*..]->(e) return p limit 1", + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/pattern_binding_inline.json b/integration/testdata/cases/pattern_binding_inline.json new file mode 100644 index 0000000..b2832fd --- /dev/null +++ b/integration/testdata/cases/pattern_binding_inline.json @@ -0,0 +1,186 @@ +{ + "cases": [ + { + "name": "bind a two-hop path and return the terminal node", + "cypher": "match p = ()-[r1]->()-[r2]->(e) return e", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind2"]}, + {"id": "c", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "c", "kind": "EdgeKind2"} + ] + }, + "assert": "non_empty" + }, + { + "name": "bind a converging diamond path with endpoint property filters", + "cypher": "match p = (a)-[]->()<-[]-(f) where a.name = 'value' and f.is_target return p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "value"}}, + {"id": "mid", "kinds": ["NodeKind2"]}, + {"id": "f", "kinds": ["NodeKind1"], 
"properties": {"is_target": true}} + ], + "edges": [ + {"start_id": "a", "end_id": "mid", "kind": "EdgeKind1"}, + {"start_id": "f", "end_id": "mid", "kind": "EdgeKind1"} + ] + }, + "assert": "non_empty" + }, + { + "name": "bind a node-only path with a contains property filter", + "cypher": "match p = (n:NodeKind1) where n.name contains 'test' return p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "testuser"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "admin"}} + ], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "bind a path with an undirected edge between typed nodes", + "cypher": "match p = (n:NodeKind1)-[r]-(m:NodeKind1) return p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind1"]} + ], + "edges": [{"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "three-hop traversal filtering named edges by their property", + "cypher": "match ()-[r1]->()-[r2]->()-[]->() where r1.label = 'first' and r2.label = 'second' return r1", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind2"]}, + {"id": "c", "kinds": ["NodeKind1"]}, + {"id": "d", "kinds": ["NodeKind2"]} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1", "properties": {"label": "first"}}, + {"start_id": "b", "end_id": "c", "kind": "EdgeKind2", "properties": {"label": "second"}}, + {"start_id": "c", "end_id": "d", "kind": "EdgeKind1"} + ] + }, + "assert": "non_empty" + }, + { + "name": "bind a one-hop path between typed nodes filtered by a boolean edge property", + "cypher": "match p = (:NodeKind1)-[r]->(:NodeKind1) where r.isacl return p limit 100", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind1"]} + ], + "edges": [{"start_id": "a", "end_id": "b", "kind": "EdgeKind1", "properties": {"isacl": true}}] + }, + 
"assert": "non_empty" + }, + { + "name": "return a named first edge and the full path including its subsequent expansion", + "cypher": "match p = ()-[e:EdgeKind1]->()-[:EdgeKind1*1..]->() return e, p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind2"]}, + {"id": "c", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "c", "kind": "EdgeKind1"} + ] + }, + "assert": "non_empty" + }, + { + "name": "bind a typed one-hop path where the target property does not contain a toUpper result", + "cypher": "match p = (m:NodeKind1)-[:EdgeKind1]->(c:NodeKind2) where m.objectid ends with '-1' and not toUpper(c.operatingsystem) contains 'SERVER' return p limit 1000", + "fixture": { + "nodes": [ + {"id": "m", "kinds": ["NodeKind1"], "properties": {"objectid": "S-1-5-21-1"}}, + {"id": "c", "kinds": ["NodeKind2"], "properties": {"operatingsystem": "workstation"}} + ], + "edges": [{"start_id": "m", "end_id": "c", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "bind a two-hop typed path filtered by array membership on the intermediate node", + "cypher": "match p = (:NodeKind1)-[:EdgeKind1|EdgeKind2]->(e:NodeKind2)-[:EdgeKind2]->(:NodeKind1) where 'a' in e.values or 'b' in e.values or size(e.values) = 0 return p", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"]}, + {"id": "mid", "kinds": ["NodeKind2"], "properties": {"values": ["a", "c"]}}, + {"id": "dst", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "src", "end_id": "mid", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "dst", "kind": "EdgeKind2"} + ] + }, + "assert": "non_empty" + }, + { + "name": "bind a path with one fixed hop then variable expansion filtered by coalesce contains", + "cypher": "match p = (:NodeKind1)-[:EdgeKind1]->(:NodeKind2)-[:EdgeKind2*1..]->(t:NodeKind2) where coalesce(t.system_tags, '') contains 'admin_tier_0' return p limit 
1000", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind2"]}, + {"id": "t", "kinds": ["NodeKind2"], "properties": {"system_tags": "admin_tier_0"}} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "t", "kind": "EdgeKind2"} + ] + }, + "assert": "non_empty" + }, + { + "name": "filter a typed node with WHERE then bind its variable-length expansion path", + "cypher": "match (u:NodeKind1) where u.samaccountname in ['foo', 'bar'] match p = (u)-[:EdgeKind1|EdgeKind2*1..3]->(t) where coalesce(t.system_tags, '') contains 'admin_tier_0' return p limit 1000", + "fixture": { + "nodes": [ + {"id": "u", "kinds": ["NodeKind1"], "properties": {"samaccountname": "foo"}}, + {"id": "t", "kinds": ["NodeKind2"], "properties": {"system_tags": "admin_tier_0"}} + ], + "edges": [{"start_id": "u", "end_id": "t", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "three consecutive MATCHes that anchor two nodes and bind the connecting path", + "cypher": "match (x:NodeKind1) where x.name = 'foo' match (y:NodeKind2) where y.name = 'bar' match p=(x)-[:EdgeKind1]->(y) return p", + "fixture": { + "nodes": [ + {"id": "x", "kinds": ["NodeKind1"], "properties": {"name": "foo"}}, + {"id": "y", "kinds": ["NodeKind2"], "properties": {"name": "bar"}} + ], + "edges": [{"start_id": "x", "end_id": "y", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "match a node with an inline property map then bind its outgoing path to a second inline-map node", + "cypher": "match (x:NodeKind1{name:'foo'}) match p=(x)-[:EdgeKind1]->(y:NodeKind2{name:'bar'}) return p", + "fixture": { + "nodes": [ + {"id": "x", "kinds": ["NodeKind1"], "properties": {"name": "foo"}}, + {"id": "y", "kinds": ["NodeKind2"], "properties": {"name": "bar"}} + ], + "edges": [{"start_id": "x", "end_id": "y", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + } + ] +} diff --git 
a/integration/testdata/cases/quantifiers.json b/integration/testdata/cases/quantifiers.json new file mode 100644 index 0000000..b9d1edd --- /dev/null +++ b/integration/testdata/cases/quantifiers.json @@ -0,0 +1,30 @@ +{ + "dataset": "quantifier", + "cases": [ + { + "name": "ANY quantifier over an array property with a contains predicate", + "cypher": "MATCH (n:NodeKind1) WHERE n.usedeskeyonly OR ANY(type IN n.supportedencryptiontypes WHERE type CONTAINS 'DES') RETURN n LIMIT 100", + "assert": "non_empty" + }, + { + "name": "ALL quantifier over an array property with a contains predicate", + "cypher": "MATCH (n:NodeKind1) WHERE n.usedeskeyonly OR ALL(type IN n.supportedencryptiontypes WHERE type CONTAINS 'DES') RETURN n LIMIT 100", + "assert": "non_empty" + }, + { + "name": "NONE quantifier over an array property with a contains predicate", + "cypher": "MATCH (n:NodeKind1) WHERE n.usedeskeyonly OR NONE(type IN n.supportedencryptiontypes WHERE type CONTAINS 'DES') RETURN n LIMIT 100", + "assert": "non_empty" + }, + { + "name": "SINGLE quantifier over an array property with a contains predicate", + "cypher": "MATCH (n:NodeKind1) WHERE n.usedeskeyonly OR SINGLE(type IN n.supportedencryptiontypes WHERE type CONTAINS 'DES') RETURN n LIMIT 100", + "assert": "non_empty" + }, + { + "name": "multiple ANY quantifiers where the second ANY has a compound OR predicate", + "cypher": "MATCH (n:NodeKind1) WHERE n.usedeskeyonly OR ANY(type IN n.supportedencryptiontypes WHERE type CONTAINS 'DES') OR ANY(type IN n.serviceprincipalnames WHERE toLower(type) CONTAINS 'mssql' OR toLower(type) CONTAINS 'mssqlcluster') RETURN n LIMIT 100", + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/quantifiers_inline.json b/integration/testdata/cases/quantifiers_inline.json new file mode 100644 index 0000000..860fb1f --- /dev/null +++ b/integration/testdata/cases/quantifiers_inline.json @@ -0,0 +1,43 @@ +{ + "cases": [ + { + "name": "NONE quantifier over a collected list in a 
WITH-piped stage", + "cypher": "MATCH (m:NodeKind1) WHERE m.unconstraineddelegation = true WITH m MATCH (n:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) WHERE g.objectid ENDS WITH '-516' WITH m, COLLECT(n) AS matchingNs WHERE NONE(n IN matchingNs WHERE n.objectid = m.objectid) RETURN m", + "fixture": { + "nodes": [ + {"id": "m", "kinds": ["NodeKind1"], "properties": {"unconstraineddelegation": true, "objectid": "test-m"}}, + {"id": "n", "kinds": ["NodeKind1"], "properties": {"objectid": "other-id"}}, + {"id": "g", "kinds": ["NodeKind2"], "properties": {"objectid": "S-1-5-21-516"}} + ], + "edges": [{"start_id": "n", "end_id": "g", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "ALL quantifier over a collected list in a WITH-piped stage", + "cypher": "MATCH (m:NodeKind1) WHERE m.unconstraineddelegation = true WITH m MATCH (n:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) WHERE g.objectid ENDS WITH '-516' WITH m, COLLECT(n) AS matchingNs WHERE ALL(n IN matchingNs WHERE n.objectid = m.objectid) RETURN m", + "fixture": { + "nodes": [ + {"id": "m", "kinds": ["NodeKind1"], "properties": {"unconstraineddelegation": true, "objectid": "test-m"}}, + {"id": "n", "kinds": ["NodeKind1"], "properties": {"objectid": "test-m"}}, + {"id": "g", "kinds": ["NodeKind2"], "properties": {"objectid": "S-1-5-21-516"}} + ], + "edges": [{"start_id": "n", "end_id": "g", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "ANY quantifier in first stage gates a pipeline where NONE filters the collected output", + "cypher": "MATCH (m:NodeKind1) WHERE ANY(name IN m.serviceprincipalnames WHERE name CONTAINS 'PHANTOM') WITH m MATCH (n:NodeKind1)-[:EdgeKind1]->(g:NodeKind2) WHERE g.objectid ENDS WITH '-525' WITH m, COLLECT(n) AS matchingNs WHERE NONE(t IN matchingNs WHERE t.objectid = m.objectid) RETURN m", + "fixture": { + "nodes": [ + {"id": "m", "kinds": ["NodeKind1"], "properties": {"objectid": "m-obj", "serviceprincipalnames": ["PHANTOM/host"]}}, + {"id": "n", "kinds": 
["NodeKind1"], "properties": {"objectid": "other-obj"}}, + {"id": "g", "kinds": ["NodeKind2"], "properties": {"objectid": "S-1-5-21-525"}} + ], + "edges": [{"start_id": "n", "end_id": "g", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/shortest_paths.json b/integration/testdata/cases/shortest_paths.json new file mode 100644 index 0000000..b9d3537 --- /dev/null +++ b/integration/testdata/cases/shortest_paths.json @@ -0,0 +1,25 @@ +{ + "dataset": "base", + "cases": [ + { + "name": "shortest path across two hops", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'SOME NAME' AND e.name = 'n3' RETURN p", + "assert": "non_empty" + }, + { + "name": "shortest path single hop", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'SOME NAME' AND e.name = '1234' RETURN p", + "assert": "non_empty" + }, + { + "name": "shortest path wrong direction returns empty", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'n3' AND e.name = 'SOME NAME' RETURN p", + "assert": "empty" + }, + { + "name": "shortest path to self returns empty", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'SOME NAME' AND e.name = 'SOME NAME' RETURN p", + "assert": "empty" + } + ] +} diff --git a/integration/testdata/cases/shortest_paths_inline.json b/integration/testdata/cases/shortest_paths_inline.json new file mode 100644 index 0000000..003d294 --- /dev/null +++ b/integration/testdata/cases/shortest_paths_inline.json @@ -0,0 +1,94 @@ +{ + "cases": [ + { + "name": "shortest path picks direct edge over longer route", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'a' AND e.name = 'd' RETURN p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "a"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "b"}}, + {"id": "c", "kinds": ["NodeKind1"], "properties": {"name": "c"}}, + {"id": "d", 
"kinds": ["NodeKind1"], "properties": {"name": "d"}} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "c", "kind": "EdgeKind1"}, + {"start_id": "c", "end_id": "d", "kind": "EdgeKind1"}, + {"start_id": "a", "end_id": "d", "kind": "EdgeKind1"} + ] + }, + "assert": {"row_count": 1} + }, + { + "name": "multiple shortest paths of equal length", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'a' AND e.name = 'd' RETURN p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "a"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "b"}}, + {"id": "c", "kinds": ["NodeKind1"], "properties": {"name": "c"}}, + {"id": "d", "kinds": ["NodeKind1"], "properties": {"name": "d"}} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "a", "end_id": "c", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "d", "kind": "EdgeKind1"}, + {"start_id": "c", "end_id": "d", "kind": "EdgeKind1"} + ] + }, + "assert": {"row_count": 2} + }, + { + "name": "no path between disconnected nodes", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'a' AND e.name = 'b' RETURN p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "a"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "b"}} + ], + "edges": [] + }, + "assert": "empty" + }, + { + "name": "three shortest paths through a wide fan", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'a' AND e.name = 'e' RETURN p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "a"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "b"}}, + {"id": "c", "kinds": ["NodeKind1"], "properties": {"name": "c"}}, + {"id": "d", "kinds": ["NodeKind1"], "properties": {"name": "d"}}, + {"id": "e", "kinds": ["NodeKind1"], "properties": {"name": "e"}} + ], + 
"edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "a", "end_id": "c", "kind": "EdgeKind1"}, + {"start_id": "a", "end_id": "d", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "e", "kind": "EdgeKind1"}, + {"start_id": "c", "end_id": "e", "kind": "EdgeKind1"}, + {"start_id": "d", "end_id": "e", "kind": "EdgeKind1"} + ] + }, + "assert": {"row_count": 3} + }, + { + "name": "dead end branch does not affect shortest path", + "cypher": "MATCH p = allShortestPaths((s)-[*1..]->(e)) WHERE s.name = 'a' AND e.name = 'c' RETURN p", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "a"}}, + {"id": "b", "kinds": ["NodeKind1"], "properties": {"name": "b"}}, + {"id": "c", "kinds": ["NodeKind1"], "properties": {"name": "c"}}, + {"id": "d", "kinds": ["NodeKind1"], "properties": {"name": "d"}} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "c", "kind": "EdgeKind1"}, + {"start_id": "a", "end_id": "d", "kind": "EdgeKind1"} + ] + }, + "assert": {"row_count": 1} + } + ] +} diff --git a/integration/testdata/cases/stepwise.json b/integration/testdata/cases/stepwise.json new file mode 100644 index 0000000..cf5aef3 --- /dev/null +++ b/integration/testdata/cases/stepwise.json @@ -0,0 +1,85 @@ +{ + "dataset": "base", + "cases": [ + { + "name": "return all edges", + "cypher": "match ()-[r]->() return r", + "assert": "non_empty" + }, + { + "name": "filter edges by type() string comparison", + "cypher": "match ()-[r]->() where type(r) = 'EdgeKind1' return r", + "assert": "non_empty" + }, + { + "name": "count edges of a specific kind", + "cypher": "match ()-[r:EdgeKind1]->() return count(r) as the_count", + "assert": {"at_least_int": 1} + }, + { + "name": "count typed edges reaching a node matching an inline property map", + "cypher": "match ()-[r:EdgeKind1]->({name: \"123\"}) return count(r) as the_count", + "assert": {"exact_int": 0} + }, + { + "name": "filter 
edges by a numeric property value", + "cypher": "match ()-[r]->() where r.value = 42 return r", + "assert": "non_empty" + }, + { + "name": "filter edges by a boolean property", + "cypher": "match ()-[r]->() where r.bool_prop return r", + "assert": "non_empty" + }, + { + "name": "one-hop traversal filtering where source and target are not the same node", + "cypher": "match (n1)-[]->(n2) where n1 <> n2 return n2", + "assert": "non_empty" + }, + { + "name": "traverse between typed endpoints with edge kind alternatives", + "cypher": "match (s:NodeKind1)-[r:EdgeKind1|EdgeKind2]->(e:NodeKind2) return s.name, e.name", + "assert": "non_empty" + }, + { + "name": "traverse between multi-kind endpoints using edge kind alternatives", + "cypher": "match (s:NodeKind1:NodeKind2)-[r:EdgeKind1|EdgeKind2]->(e:NodeKind2:NodeKind1) return s.name, e.name", + "assert": "no_error" + }, + { + "name": "filter edges with reversed type() equality (literal on left)", + "cypher": "match ()-[r]->() where 'EdgeKind1' = type(r) return r", + "assert": "non_empty" + }, + { + "name": "traverse incoming edges filtering by kind alternatives", + "cypher": "match (s)<-[r:EdgeKind1|EdgeKind2]-(e) return s.name, e.name", + "assert": "non_empty" + }, + { + "name": "shared-node forward chain with two outgoing edges", + "cypher": "match ()-[e0]->(n)-[e1]->() return e0, n, e1", + "assert": "non_empty" + }, + { + "name": "two-hop chain filtering where the two traversed edges are not equal", + "cypher": "match ()-[r]->()-[e]->(n) where r <> e return n", + "assert": "non_empty" + }, + { + "name": "cross-product of an unrelated node and an edge", + "cypher": "match (n), ()-[r]->() return n, r", + "assert": "non_empty" + }, + { + "name": "cross-product of two independent edge traversals", + "cypher": "match ()-[r]->(), ()-[e]->() return r, e", + "assert": "non_empty" + }, + { + "name": "return id labels and type from an edge traversal with a numeric id filter", + "cypher": "match (s)-[r:EdgeKind1]->(e) where not 
(s.system_tags contains 'admin_tier_0') and id(e) = 1 return id(s), labels(s), id(r), type(r)", + "assert": "no_error" + } + ] +} diff --git a/integration/testdata/cases/stepwise_inline.json b/integration/testdata/cases/stepwise_inline.json new file mode 100644 index 0000000..3e878dd --- /dev/null +++ b/integration/testdata/cases/stepwise_inline.json @@ -0,0 +1,112 @@ +{ + "cases": [ + { + "name": "traverse one edge filtering both endpoints by property", + "cypher": "match (s)-[r]->(e) where s.name = '123' and e.name = '321' return s, r, e", + "fixture": { + "nodes": [ + {"id": "s", "kinds": ["NodeKind1"], "properties": {"name": "123"}}, + {"id": "e", "kinds": ["NodeKind2"], "properties": {"name": "321"}} + ], + "edges": [{"start_id": "s", "end_id": "e", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "return source node and outgoing edge filtered by source property", + "cypher": "match (n)-[r]->() where n.name = '123' return n, r", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "123"}}, + {"id": "b", "kinds": ["NodeKind2"]} + ], + "edges": [{"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "diamond pattern where two edges converge on one node", + "cypher": "match ()-[e0]->(n)<-[e1]-() return e0, n, e1", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind1"]}, + {"id": "m", "kinds": ["NodeKind2"]} + ], + "edges": [ + {"start_id": "a", "end_id": "m", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "m", "kind": "EdgeKind2"} + ] + }, + "assert": "non_empty" + }, + { + "name": "pattern where a middle node has both an outgoing and an incoming edge", + "cypher": "match ()<-[e0]-(n)<-[e1]-() return e0, n, e1", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "mid", "kinds": ["NodeKind2"]}, + {"id": "b", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "a", "end_id": 
"mid", "kind": "EdgeKind1"}, + {"start_id": "mid", "end_id": "b", "kind": "EdgeKind2"} + ] + }, + "assert": "non_empty" + }, + { + "name": "traverse edge where the edge property flag is explicitly false", + "cypher": "match (s)-[r]->(e) where s.name = '123' and e:NodeKind1 and not r.property return s, r, e", + "fixture": { + "nodes": [ + {"id": "s", "kinds": ["NodeKind2"], "properties": {"name": "123"}}, + {"id": "e", "kinds": ["NodeKind1"]} + ], + "edges": [{"start_id": "s", "end_id": "e", "kind": "EdgeKind1", "properties": {"property": false}}] + }, + "assert": "non_empty" + }, + { + "name": "traverse two chained typed edges and return aliased endpoint properties", + "cypher": "match (s)-[:EdgeKind1|EdgeKind2]->(e)-[:EdgeKind1]->() return s.name as s_name, e.name as e_name", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "src"}}, + {"id": "b", "kinds": ["NodeKind2"], "properties": {"name": "mid"}}, + {"id": "c", "kinds": ["NodeKind1"]} + ], + "edges": [ + {"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}, + {"start_id": "b", "end_id": "c", "kind": "EdgeKind1"} + ] + }, + "assert": "non_empty" + }, + { + "name": "filter typed source node by four alternative name values with OR", + "cypher": "match (n:NodeKind1)-[r]->() where n.name = '123' or n.name = '321' or n.name = '222' or n.name = '333' return n, r", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "123"}}, + {"id": "b", "kinds": ["NodeKind2"]} + ], + "edges": [{"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "bind a one-hop typed path filtered by array membership or empty array size", + "cypher": "match p = (:NodeKind1)-[:EdgeKind1|EdgeKind2]->(c:NodeKind2) where '123' in c.prop2 or '243' in c.prop2 or size(c.prop2) = 0 return p limit 10", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind1"]}, + {"id": "dst", "kinds": ["NodeKind2"], "properties": {"prop2": 
[]}} + ], + "edges": [{"start_id": "src", "end_id": "dst", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + } + ] +} diff --git a/integration/testdata/cases/update.json b/integration/testdata/cases/update.json new file mode 100644 index 0000000..71b6b01 --- /dev/null +++ b/integration/testdata/cases/update.json @@ -0,0 +1,15 @@ +{ + "cases": [ + { + "name": "set a string property on a filtered node and return the updated node", + "cypher": "match (n) where n.name = 'n3' set n.name = 'RENAMED' return n", + "fixture": { + "nodes": [ + {"id": "n3", "kinds": ["NodeKind1", "NodeKind2"], "properties": {"name": "n3", "value": 3, "prop": "a"}} + ], + "edges": [] + }, + "assert": {"contains_node_with_prop": ["name", "RENAMED"]} + } + ] +} diff --git a/integration/testdata/cases/update_inline.json b/integration/testdata/cases/update_inline.json new file mode 100644 index 0000000..dd82bf3 --- /dev/null +++ b/integration/testdata/cases/update_inline.json @@ -0,0 +1,112 @@ +{ + "cases": [ + { + "name": "chain multiple SET clauses to update several properties on a node", + "cypher": "match (n) set n.other = 1 set n.prop = '1' return n", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "updateme"}}], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "add multiple kind labels to a node", + "cypher": "match (n) set n:NodeKind1:NodeKind2 return n", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind1"]}], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "remove multiple kind labels from a node", + "cypher": "match (n) remove n:NodeKind1:NodeKind2 return n", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind1", "NodeKind2"]}], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "set a boolean property on a filtered node", + "cypher": "match (n) where n.name = '1234' set n.is_target = true", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "1234"}}], + 
"edges": [] + }, + "assert": "no_error" + }, + { + "name": "set a property on a traversed edge from a typed source node", + "cypher": "match (n)-[r:EdgeKind1]->() where n:NodeKind1 set r.visited = true return r", + "fixture": { + "nodes": [ + {"id": "a", "kinds": ["NodeKind1"]}, + {"id": "b", "kinds": ["NodeKind2"]} + ], + "edges": [{"start_id": "a", "end_id": "b", "kind": "EdgeKind1"}] + }, + "assert": "non_empty" + }, + { + "name": "add one kind label and remove another in the same statement", + "cypher": "match (n) set n:NodeKind1 remove n:NodeKind2 return n", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind1", "NodeKind2"]}], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "add a kind label and set a property in the same statement", + "cypher": "match (n) set n:NodeKind1 set n.flag = '1' return n", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind2"]}], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "remove a kind label and a property in the same statement", + "cypher": "match (n) remove n:NodeKind1 remove n.prop return n", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind1"], "properties": {"prop": "val"}}], + "edges": [] + }, + "assert": "non_empty" + }, + { + "name": "remove a single node property", + "cypher": "match (s) remove s.name", + "fixture": { + "nodes": [{"id": "a", "kinds": ["NodeKind1"], "properties": {"name": "drop-me"}}], + "edges": [] + }, + "assert": "no_error" + }, + { + "name": "set a property on an edge leading to a typed target node", + "cypher": "match ()-[r]->(:NodeKind1) set r.is_special_outbound = true", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind2"]}, + {"id": "dst", "kinds": ["NodeKind1"]} + ], + "edges": [{"start_id": "src", "end_id": "dst", "kind": "EdgeKind1"}] + }, + "assert": "no_error" + }, + { + "name": "update a source node property and an edge property together", + "cypher": "match (a)-[r]->(:NodeKind1) set a.name = '123', r.is_special_outbound = 
true", + "fixture": { + "nodes": [ + {"id": "src", "kinds": ["NodeKind2"]}, + {"id": "dst", "kinds": ["NodeKind1"]} + ], + "edges": [{"start_id": "src", "end_id": "dst", "kind": "EdgeKind1"}] + }, + "assert": "no_error" + } + ] +} diff --git a/integration/testdata/quantifier.json b/integration/testdata/quantifier.json new file mode 100644 index 0000000..b537ad4 --- /dev/null +++ b/integration/testdata/quantifier.json @@ -0,0 +1,39 @@ +{ + "graph": { + "nodes": [ + { + "id": "qAny", + "kinds": ["NodeKind1"], + "properties": { + "usedeskeyonly": false, + "supportedencryptiontypes": ["DES-CBC-CRC", "AES-128"] + } + }, + { + "id": "qAll", + "kinds": ["NodeKind1"], + "properties": { + "usedeskeyonly": false, + "supportedencryptiontypes": ["DES-CBC-CRC", "DES-CBC-MD5"] + } + }, + { + "id": "qNone", + "kinds": ["NodeKind1"], + "properties": { + "usedeskeyonly": false, + "supportedencryptiontypes": ["AES-128", "RC4-HMAC"] + } + }, + { + "id": "qSingle", + "kinds": ["NodeKind1"], + "properties": { + "usedeskeyonly": false, + "supportedencryptiontypes": ["DES-CBC-CRC", "AES-128"] + } + } + ], + "edges": [] + } +} diff --git a/opengraph/export.go b/opengraph/export.go new file mode 100644 index 0000000..fa4c865 --- /dev/null +++ b/opengraph/export.go @@ -0,0 +1,93 @@ +// Copyright 2025 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package opengraph + +import ( + "context" + "encoding/json" + "fmt" + "io" + "strconv" + + "github.com/specterops/dawgs/graph" +) + +// Export reads all nodes and edges from db and writes them as an indented JSON Document to w. +func Export(ctx context.Context, db graph.Database, w io.Writer) error { + var doc Document + + idToString := make(map[graph.ID]string) + + if err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + // Export nodes + if err := tx.Nodes().Fetch(func(cursor graph.Cursor[*graph.Node]) error { + for node := range cursor.Chan() { + stringID := strconv.FormatInt(int64(node.ID), 10) + idToString[node.ID] = stringID + + doc.Graph.Nodes = append(doc.Graph.Nodes, Node{ + ID: stringID, + Kinds: node.Kinds.Strings(), + Properties: node.Properties.MapOrEmpty(), + }) + } + + return cursor.Error() + }); err != nil { + return fmt.Errorf("failed to fetch nodes: %w", err) + } + + // Export edges + if err := tx.Relationships().Fetch(func(cursor graph.Cursor[*graph.Relationship]) error { + for rel := range cursor.Chan() { + startStr, ok := idToString[rel.StartID] + if !ok { + startStr = strconv.FormatInt(int64(rel.StartID), 10) + } + + endStr, ok := idToString[rel.EndID] + if !ok { + endStr = strconv.FormatInt(int64(rel.EndID), 10) + } + + doc.Graph.Edges = append(doc.Graph.Edges, Edge{ + StartID: startStr, + EndID: endStr, + Kind: rel.Kind.String(), + Properties: rel.Properties.MapOrEmpty(), + }) + } + + return cursor.Error() + }); err != nil { + return fmt.Errorf("failed to fetch relationships: %w", err) + } + + return nil + }); err != nil { + return fmt.Errorf("opengraph: export error: %w", err) + } + + encoder := json.NewEncoder(w) + encoder.SetIndent("", " ") + + if err := encoder.Encode(&doc); err != nil { + return fmt.Errorf("opengraph: encode error: %w", err) + } + + return nil +} diff --git a/opengraph/load.go b/opengraph/load.go new file mode 100644 index 0000000..7f3c441 --- /dev/null +++ 
b/opengraph/load.go @@ -0,0 +1,143 @@ +// Copyright 2025 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package opengraph + +import ( + "context" + "encoding/json" + "fmt" + "io" + + "github.com/specterops/dawgs/graph" +) + +// IDMap maps document string node IDs to their database-assigned IDs. +type IDMap map[string]graph.ID + +// ParseDocument decodes and validates a Document from r without writing to a database. +func ParseDocument(r io.Reader) (Document, error) { + var doc Document + + if err := json.NewDecoder(r).Decode(&doc); err != nil { + return doc, fmt.Errorf("opengraph: decode error: %w", err) + } + + if err := Validate(doc); err != nil { + return doc, fmt.Errorf("opengraph: validation error: %w", err) + } + + return doc, nil +} + +// Load reads a Document from r, validates it, and writes the graph into db. +// Returns a mapping from document node IDs to database IDs. +func Load(ctx context.Context, db graph.Database, r io.Reader) (IDMap, error) { + doc, err := ParseDocument(r) + if err != nil { + return nil, err + } + + return WriteGraph(ctx, db, &doc.Graph) +} + +// WriteGraph writes the nodes and edges of g into db. +// Returns a mapping from document node IDs to database IDs. 
+func WriteGraph(ctx context.Context, db graph.Database, g *Graph) (IDMap, error) { + if g == nil { + return nil, nil + } + + nodeMap := make(IDMap, len(g.Nodes)) + + // Nodes are created via WriteTransaction so we get database IDs back for edge mapping. + if err := db.WriteTransaction(ctx, func(tx graph.Transaction) error { + for _, node := range g.Nodes { + dbNode, err := tx.CreateNode(graph.AsProperties(node.Properties), graph.StringsToKinds(node.Kinds)...) + if err != nil { + return fmt.Errorf("could not create node %q: %w", node.ID, err) + } + + nodeMap[node.ID] = dbNode.ID + } + + return nil + }); err != nil { + return nil, fmt.Errorf("opengraph: node write error: %w", err) + } + + // Edges are created via BatchOperation for bulk insert performance. + if err := db.BatchOperation(ctx, func(batch graph.Batch) error { + for _, edge := range g.Edges { + startID, ok := nodeMap[edge.StartID] + if !ok { + return fmt.Errorf("could not find start node %q", edge.StartID) + } + + endID, ok := nodeMap[edge.EndID] + if !ok { + return fmt.Errorf("could not find end node %q", edge.EndID) + } + + if err := batch.CreateRelationshipByIDs(startID, endID, graph.StringKind(edge.Kind), graph.AsProperties(edge.Properties)); err != nil { + return fmt.Errorf("could not create edge (%s)-[%s]->(%s): %w", edge.StartID, edge.Kind, edge.EndID, err) + } + } + + return nil + }); err != nil { + return nil, fmt.Errorf("opengraph: edge write error: %w", err) + } + + return nodeMap, nil +} + +// WriteGraphTx writes the nodes and edges of g using an existing transaction. +// This is useful for creating fixture data that will be rolled back. +func WriteGraphTx(tx graph.Transaction, g *Graph) (IDMap, error) { + if g == nil { + return nil, nil + } + + nodeMap := make(IDMap, len(g.Nodes)) + + for _, node := range g.Nodes { + dbNode, err := tx.CreateNode(graph.AsProperties(node.Properties), graph.StringsToKinds(node.Kinds)...) 
+ if err != nil { + return nil, fmt.Errorf("could not create node %q: %w", node.ID, err) + } + + nodeMap[node.ID] = dbNode.ID + } + + for _, edge := range g.Edges { + startID, ok := nodeMap[edge.StartID] + if !ok { + return nil, fmt.Errorf("could not find start node %q", edge.StartID) + } + + endID, ok := nodeMap[edge.EndID] + if !ok { + return nil, fmt.Errorf("could not find end node %q", edge.EndID) + } + + if _, err := tx.CreateRelationshipByIDs(startID, endID, graph.StringKind(edge.Kind), graph.AsProperties(edge.Properties)); err != nil { + return nil, fmt.Errorf("could not create edge (%s)-[%s]->(%s): %w", edge.StartID, edge.Kind, edge.EndID, err) + } + } + + return nodeMap, nil +} diff --git a/opengraph/opengraph_integration_test.go b/opengraph/opengraph_integration_test.go new file mode 100644 index 0000000..ada6f16 --- /dev/null +++ b/opengraph/opengraph_integration_test.go @@ -0,0 +1,291 @@ +// Copyright 2025 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// SPDX-License-Identifier: Apache-2.0 + +//go:build manual_integration + +package opengraph + +import ( + "bytes" + "context" + "encoding/json" + "os" + "sort" + "strings" + "testing" + + "github.com/specterops/dawgs" + "github.com/specterops/dawgs/drivers/pg" + "github.com/specterops/dawgs/graph" + "github.com/specterops/dawgs/util/size" +) + +func setupTestDB(t *testing.T) (graph.Database, context.Context) { + t.Helper() + + ctx := context.Background() + connStr := os.Getenv("PG_CONNECTION_STRING") + + if connStr == "" { + t.Skip("PG_CONNECTION_STRING not set") + } + + pool, err := pg.NewPool(connStr) + if err != nil { + t.Fatalf("Failed to create pool: %v", err) + } + + db, err := dawgs.Open(ctx, pg.DriverName, dawgs.Config{ + GraphQueryMemoryLimit: size.Gibibyte, + Pool: pool, + }) + if err != nil { + t.Fatalf("Failed to open database: %v", err) + } + + schema := graph.Schema{ + Graphs: []graph.Graph{{ + Name: "opengraph_test", + Nodes: graph.Kinds{graph.StringKind("Person"), graph.StringKind("Place")}, + Edges: graph.Kinds{graph.StringKind("KNOWS"), graph.StringKind("LIVES_IN")}, + }}, + DefaultGraph: graph.Graph{Name: "opengraph_test"}, + } + + if err := db.AssertSchema(ctx, schema); err != nil { + t.Fatalf("Failed to assert schema: %v", err) + } + + t.Cleanup(func() { + // Clean up all nodes (cascades to edges) + _ = db.WriteTransaction(ctx, func(tx graph.Transaction) error { + return tx.Nodes().Delete() + }) + db.Close(ctx) + }) + + return db, ctx +} + +func TestLoad(t *testing.T) { + db, ctx := setupTestDB(t) + + input := `{ + "graph": { + "nodes": [ + {"id": "alice", "kinds": ["Person"], "properties": {"name": "Alice"}}, + {"id": "bob", "kinds": ["Person"], "properties": {"name": "Bob"}}, + {"id": "nyc", "kinds": ["Place"], "properties": {"name": "New York"}} + ], + "edges": [ + {"start_id": "alice", "end_id": "bob", "kind": "KNOWS"}, + {"start_id": "alice", "end_id": "nyc", "kind": "LIVES_IN"} + ] + } + }` + + if _, err := Load(ctx, db, 
strings.NewReader(input)); err != nil { + t.Fatalf("Load failed: %v", err) + } + + // Verify nodes exist in DB + var nodeCount int64 + err := db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var countErr error + nodeCount, countErr = tx.Nodes().Count() + return countErr + }) + if err != nil { + t.Fatalf("Failed to count nodes: %v", err) + } + if nodeCount != 3 { + t.Fatalf("expected 3 nodes in DB, got %d", nodeCount) + } + + // Verify edges exist in DB + var edgeCount int64 + err = db.ReadTransaction(ctx, func(tx graph.Transaction) error { + var countErr error + edgeCount, countErr = tx.Relationships().Count() + return countErr + }) + if err != nil { + t.Fatalf("Failed to count edges: %v", err) + } + if edgeCount != 2 { + t.Fatalf("expected 2 edges in DB, got %d", edgeCount) + } +} + +func TestExport_EmptyDatabase(t *testing.T) { + db, ctx := setupTestDB(t) + + var buf bytes.Buffer + if err := Export(ctx, db, &buf); err != nil { + t.Fatalf("Export failed: %v", err) + } + + var doc Document + if err := json.Unmarshal(buf.Bytes(), &doc); err != nil { + t.Fatalf("Failed to decode exported JSON: %v", err) + } + + if len(doc.Graph.Nodes) != 0 { + t.Errorf("expected 0 nodes, got %d", len(doc.Graph.Nodes)) + } + + if len(doc.Graph.Edges) != 0 { + t.Errorf("expected 0 edges, got %d", len(doc.Graph.Edges)) + } +} + +func TestRoundTrip(t *testing.T) { + db, ctx := setupTestDB(t) + + original := Document{ + Graph: Graph{ + Nodes: []Node{ + {ID: "a", Kinds: []string{"Person"}, Properties: map[string]any{"name": "Alice", "age": float64(30)}}, + {ID: "b", Kinds: []string{"Person"}, Properties: map[string]any{"name": "Bob", "age": float64(25)}}, + {ID: "c", Kinds: []string{"Place"}, Properties: map[string]any{"name": "Chicago"}}, + }, + Edges: []Edge{ + {StartID: "a", EndID: "b", Kind: "KNOWS", Properties: map[string]any{"since": float64(2020)}}, + {StartID: "b", EndID: "c", Kind: "LIVES_IN"}, + }, + }, + } + + // Load + inputBytes, _ := json.Marshal(original) + if _, 
err := Load(ctx, db, bytes.NewReader(inputBytes)); err != nil { + t.Fatalf("Load failed: %v", err) + } + + // Export + var buf bytes.Buffer + if err := Export(ctx, db, &buf); err != nil { + t.Fatalf("Export failed: %v", err) + } + + // Decode exported + var exported Document + if err := json.Unmarshal(buf.Bytes(), &exported); err != nil { + t.Fatalf("Decode failed: %v", err) + } + + // Validate exported document + if err := Validate(exported); err != nil { + t.Fatalf("Exported document is not valid: %v", err) + } + + // Compare node count + if len(exported.Graph.Nodes) != len(original.Graph.Nodes) { + t.Fatalf("node count mismatch: got %d, want %d", len(exported.Graph.Nodes), len(original.Graph.Nodes)) + } + + // Compare edge count + if len(exported.Graph.Edges) != len(original.Graph.Edges) { + t.Fatalf("edge count mismatch: got %d, want %d", len(exported.Graph.Edges), len(original.Graph.Edges)) + } + + // Build maps for comparison (exported IDs will be database IDs, not original string IDs) + // So we compare by properties instead + exportedNodesByName := make(map[string]Node) + for _, n := range exported.Graph.Nodes { + if name, ok := n.Properties["name"].(string); ok { + exportedNodesByName[name] = n + } + } + + for _, origNode := range original.Graph.Nodes { + name := origNode.Properties["name"].(string) + expNode, ok := exportedNodesByName[name] + if !ok { + t.Errorf("missing exported node with name %q", name) + continue + } + + // Compare kinds (sort for stable comparison) + origKinds := make([]string, len(origNode.Kinds)) + copy(origKinds, origNode.Kinds) + sort.Strings(origKinds) + + expKinds := make([]string, len(expNode.Kinds)) + copy(expKinds, expNode.Kinds) + sort.Strings(expKinds) + + if strings.Join(origKinds, ",") != strings.Join(expKinds, ",") { + t.Errorf("node %q kinds mismatch: got %v, want %v", name, expKinds, origKinds) + } + + // Compare properties (excluding ID-like fields the DB may add) + for key, origVal := range origNode.Properties { + 
expVal, ok := expNode.Properties[key] + if !ok { + t.Errorf("node %q missing property %q", name, key) + continue + } + + // JSON round-trip means numbers become float64 + origJSON, _ := json.Marshal(origVal) + expJSON, _ := json.Marshal(expVal) + if string(origJSON) != string(expJSON) { + t.Errorf("node %q property %q: got %v, want %v", name, key, expVal, origVal) + } + } + } + + // Compare edges by resolving through node names + exportedNodeIDToName := make(map[string]string) + for _, n := range exported.Graph.Nodes { + if name, ok := n.Properties["name"].(string); ok { + exportedNodeIDToName[n.ID] = name + } + } + + type edgeKey struct { + startName, endName, kind string + } + + exportedEdges := make(map[edgeKey]Edge) + for _, e := range exported.Graph.Edges { + key := edgeKey{ + startName: exportedNodeIDToName[e.StartID], + endName: exportedNodeIDToName[e.EndID], + kind: e.Kind, + } + exportedEdges[key] = e + } + + // Map original node IDs to names for comparison + origIDToName := make(map[string]string) + for _, n := range original.Graph.Nodes { + origIDToName[n.ID] = n.Properties["name"].(string) + } + + for _, origEdge := range original.Graph.Edges { + key := edgeKey{ + startName: origIDToName[origEdge.StartID], + endName: origIDToName[origEdge.EndID], + kind: origEdge.Kind, + } + + if _, ok := exportedEdges[key]; !ok { + t.Errorf("missing exported edge (%s)-[%s]->(%s)", key.startName, key.kind, key.endName) + } + } +} diff --git a/opengraph/types.go b/opengraph/types.go new file mode 100644 index 0000000..fed7507 --- /dev/null +++ b/opengraph/types.go @@ -0,0 +1,58 @@ +// Copyright 2025 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package opengraph + +import "github.com/specterops/dawgs/graph" + +// Document is the top-level container for an OpenGraph JSON file. +type Document struct { + Graph Graph `json:"graph"` +} + +// Graph contains the nodes and edges of the graph. +type Graph struct { + Nodes []Node `json:"nodes"` + Edges []Edge `json:"edges"` +} + +// Node represents a graph node with a string ID, one or more kind labels, and arbitrary properties. +type Node struct { + ID string `json:"id"` + Kinds []string `json:"kinds"` + Properties map[string]any `json:"properties,omitempty"` +} + +// Edge represents a directed relationship between two nodes. +type Edge struct { + StartID string `json:"start_id"` + EndID string `json:"end_id"` + Kind string `json:"kind"` + Properties map[string]any `json:"properties,omitempty"` +} + +// Kinds returns the unique node and edge kinds found in the graph. +func (g Graph) Kinds() (nodeKinds, edgeKinds graph.Kinds) { + for _, node := range g.Nodes { + nodeKinds = nodeKinds.Add(graph.StringsToKinds(node.Kinds)...) + } + + for _, edge := range g.Edges { + edgeKinds = edgeKinds.Add(graph.StringKind(edge.Kind)) + } + + return nodeKinds, edgeKinds +} diff --git a/opengraph/validate.go b/opengraph/validate.go new file mode 100644 index 0000000..d27155a --- /dev/null +++ b/opengraph/validate.go @@ -0,0 +1,61 @@ +// Copyright 2026 Specter Ops, Inc. +// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package opengraph + +import ( + "errors" + "fmt" +) + +// Validate checks a Document for structural errors. It returns nil for an empty graph. +func Validate(doc Document) error { + nodeIDs := make(map[string]struct{}, len(doc.Graph.Nodes)) + + for i, node := range doc.Graph.Nodes { + if node.ID == "" { + return fmt.Errorf("node at index %d has an empty ID", i) + } + + if _, exists := nodeIDs[node.ID]; exists { + return fmt.Errorf("duplicate node ID %q", node.ID) + } + + nodeIDs[node.ID] = struct{}{} + } + + var errs []error + + for i, edge := range doc.Graph.Edges { + if edge.Kind == "" { + errs = append(errs, fmt.Errorf("edge at index %d has an empty kind", i)) + } + + if edge.StartID == "" { + errs = append(errs, fmt.Errorf("edge at index %d has an empty start_id", i)) + } else if _, ok := nodeIDs[edge.StartID]; !ok { + errs = append(errs, fmt.Errorf("edge at index %d references unknown start node %q", i, edge.StartID)) + } + + if edge.EndID == "" { + errs = append(errs, fmt.Errorf("edge at index %d has an empty end_id", i)) + } else if _, ok := nodeIDs[edge.EndID]; !ok { + errs = append(errs, fmt.Errorf("edge at index %d references unknown end node %q", i, edge.EndID)) + } + } + + return errors.Join(errs...) +} diff --git a/opengraph/validate_test.go b/opengraph/validate_test.go new file mode 100644 index 0000000..b40b2cf --- /dev/null +++ b/opengraph/validate_test.go @@ -0,0 +1,149 @@ +// Copyright 2025 Specter Ops, Inc. 
+// +// Licensed under the Apache License, Version 2.0 +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier: Apache-2.0 + +package opengraph + +import ( + "strings" + "testing" +) + +func TestValidate_EmptyGraph(t *testing.T) { + doc := Document{Graph: Graph{}} + if err := Validate(doc); err != nil { + t.Fatalf("empty graph should be valid, got: %v", err) + } +} + +func TestValidate_ValidGraph(t *testing.T) { + doc := Document{ + Graph: Graph{ + Nodes: []Node{ + {ID: "a", Kinds: []string{"Person"}}, + {ID: "b", Kinds: []string{"Person"}}, + }, + Edges: []Edge{ + {StartID: "a", EndID: "b", Kind: "KNOWS"}, + }, + }, + } + if err := Validate(doc); err != nil { + t.Fatalf("valid graph should pass, got: %v", err) + } +} + +func TestValidate_DuplicateNodeID(t *testing.T) { + doc := Document{ + Graph: Graph{ + Nodes: []Node{ + {ID: "a"}, + {ID: "a"}, + }, + }, + } + err := Validate(doc) + if err == nil { + t.Fatal("expected error for duplicate node ID") + } + if !strings.Contains(err.Error(), "duplicate node ID") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidate_EmptyNodeID(t *testing.T) { + doc := Document{ + Graph: Graph{Nodes: []Node{{ID: ""}}}, + } + err := Validate(doc) + if err == nil { + t.Fatal("expected error for empty node ID") + } + if !strings.Contains(err.Error(), "empty ID") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidate_EmptyEdgeKind(t *testing.T) { + doc := Document{ + Graph: Graph{ + Nodes: []Node{{ID: "a"}, {ID: "b"}}, + Edges: []Edge{{StartID: 
"a", EndID: "b", Kind: ""}}, + }, + } + err := Validate(doc) + if err == nil { + t.Fatal("expected error for empty edge kind") + } + if !strings.Contains(err.Error(), "empty kind") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidate_EmptyEdgeStartID(t *testing.T) { + doc := Document{ + Graph: Graph{ + Nodes: []Node{{ID: "a"}}, + Edges: []Edge{{StartID: "", EndID: "a", Kind: "REL"}}, + }, + } + err := Validate(doc) + if err == nil { + t.Fatal("expected error for empty start_id") + } + if !strings.Contains(err.Error(), "empty start_id") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidate_UnknownEdgeReference(t *testing.T) { + doc := Document{ + Graph: Graph{ + Nodes: []Node{{ID: "a"}}, + Edges: []Edge{{StartID: "a", EndID: "missing", Kind: "REL"}}, + }, + } + err := Validate(doc) + if err == nil { + t.Fatal("expected error for unknown node reference") + } + if !strings.Contains(err.Error(), "unknown end node") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestValidate_MultipleEdgeErrors(t *testing.T) { + doc := Document{ + Graph: Graph{ + Nodes: []Node{{ID: "a"}}, + Edges: []Edge{ + {StartID: "", EndID: "", Kind: ""}, + {StartID: "a", EndID: "nope", Kind: "REL"}, + }, + }, + } + err := Validate(doc) + if err == nil { + t.Fatal("expected errors") + } + // errors.Join produces newline-separated errors + errStr := err.Error() + if !strings.Contains(errStr, "empty kind") { + t.Fatalf("expected empty kind error, got: %v", err) + } + if !strings.Contains(errStr, "unknown end node") { + t.Fatalf("expected unknown node error, got: %v", err) + } +}