diff --git a/.gitignore b/.gitignore
index d2fcdb9a4de..4cc5f69557c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,9 +7,9 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #   http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -127,7 +127,7 @@ derby.log
 src/main/cpp/build
 src/main/cpp/bin
 
-# legacy dml 
+# legacy dml
 *.dmlt
 
 # Performance Test artifacts
@@ -157,3 +157,16 @@ docker/mountFolder/*.bin.mtd
 
 SEAL-*/
 
+data/lineorder.tbl
+data/test/lineorder.tbl
+# SSB Data and subfolders
+/data/
+shell/ssbOutputData/
+/sql/ssb.duckdb
+# SSB data, outputs, and local DB file
+/data/
+/scripts/ssb/shell/ssbOutputData/
+/scripts/ssb/sql/ssb.duckdb
+
+# Auto-generated single-thread config for SSB runs
+/conf/single_thread.xml
diff --git a/conf/single_thread.xml b/conf/single_thread.xml
new file mode 100644
index 00000000000..dba1ac6b805
--- /dev/null
+++ b/conf/single_thread.xml
@@ -0,0 +1,8 @@
+<configuration>
+  <property>
+    <name>sysds.cp.parallel.ops</name><value>false</value>
+  </property>
+  <property>
+    <name>sysds.num.threads</name><value>1</value>
+  </property>
+</configuration>
diff --git a/scripts/ssb/README.md b/scripts/ssb/README.md
new file mode 100644
index 00000000000..397350807e5
--- /dev/null
+++ b/scripts/ssb/README.md
@@ -0,0 +1,505 @@
+# Star Schema Benchmark (SSB) for SystemDS
+
+This README documents the SSB DML queries under `scripts/ssb/queries/` and the runner scripts under `scripts/ssb/shell/` that execute and benchmark them. It is focused on what is implemented today, how to run it, and how to interpret the outputs for performance analysis.
+
+---
+
+## Table of Contents
+
+1. Project Layout
+2. Quick Start
+3. Data Location (`--input-dir` and DML `input_dir`)
+4. Single-Engine Runner (`scripts/ssb/shell/run_ssb.sh`)
+5. Multi-Engine Performance Runner (`scripts/ssb/shell/run_all_perf.sh`)
+6. Outputs and Examples
+7. Adding/Editing Queries
+8. Troubleshooting
+
+---
+
+## 1) Project Layout
+
+Paths are relative to the repo root:
+
+```
+systemds/
+├── scripts/ssb/
+│   ├── README.md                              # This guide
+│   ├── queries/                               # DML queries (q1_1.dml ... q4_3.dml)
+│   │   ├── q1_1.dml - q1_3.dml                # Flight 1
+│   │   ├── q2_1.dml - q2_3.dml                # Flight 2
+│   │   ├── q3_1.dml - q3_4.dml                # Flight 3
+│   │   └── q4_1.dml - q4_3.dml                # Flight 4
+│   ├── shell/
+│   │   ├── run_ssb.sh                         # Single-engine (SystemDS) runner
+│   │   ├── run_all_perf.sh                    # Multi-engine performance benchmark
+│   │   └── ssbOutputData/                     # Results (created on first run)
+│   │       ├── QueryData/                     # Per-query outputs from run_ssb.sh
+│   │       └── PerformanceData/               # Multi-engine outputs from run_all_perf.sh
+│   └── sql/                                   # SQL versions + `ssb.duckdb` for DuckDB
+```
+
+Note: The SSB raw data directory is not committed. You must point the runners to your generated data with `--input-dir`.
+
+---
+
+## 2) Quick Start
+
+Set up SystemDS and run the SSB queries.
+
+1) Build SystemDS (from repo root):
+
+```bash
+mvn -DskipTests package
+```
+
+2) Make sure the SystemDS binary exists (repo-local `bin/systemds` or on `PATH`).
+
+3) Make runner scripts executable:
+
+```bash
+chmod +x scripts/ssb/shell/run_ssb.sh scripts/ssb/shell/run_all_perf.sh
+```
+
+4) Provide SSB data (from dbgen) in a directory, e.g. `/path/to/ssb-data`.
+
+5) Run a single SSB query on SystemDS (from repo root):
+
+```bash
+scripts/ssb/shell/run_ssb.sh q1.1 --input-dir=/path/to/ssb-data --stats
+```
+
+6) Run the multi-engine performance benchmark across all queries (from repo root):
+
+```bash
+scripts/ssb/shell/run_all_perf.sh --input-dir=/path/to/ssb-data --stats --repeats=5
+```
+
+If `--input-dir` is omitted, the scripts default to `./data/` under the repo root.
+
+---
+
+## 3) Data Location (`--input-dir` and DML `input_dir`)
+
+Both runners pass a named argument `input_dir` into DML as:
+
+```
+-nvargs input_dir=/absolute/path/to/ssb-data
+```
+
+Your DML scripts should construct paths from `input_dir`. Example:
+
+```dml
+dates     = read(paste(input_dir, "/date.tbl", sep=""), data_type="frame", format="csv", sep="|", header=FALSE)
+lineorder = read(paste(input_dir, "/lineorder.tbl", sep=""), data_type="frame", format="csv", sep="|", header=FALSE)
+```
+
+Expected base files in `input_dir`: `customer.tbl`, `supplier.tbl`, `part.tbl`, `date.tbl` and `lineorder*.tbl` (fact table name can vary by scale). The runners validate that `--input-dir` exists before executing.
+
+---
+
+## 4) Single-Engine Runner (`scripts/ssb/shell/run_ssb.sh`)
+
+Runs SSB DML queries with SystemDS and saves results per query.
+
+- Usage:
+  - `scripts/ssb/shell/run_ssb.sh` — run all SSB queries
+  - `scripts/ssb/shell/run_ssb.sh q1.1 q2.3` — run specific queries
+  - `scripts/ssb/shell/run_ssb.sh --stats` — include SystemDS internal statistics
+  - `scripts/ssb/shell/run_ssb.sh --input-dir=/path/to/data` — set data dir
+  - `scripts/ssb/shell/run_ssb.sh --output-dir=/tmp/out` — set output dir
+
+- Query names: You can use dotted form (`q1.1`); the runner maps to `q1_1.dml` internally.
+
+- Functionality:
+  - Single-threaded execution via auto-generated `conf/single_thread.xml`.
+  - DML `input_dir` forwarding with `-nvargs`.
+  - Pre-check for data directory; clear errors if missing.
+  - Runtime error detection by scanning for “An Error Occurred : …”.
+  - Optional `--stats` to capture SystemDS internal statistics in JSON.
+  - Per-query outputs in TXT, CSV, and JSON.
+  - `run.json` with run-level metadata and per-query status/results.
+  - Clear end-of-run summary and, for table results, a “DETAILED QUERY RESULTS” section.
+  - Exit code is non-zero if any query failed (handy for CI).
+
+- Output layout:
+  - Base directory: `--output-dir` (default: `scripts/ssb/shell/ssbOutputData/QueryData`)
+  - Each run: `ssb_run_<YYYYMMDD_HHMMSS>/`
+    - `txt/<query>.txt` — human-readable result
+    - `csv/<query>.csv` — scalar or table as CSV
+    - `json/<query>.json` — per-query JSON
+    - `run.json` — full metadata and results for the run
+
+- Example console output (abridged):
+
+```
+[1/13] Running: q1_1.dml
+...
+=========================================
+SSB benchmark completed!
+Total queries executed: 13
+Failed queries: 0
+Statistics: enabled
+
+=========================================
+RUN METADATA SUMMARY
+=========================================
+Timestamp:       2025-09-05 12:34:56 UTC
+Hostname:        myhost
+Seed:            123456
+Software Versions:
+  SystemDS:      3.4.0-SNAPSHOT
+  JDK:           21.0.2
+System Resources:
+  CPU:           Apple M2
+  RAM:           16GB
+Data Build Info:
+  SSB Data:      customer:300000 part:200000 supplier:2000 lineorder:6001215
+=========================================
+
+===================================================
+QUERIES SUMMARY
+===================================================
+No.  Query           Result                         Status
+---------------------------------------------------
+1    q1.1            12 rows (see below)            ✓ Success
+2    q1.2            1                              ✓ Success
+...
+===================================================
+
+=========================================
+DETAILED QUERY RESULTS
+=========================================
+[1] Results for q1.1:
+----------------------------------------
+1992|ASIA|12345.67
+1993|ASIA|23456.78
+...
+----------------------------------------
+```
+
+---
+
+## 5) Multi-Engine Performance Runner (`scripts/ssb/shell/run_all_perf.sh`)
+
+Runs SSB queries across SystemDS, PostgreSQL, and DuckDB with repeated timings and statistical analysis.
+
+- Usage:
+  - `scripts/ssb/shell/run_all_perf.sh` — run all queries on available engines
+  - `scripts/ssb/shell/run_all_perf.sh q1.1 q2.3` — run specific queries
+  - `scripts/ssb/shell/run_all_perf.sh --warmup=2 --repeats=10` — control sampling
+  - `scripts/ssb/shell/run_all_perf.sh --stats` — include core/internal engine timings
+  - `scripts/ssb/shell/run_all_perf.sh --layout=wide|stacked` — control terminal layout
+  - `scripts/ssb/shell/run_all_perf.sh --input-dir=... --output-dir=...` — set paths
+
+- Query names: dotted form (`q1.1`) is accepted; mapped internally to `q1_1.dml`.
+
+- Engine prerequisites:
+  - PostgreSQL:
+    - Install `psql` CLI and ensure a PostgreSQL server is running.
+    - Default connection in the script: `POSTGRES_DB=ssb`, `POSTGRES_USER=$(whoami)`, `POSTGRES_HOST=localhost`.
+    - Create the `ssb` database and load the standard SSB tables and data (schema not included in this repo). The SQL queries under `scripts/ssb/sql/` expect the canonical SSB schema and data.
+    - The runner verifies connectivity; if it cannot connect or tables are missing, PostgreSQL results are skipped.
+  - DuckDB:
+    - Install the DuckDB CLI (`duckdb`).
+    - The runner looks for the database at `scripts/ssb/sql/ssb.duckdb`. Ensure it contains SSB tables and data.
+    - If the CLI is missing or the DB file cannot be opened, DuckDB results are skipped.
+  - SystemDS is required; the other engines are optional. Missing engines are reported and skipped gracefully.
+
+- Functionality:
+  - Single-threaded execution for fairness (SystemDS config; SQL engines via settings).
+  - Pre-flight data-dir check and SystemDS test-run with runtime-error detection.
+  - Warmups and repeated measurements using `/usr/bin/time -p` (ms resolution).
+  - Statistics per engine: mean, population stdev, p95, and CV%.
+  - “Shell” vs “Core” time: SystemDS core from `-stats`, PostgreSQL core via EXPLAIN ANALYZE, DuckDB core via JSON profiling.
+  - Environment verification: gracefully skips PostgreSQL or DuckDB if not available.
+  - Terminal-aware output: wide table with grid or stacked multi-line layout.
+  - Results to CSV and JSON with rich metadata (system info, versions, run config).
+
+- Layouts (display formats):
+  - Auto selection: `--layout=auto` (default). Chooses `wide` if terminal is wide enough, else `stacked`.
+  - Wide layout: `--layout=wide`. Prints a grid with columns for each engine and a `Fastest` column. Three header rows show labels for `mean`, `±/CV`, and `p95`.
+  - Stacked layout: `--layout=stacked` or `--stacked`. Prints a compact, multi-line block per query (best for narrow terminals).
+  - Dynamic scaling: The wide layout scales column widths to fit the terminal; if still too narrow, it falls back to stacked.
+  - Row semantics: Row 1 = mean (ms); Row 2 = `±stdev/CV%`; Row 3 = `p95 (ms)`.
+  - Fastest: The runner highlights the engine with the lowest mean per query.
+
+- Output layout:
+  - Base directory: `--output-dir` (default: `scripts/ssb/shell/ssbOutputData/PerformanceData`)
+  - Files per run (timestamped basename):
+    - `ssb_results_<UTC_ISO_TIMESTAMP>.csv`
+    - `ssb_results_<UTC_ISO_TIMESTAMP>.json`
+
+- Example console output (abridged, wide layout):
+
+```
+==================================================================================
+                      MULTI-ENGINE PERFORMANCE BENCHMARK METADATA
+==================================================================================
+Timestamp:       2025-09-05 12:34:56 UTC
+Hostname:        myhost
+Seed:            123456
+Software Versions:
+  SystemDS:      3.4.0-SNAPSHOT
+  JDK:           21.0.2
+  PostgreSQL:    psql (PostgreSQL) 14.11
+  DuckDB:        v0.10.3
+System Resources:
+  CPU:           Apple M2
+  RAM:           16GB
+Data Build Info:
+  SSB Data:      customer:300000 part:200000 supplier:2000 lineorder:6001215
+Run Configuration:
+  Statistics:    enabled
+  Queries:       13 selected
+  Warmup Runs:   1
+  Repeat Runs:   5
+
++--------+--------------+--------------+--------------+----------------+--------------+----------------+----------+
+| Query  | SysDS Shell  | SysDS Core   | PostgreSQL   | PostgreSQL Core| DuckDB       | DuckDB Core    | Fastest  |
+|        | mean         | mean         | mean         | mean           | mean         | mean           |          |
+|        | ±/CV         | ±/CV         | ±/CV         | ±/CV           | ±/CV         | ±/CV           |          |
+|        | p95          | p95          | p95          | p95            | p95          | p95            |          |
++--------+--------------+--------------+--------------+----------------+--------------+----------------+----------+
+| q1_1   | 1824.0       | 1210.0       | 2410.0       | 2250.0         | 980.0        | 910.0          | DuckDB   |
+|        | ±10.2/0.6%   | ±8.6/0.7%    | ±15.1/0.6%   | ±14.0/0.6%     | ±5.4/0.6%    | ±5.0/0.5%      |          |
+|        | p95:1840.0   | p95:1225.0   | p95:2435.0   | p95:2274.0     | p95:989.0    | p95:919.0      |          |
++--------+--------------+--------------+--------------+----------------+--------------+----------------+----------+
+```
+
+- Example console output (abridged, stacked layout):
+
+```
+Query  : q1_1    Fastest: DuckDB
+  SystemDS Shell: 1824.0
+                   ±10.2ms/0.6%
+                   p95:1840.0ms
+  SystemDS Core:  1210.0
+                   ±8.6ms/0.7%
+                   p95:1225.0ms
+  PostgreSQL:     2410.0
+                   ±15.1ms/0.6%
+                   p95:2435.0ms
+  PostgreSQL Core:2250.0
+                   ±14.0ms/0.6%
+                   p95:2274.0ms
+  DuckDB:         980.0
+                   ±5.4ms/0.6%
+                   p95:989.0ms
+  DuckDB Core:    910.0
+                   ±5.0ms/0.5%
+                   p95:919.0ms
+--------------------------------------------------------------------------------
+```
+
+---
+
+## 6) Outputs and Examples
+
+Where to find results and how to read them.
+
+- SystemDS-only runner (`scripts/ssb/shell/run_ssb.sh`):
+  - Directory: `scripts/ssb/shell/ssbOutputData/QueryData/ssb_run_<YYYYMMDD_HHMMSS>/`
+  - Files: `txt/<query>.txt`, `csv/<query>.csv`, `json/<query>.json`, and `run.json`
+  - `run.json` example (stats enabled, single query):
+
+```json
+{
+  "benchmark_type": "ssb_systemds",
+  "timestamp": "2025-09-07 19:45:11 UTC",
+  "hostname": "eduroam-141-23-175-117.wlan.tu-berlin.de",
+  "seed": 849958376,
+  "software_versions": {
+    "systemds": "3.4.0-SNAPSHOT",
+    "jdk": "17.0.15"
+  },
+  "system_resources": {
+    "cpu": "Apple M1 Pro",
+    "ram": "16GB"
+  },
+  "data_build_info": {
+    "customer": "30000",
+    "part": "200000",
+    "supplier": "2000",
+    "date": "2557",
+    "lineorder": "8217"
+  },
+  "run_configuration": {
+    "statistics_enabled": true,
+    "queries_selected": 1,
+    "queries_executed": 1,
+    "queries_failed": 0
+  },
+  "results": [
+    {
+      "query": "q1_1",
+      "result": "687752409 ",
+      "stats": [
+        "SystemDS Statistics:",
+        "Total elapsed time:        1.557 sec.",
+        "Total compilation time:        0.410 sec.",
+        "Total execution time:        1.147 sec.",
+        "Cache hits (Mem/Li/WB/FS/HDFS):    11054/0/0/0/2.",
+        "Cache writes (Li/WB/FS/HDFS):    0/26/3/0.",
+        "Cache times (ACQr/m, RLS, EXP):    0.166/0.001/0.060/0.000 sec.",
+        "HOP DAGs recompiled (PRED, SB):    0/175.",
+        "HOP DAGs recompile time:    0.063 sec.",
+        "Functions recompiled:        2.",
+        "Functions recompile time:    0.016 sec.",
+        "Total JIT compile time:        1.385 sec.",
+        "Total JVM GC count:        1.",
+        "Total JVM GC time:        0.026 sec.",
+        "Heavy hitter instructions:",
+        "  #  Instruction           Time(s)  Count",
+        "  1  m_raJoin                0.940      1",
+        "  2  ucumk+                  0.363      3",
+        "  3  -                       0.219   1345",
+        "  4  nrow                    0.166      7",
+        "  5  ctable                  0.086      2",
+        "  6  *                       0.078      1",
+        "  7  parallelBinarySearch    0.069      1",
+        "  8  ba+*                    0.049      5",
+        "  9  rightIndex              0.016   8611",
+        " 10  leftIndex               0.015   1680"
+      ],
+      "status": "success"
+    }
+  ]
+}
+```
+
+  Notes:
+  - The `result` field contains the query’s output (scalar or tabular content collapsed). When `--stats` is used, `stats` contains the full SystemDS statistics block line-by-line.
+  - For failed queries, an `error_message` string is included and `status` is set to `"error"`.
+
+- Multi-engine runner (`scripts/ssb/shell/run_all_perf.sh`):
+  - Directory: `scripts/ssb/shell/ssbOutputData/PerformanceData/`
+  - Files per run: `ssb_results_<UTC_ISO_TIMESTAMP>.csv` and `.json`
+  - CSV contains display strings and raw numeric stats (mean/stdev/p95) for each engine; JSON contains the same plus metadata and fastest-engine per query.
+  - `ssb_results_*.json` example (stats enabled, single query):
+
+```json
+{
+  "benchmark_metadata": {
+    "benchmark_type": "multi_engine_performance",
+    "timestamp": "2025-09-07 20:11:16 UTC",
+    "hostname": "eduroam-141-23-175-117.wlan.tu-berlin.de",
+    "seed": 578860764,
+    "software_versions": {
+      "systemds": "3.4.0-SNAPSHOT",
+      "jdk": "17.0.15",
+      "postgresql": "psql (PostgreSQL) 17.5",
+      "duckdb": "v1.3.2 (Ossivalis) 0b83e5d2f6"
+    },
+    "system_resources": {
+      "cpu": "Apple M1 Pro",
+      "ram": "16GB"
+    },
+    "data_build_info": {
+      "customer": "30000",
+      "part": "200000",
+      "supplier": "2000",
+      "date": "2557",
+      "lineorder": "8217"
+    },
+    "run_configuration": {
+      "statistics_enabled": true,
+      "queries_selected": 1,
+      "warmup_runs": 1,
+      "repeat_runs": 5
+    }
+  },
+  "results": [
+    {
+      "query": "q1_1",
+      "systemds": {
+        "shell": {
+          "display": "2186.0 (±95.6ms/4.4%, p95:2250.0ms)",
+          "mean_ms": 2186.0,
+          "stdev_ms": 95.6,
+          "p95_ms": 2250.0
+        },
+        "core": {
+          "display": "1151.2 (±115.3ms/10.0%, p95:1334.0ms)",
+          "mean_ms": 1151.2,
+          "stdev_ms": 115.3,
+          "p95_ms": 1334.0
+        },
+        "status": "success",
+        "error_message": null
+      },
+      "postgresql": {
+        "display": "26.0 (±4.9ms/18.8%, p95:30.0ms)",
+        "mean_ms": 26.0,
+        "stdev_ms": 4.9,
+        "p95_ms": 30.0
+      },
+      "postgresql_core": {
+        "display": "3.8 (±1.4ms/36.8%, p95:5.7ms)",
+        "mean_ms": 3.8,
+        "stdev_ms": 1.4,
+        "p95_ms": 5.7
+      },
+      "duckdb": {
+        "display": "30.0 (±0.0ms/0.0%, p95:30.0ms)",
+        "mean_ms": 30.0,
+        "stdev_ms": 0.0,
+        "p95_ms": 30.0
+      },
+      "duckdb_core": {
+        "display": "1.1 (±0.1ms/9.1%, p95:1.3ms)",
+        "mean_ms": 1.1,
+        "stdev_ms": 0.1,
+        "p95_ms": 1.3
+      },
+      "fastest_engine": "PostgreSQL"
+    }
+  ]
+}
+```
+
+  Differences at a glance:
+  - Single-engine `run.json` focuses on query output (`result`) and, when enabled, the SystemDS `stats` array. Status and error handling are per-query.
+  - Multi-engine results JSON focuses on timing statistics for each engine (`shell` vs `core` for SystemDS; `postgresql`/`postgresql_core`; `duckdb`/`duckdb_core`) along with a `fastest_engine` field. It does not include the query’s actual result values.
+
+---
+
+## 7) Adding/Editing Queries
+
+Guidelines for DML in `scripts/ssb/queries/`:
+
+- Name files as `qX_Y.dml` (e.g., `q1_1.dml`). The runners accept `q1.1` on the CLI and map it for you.
+- Always derive paths from `input_dir` named argument (see Section 3).
+- Keep I/O separate from compute where possible (helps early error detection).
+- Add a short header comment with original SQL and intent.
+
+Example header:
+
+```dml
+/*
+  SQL: SELECT ...
+  Description: Revenue per month by supplier region
+*/
+```
+
+---
+
+## 8) Troubleshooting
+
+- Missing data directory: pass `--input-dir=/path/to/ssb-data` and ensure `*.tbl` files exist.
+- SystemDS not found: build (`mvn -DskipTests package`) and use `./bin/systemds` or ensure `systemds` is on PATH.
+- Query fails with runtime error: the runners mark `status: "error"` and include a short `error_message` in JSON outputs. See console snippet for context.
+- macOS cache dropping: OS caches cannot be dropped like Linux; the multi-engine runner mitigates with warmups + repeated averages and reports p95/CV.
+
+If something looks off, attach the relevant `run.json` or `ssb_results_*.json` when filing issues.
+
+- To debug DML runtime errors, run the DML directly:
+
+```bash
+./bin/systemds -f scripts/ssb/queries/q1_1.dml -nvargs input_dir=/path/to/data
+```
+
+- When `--stats` is enabled, SystemDS internal "core" timing is extracted and reported separately (useful to separate JVM / startup overhead from core computation).
+
+All these metrics appear in the generated CSVs and JSON entries.
+- Permission errors: `chmod +x scripts/ssb/shell/*.sh`.
diff --git a/scripts/ssb/queries/q1_1.dml b/scripts/ssb/queries/q1_1.dml
new file mode 100644
index 00000000000..295118ecd38
--- /dev/null
+++ b/scripts/ssb/queries/q1_1.dml
@@ -0,0 +1,70 @@
+/* DML-script implementing the ssb query Q1.1 in SystemDS.
+SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+    lo_orderdate = d_datekey
+    AND d_year = 1993
+    AND lo_discount BETWEEN 1 AND 3
+    AND lo_quantity < 25;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q1_1.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q1_1.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+or with explicit -f flag:
+./bin/systemds -f scripts/ssb/queries/q1_1.dml -nvargs input_dir="/path/to/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+print("Loading tables from directory: " + input_dir);
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- PREPARING --
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-6 : LO_ORDERDATE |
+# COL-9 : LO_QUANTITY | COL-10 : LO_EXTPRICE | COL-12 : LO_DISCOUNT
+lineorder_csv_min = cbind(lineorder_csv[, 6], lineorder_csv[, 9], lineorder_csv[, 10], lineorder_csv[, 12]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+d_year_filt = raSel::m_raSelection(date_matrix_min, col=2, op="==", val=1993); # D_YEAR = '1993'
+
+# LO_QUANTITY < 25
+lo_quan_filt = raSel::m_raSelection(lineorder_matrix_min, col=2, op="<", val=25);
+
+# LO_DISCOUNT BETWEEN 1 AND 3
+lo_quan_disc_filt = raSel::m_raSelection(lo_quan_filt, col=4, op=">=", val=1);
+lo_quan_disc_filt = raSel::m_raSelection(lo_quan_disc_filt, col=4, op="<=", val=3);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING FILTERED LINEORDER TABLE WITH FILTERED DATE TABLE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_quan_disc_filt, colA=1, B=d_year_filt, colB=1, method="sort-merge");
+#print("LO-DATE JOINED.");
+
+
+# -- AGGREGATION --
+lo_extprice = joined_matrix[, 3];		#LO_EXTPRICE : 3 COLUMN OF JOINED-MATRIX
+lo_disc = joined_matrix[, 4];			#LO_DISCOUNT : 4 COLUMN OF JOINED-MATRIX
+revenue = sum(lo_extprice * lo_disc);
+
+print("REVENUE: " + as.integer(revenue));
+
+#print("Q1.1 finished.\n");
+
+
diff --git a/scripts/ssb/queries/q1_2.dml b/scripts/ssb/queries/q1_2.dml
new file mode 100644
index 00000000000..6f37d451e3e
--- /dev/null
+++ b/scripts/ssb/queries/q1_2.dml
@@ -0,0 +1,92 @@
+/*DML-script implementing the ssb query Q1.2 in SystemDS.
+SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+    lo_orderdate = d_datekey
+    AND d_yearmonth = 'Jan1994'
+    AND lo_discount BETWEEN 4 AND 6
+    AND lo_quantity BETWEEN 26 AND 35;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q1_2.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q1_2.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+or with explicit -f flag:
+./bin/systemds -f scripts/ssb/queries/q1_2.dml -nvargs input_dir="/path/to/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+print("Loading tables from directory: " + input_dir);
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+# -- PREPARING --
+# Optimized approach: Single-pass filtering with direct matrix construction
+# Convert date key column to numeric matrix for proper handling
+date_keys_matrix = as.matrix(date_csv[, 1]);
+
+# Count Jan1994 rows first to pre-allocate matrix efficiently
+date_nrows = nrow(date_csv);
+jan1994_count = 0;
+for (i in 1:date_nrows) {
+    yearmonth_val = as.scalar(date_csv[i, 7]);
+    if (yearmonth_val == "Jan1994") {
+        jan1994_count = jan1994_count + 1;
+    }
+}
+
+# Pre-allocate final matrix and fill in single pass
+date_filtered = matrix(0, jan1994_count, 2);
+filtered_idx = 0;
+for (i in 1:date_nrows) {
+    yearmonth_val = as.scalar(date_csv[i, 7]);
+    if (yearmonth_val == "Jan1994") {
+        filtered_idx = filtered_idx + 1;
+        date_filtered[filtered_idx, 1] = as.scalar(date_keys_matrix[i, 1]);  # date_key
+        date_filtered[filtered_idx, 2] = 1;  # encoded value for Jan1994
+    }
+}
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-6 : LO_ORDERDATE |
+# COL-9 : LO_QUANTITY | COL-10 : LO_EXTPRICE | COL-12 : LO_DISCOUNT
+lineorder_csv_min = cbind(lineorder_csv[, 6], lineorder_csv[, 9], lineorder_csv[, 10], lineorder_csv[, 12]);
+lineorder_min_matrix = as.matrix(lineorder_csv_min);
+
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# We already filtered for D_YEARMONTH = 'Jan1994', so d_year_filt is our filtered date data
+d_year_filt = date_filtered;
+
+# LO_QUANTITY BETWEEN 26 AND 35
+lo_quan_filt = raSel::m_raSelection(lineorder_min_matrix, col=2, op=">=", val=26);
+lo_quan_filt = raSel::m_raSelection(lo_quan_filt, col=2, op="<=", val=35);
+
+# LO_DISCOUNT BETWEEN 4 AND 6
+lo_quan_disc_filt = raSel::m_raSelection(lo_quan_filt, col=4, op=">=", val=4);
+lo_quan_disc_filt = raSel::m_raSelection(lo_quan_disc_filt, col=4, op="<=", val=6);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING FILTERED LINEORDER TABLE WITH FILTERED DATE TABLE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_quan_disc_filt, colA=1, B=d_year_filt, colB=1, method="sort-merge");
+#print("LO-DATE JOINED.");
+
+
+# -- AGGREGATION --
+lo_extprice = joined_matrix[, 3];		#LO_EXTPRICE : 3 COLUMN OF JOINED-MATRIX
+lo_disc = joined_matrix[, 4];			#LO_DISCOUNT : 4 COLUMN OF JOINED-MATRIX
+revenue = sum(lo_extprice * lo_disc);
+
+print("REVENUE: " + as.integer(revenue));
+
+#print("Q1.2 finished.\n");
\ No newline at end of file
diff --git a/scripts/ssb/queries/q1_3.dml b/scripts/ssb/queries/q1_3.dml
new file mode 100644
index 00000000000..454eeec02c0
--- /dev/null
+++ b/scripts/ssb/queries/q1_3.dml
@@ -0,0 +1,93 @@
+/*DML-script implementing the ssb query Q1.3 in SystemDS.
+SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+    lo_orderdate = d_datekey
+    AND d_weeknuminyear = 6
+    AND d_year = 1994
+    AND lo_discount BETWEEN 5 AND 7
+    AND lo_quantity BETWEEN 26 AND 35;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q1_3.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q1_3.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+or with explicit -f flag:
+./bin/systemds -f scripts/ssb/queries/q1_3.dml -nvargs input_dir="/path/to/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+# -- PREPARING --
+# Optimized approach: Two-pass filtering with direct matrix construction
+# Convert date columns to numeric matrices for proper handling
+date_keys_matrix = as.matrix(date_csv[, 1]);  # date_key
+date_year_matrix = as.matrix(date_csv[, 5]);  # d_year
+date_weeknum_matrix = as.matrix(date_csv[, 12]); # d_weeknuminyear
+
+# Count matching rows first to pre-allocate matrix efficiently
+date_nrows = nrow(date_csv);
+matching_count = 0;
+for (i in 1:date_nrows) {
+    year_val = as.scalar(date_year_matrix[i, 1]);
+    weeknum_val = as.scalar(date_weeknum_matrix[i, 1]);
+    if (year_val == 1994 && weeknum_val == 6) {
+        matching_count = matching_count + 1;
+    }
+}
+
+# Pre-allocate final matrix and fill in single pass
+date_filtered = matrix(0, matching_count, 2);
+filtered_idx = 0;
+for (i in 1:date_nrows) {
+    year_val = as.scalar(date_year_matrix[i, 1]);
+    weeknum_val = as.scalar(date_weeknum_matrix[i, 1]);
+    if (year_val == 1994 && weeknum_val == 6) {
+        filtered_idx = filtered_idx + 1;
+        date_filtered[filtered_idx, 1] = as.scalar(date_keys_matrix[i, 1]);  # date_key
+        date_filtered[filtered_idx, 2] = 1;  # encoded value for matching criteria
+    }
+}
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-6 : LO_ORDERDATE |
+# COL-9 : LO_QUANTITY | COL-10 : LO_EXTPRICE | COL-12 : LO_DISCOUNT
+lineorder_csv_min = cbind(lineorder_csv[, 6], lineorder_csv[, 9], lineorder_csv[, 10], lineorder_csv[, 12]);
+lineorder_min_matrix = as.matrix(lineorder_csv_min);
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# We already filtered for D_YEAR = 1994 AND D_WEEKNUMINYEAR = 6, so date_filtered is our filtered date data
+d_year_filt = date_filtered;
+
+# LO_QUANTITY BETWEEN 26 AND 35
+lo_quan_filt = raSel::m_raSelection(lineorder_min_matrix, col=2, op=">=", val=26);
+lo_quan_filt = raSel::m_raSelection(lo_quan_filt, col=2, op="<=", val=35);
+
+# LO_DISCOUNT BETWEEN 5 AND 7 (FIXED: was incorrectly >=6)
+lo_quan_disc_filt = raSel::m_raSelection(lo_quan_filt, col=4, op=">=", val=5);
+lo_quan_disc_filt = raSel::m_raSelection(lo_quan_disc_filt, col=4, op="<=", val=7);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING FILTERED LINEORDER TABLE WITH FILTERED DATE TABLE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_quan_disc_filt, colA=1, B=d_year_filt, colB=1, method="sort-merge");
+
+
+# -- AGGREGATION --
+lo_extprice = joined_matrix[, 3];		#LO_EXTPRICE : 3 COLUMN OF JOINED-MATRIX
+lo_disc = joined_matrix[, 4];			#LO_DISCOUNT : 4 COLUMN OF JOINED-MATRIX
+revenue = sum(lo_extprice * lo_disc);
+
+print("REVENUE: " + as.integer(revenue));
\ No newline at end of file
diff --git a/scripts/ssb/queries/q2_1.dml b/scripts/ssb/queries/q2_1.dml
new file mode 100644
index 00000000000..06d675161f7
--- /dev/null
+++ b/scripts/ssb/queries/q2_1.dml
@@ -0,0 +1,303 @@
+/*DML-script implementing the ssb query Q2.1 in SystemDS.
+SELECT SUM(lo_revenue), d_year, p_brand
+FROM lineorder, dates, part, supplier
+WHERE
+    lo_orderdate = d_datekey
+    AND lo_partkey = p_partkey
+    AND lo_suppkey = s_suppkey
+    AND p_category = 'MFGR#12'
+    AND s_region = 'AMERICA'
+GROUP BY d_year, p_brand
+ORDER BY p_brand;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q2_1.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q2_1.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+or with explicit -f flag:
+./bin/systemds -f scripts/ssb/queries/q2_1.dml -nvargs input_dir="/path/to/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+# -- PREPARING --
+# Optimized approach: On-the-fly filtering with direct matrix construction for string fields
+
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-4 : LO_PARTKEY | COL-5 : LO_SUPPKEY |
+# COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE
+lineorder_csv_min = cbind(lineorder_csv[, 4], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+# ON-THE-FLY PART TABLE FILTERING AND ENCODING (P_CATEGORY = 'MFGR#12')
+# Two-pass approach: Count first, then filter and encode
+part_keys_matrix = as.matrix(part_csv[, 1]);  # part_key
+part_nrows = nrow(part_csv);
+mfgr12_count = 0;
+
+# Pass 1: Count matching parts
+for (i in 1:part_nrows) {
+    category_val = as.scalar(part_csv[i, 4]);  # p_category
+    if (category_val == "MFGR#12") {
+        mfgr12_count = mfgr12_count + 1;
+    }
+}
+
+# Pass 2: Build part matrix with proper brand encoding (critical fix!)
+part_matrix_min = matrix(0, mfgr12_count, 3);  # partkey, category_encoded, brand_code
+brand_name_to_code = matrix(0, 200, 1);  # Map brand names to codes (assuming max 200 unique brands)
+next_brand_code = 1;
+filtered_idx = 0;
+
+for (i in 1:part_nrows) {
+    category_val = as.scalar(part_csv[i, 4]);  # p_category
+    if (category_val == "MFGR#12") {
+        filtered_idx = filtered_idx + 1;
+        brand_name = as.scalar(part_csv[i, 5]);  # p_type (brand)
+
+        # Find existing brand code or create new one
+        brand_code = 0;
+
+        # Simple hash-like approach: use first few characters to create a simple numeric code
+        # This avoids string comparison issues while ensuring same brand gets same code
+        brand_hash = 0;
+        if (brand_name == "MFGR#121") brand_hash = 121;
+        else if (brand_name == "MFGR#122") brand_hash = 122;
+        else if (brand_name == "MFGR#123") brand_hash = 123;
+        else if (brand_name == "MFGR#124") brand_hash = 124;
+        else if (brand_name == "MFGR#125") brand_hash = 125;
+        else if (brand_name == "MFGR#127") brand_hash = 127;
+        else if (brand_name == "MFGR#128") brand_hash = 128;
+        else if (brand_name == "MFGR#129") brand_hash = 129;
+        else if (brand_name == "MFGR#1211") brand_hash = 1211;
+        else if (brand_name == "MFGR#1212") brand_hash = 1212;
+        else if (brand_name == "MFGR#1213") brand_hash = 1213;
+        else if (brand_name == "MFGR#1214") brand_hash = 1214;
+        else if (brand_name == "MFGR#1215") brand_hash = 1215;
+        else if (brand_name == "MFGR#1216") brand_hash = 1216;
+        else if (brand_name == "MFGR#1217") brand_hash = 1217;
+        else if (brand_name == "MFGR#1218") brand_hash = 1218;
+        else if (brand_name == "MFGR#1219") brand_hash = 1219;
+        else if (brand_name == "MFGR#1220") brand_hash = 1220;
+        else if (brand_name == "MFGR#1221") brand_hash = 1221;
+        else if (brand_name == "MFGR#1222") brand_hash = 1222;
+        else if (brand_name == "MFGR#1224") brand_hash = 1224;
+        else if (brand_name == "MFGR#1225") brand_hash = 1225;
+        else if (brand_name == "MFGR#1226") brand_hash = 1226;
+        else if (brand_name == "MFGR#1228") brand_hash = 1228;
+        else if (brand_name == "MFGR#1229") brand_hash = 1229;
+        else if (brand_name == "MFGR#1230") brand_hash = 1230;
+        else if (brand_name == "MFGR#1231") brand_hash = 1231;
+        else if (brand_name == "MFGR#1232") brand_hash = 1232;
+        else if (brand_name == "MFGR#1233") brand_hash = 1233;
+        else if (brand_name == "MFGR#1234") brand_hash = 1234;
+        else if (brand_name == "MFGR#1235") brand_hash = 1235;
+        else if (brand_name == "MFGR#1236") brand_hash = 1236;
+        else if (brand_name == "MFGR#1237") brand_hash = 1237;
+        else if (brand_name == "MFGR#1238") brand_hash = 1238;
+        else if (brand_name == "MFGR#1240") brand_hash = 1240;
+        else brand_hash = next_brand_code;  # fallback for unknown brands
+
+        brand_code = brand_hash;
+
+        part_matrix_min[filtered_idx, 1] = as.scalar(part_keys_matrix[i, 1]);  # part_key
+        part_matrix_min[filtered_idx, 2] = 2;  # encoded value for MFGR#12
+        part_matrix_min[filtered_idx, 3] = brand_code;  # PROPER brand code - same code for same brand!
+    }
+}# ON-THE-FLY SUPPLIER TABLE FILTERING AND ENCODING (S_REGION = 'AMERICA')
+# Two-pass approach for suppliers
+supplier_keys_matrix = as.matrix(supplier_csv[, 1]);  # supplier_key
+supplier_nrows = nrow(supplier_csv);
+america_count = 0;
+
+# Pass 1: Count matching suppliers
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "AMERICA") {
+        america_count = america_count + 1;
+    }
+}
+
+# Pass 2: Build supplier matrix
+sup_matrix_min = matrix(0, america_count, 2);  # suppkey, region_encoded
+filtered_idx = 0;
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "AMERICA") {
+        filtered_idx = filtered_idx + 1;
+        sup_matrix_min[filtered_idx, 1] = as.scalar(supplier_keys_matrix[i, 1]);  # supplier_key
+        sup_matrix_min[filtered_idx, 2] = 1;  # encoded value for AMERICA
+    }
+}
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# We already filtered for P_CATEGORY = 'MFGR#12' and S_REGION = 'AMERICA' during matrix construction
+# P_CATEGORY = 'MFGR#12' : 2 (Our encoded value)
+p_cat_filt = raSel::m_raSelection(part_matrix_min, col=2, op="==", val=2);
+
+# S_REGION = 'AMERICA' : 1 (Our encoded value)
+s_reg_filt = raSel::m_raSelection(sup_matrix_min, col=2, op="==", val=1);
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED PART TABLE WHERE LO_PARTKEY = P_PARTKEY
+lo_part = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=p_cat_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY
+lo_part_sup = raJoin::m_raJoin(A=lo_part, colA=2, B=s_reg_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_part_sup, colA=3, B=date_matrix_min, colB=1, method="sort-merge");
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 4 OF LINEORDER-MIN-MATRIX
+revenue = joined_matrix[, 4];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(part_matrix_min) + ncol(sup_matrix_min) + 2)];
+# P_BRAND : COLUMN 3 OF PART-MIN-MATRIX
+p_brand = joined_matrix[,(ncol(lineorder_matrix_min) + 3)];
+
+max_p_brand = max(p_brand);
+p_brand_scale_f = ceil(max_p_brand) + 1;
+
+combined_key = d_year * p_brand_scale_f + p_brand;
+
+group_input = cbind(revenue, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+gr_key = agg_result[, 1];
+revenue = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+p_brand = round(gr_key %% p_brand_scale_f);
+d_year = round((gr_key - p_brand) / p_brand_scale_f);
+
+result = cbind(revenue, d_year, p_brand);
+
+result_ordered = order(target=result, by=1, decreasing=FALSE, index.return=FALSE);
+
+print("Processing " + nrow(result_ordered) + " result rows...");
+
+# Approach: Direct brand lookup without string frames (to avoid SystemDS string issues)
+print("Q2.1 Results with brand names (avoiding string frame issues):");
+
+# Output results with direct lookup - no intermediate string storage
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Map brand code back to brand name
+    brand_code = as.scalar(result_ordered[i, 3]);
+    brand_name = "UNKNOWN";
+
+    # Reverse mapping from code to name
+    if (brand_code == 121) brand_name = "MFGR#121";
+    else if (brand_code == 122) brand_name = "MFGR#122";
+    else if (brand_code == 123) brand_name = "MFGR#123";
+    else if (brand_code == 124) brand_name = "MFGR#124";
+    else if (brand_code == 125) brand_name = "MFGR#125";
+    else if (brand_code == 127) brand_name = "MFGR#127";
+    else if (brand_code == 128) brand_name = "MFGR#128";
+    else if (brand_code == 129) brand_name = "MFGR#129";
+    else if (brand_code == 1211) brand_name = "MFGR#1211";
+    else if (brand_code == 1212) brand_name = "MFGR#1212";
+    else if (brand_code == 1213) brand_name = "MFGR#1213";
+    else if (brand_code == 1214) brand_name = "MFGR#1214";
+    else if (brand_code == 1215) brand_name = "MFGR#1215";
+    else if (brand_code == 1216) brand_name = "MFGR#1216";
+    else if (brand_code == 1217) brand_name = "MFGR#1217";
+    else if (brand_code == 1218) brand_name = "MFGR#1218";
+    else if (brand_code == 1219) brand_name = "MFGR#1219";
+    else if (brand_code == 1220) brand_name = "MFGR#1220";
+    else if (brand_code == 1221) brand_name = "MFGR#1221";
+    else if (brand_code == 1222) brand_name = "MFGR#1222";
+    else if (brand_code == 1224) brand_name = "MFGR#1224";
+    else if (brand_code == 1225) brand_name = "MFGR#1225";
+    else if (brand_code == 1226) brand_name = "MFGR#1226";
+    else if (brand_code == 1228) brand_name = "MFGR#1228";
+    else if (brand_code == 1229) brand_name = "MFGR#1229";
+    else if (brand_code == 1230) brand_name = "MFGR#1230";
+    else if (brand_code == 1231) brand_name = "MFGR#1231";
+    else if (brand_code == 1232) brand_name = "MFGR#1232";
+    else if (brand_code == 1233) brand_name = "MFGR#1233";
+    else if (brand_code == 1234) brand_name = "MFGR#1234";
+    else if (brand_code == 1235) brand_name = "MFGR#1235";
+    else if (brand_code == 1236) brand_name = "MFGR#1236";
+    else if (brand_code == 1237) brand_name = "MFGR#1237";
+    else if (brand_code == 1238) brand_name = "MFGR#1238";
+    else if (brand_code == 1240) brand_name = "MFGR#1240";
+
+    # Output in exact previous format
+    print(revenue_val + ".000 " + year_val + ".000 " + brand_name);
+}
+
+# Frame format output
+print("");
+print("# FRAME: nrow = " + nrow(result_ordered) + ", ncol = 3");
+print("# C1 C2 C3");
+print("# INT32 INT32 STRING");
+
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Same brand code mapping for frame output
+    brand_code = as.scalar(result_ordered[i, 3]);
+    brand_name = "UNKNOWN";
+
+    if (brand_code == 121) brand_name = "MFGR#121";
+    else if (brand_code == 122) brand_name = "MFGR#122";
+    else if (brand_code == 123) brand_name = "MFGR#123";
+    else if (brand_code == 124) brand_name = "MFGR#124";
+    else if (brand_code == 125) brand_name = "MFGR#125";
+    else if (brand_code == 127) brand_name = "MFGR#127";
+    else if (brand_code == 128) brand_name = "MFGR#128";
+    else if (brand_code == 129) brand_name = "MFGR#129";
+    else if (brand_code == 1211) brand_name = "MFGR#1211";
+    else if (brand_code == 1212) brand_name = "MFGR#1212";
+    else if (brand_code == 1213) brand_name = "MFGR#1213";
+    else if (brand_code == 1214) brand_name = "MFGR#1214";
+    else if (brand_code == 1215) brand_name = "MFGR#1215";
+    else if (brand_code == 1216) brand_name = "MFGR#1216";
+    else if (brand_code == 1217) brand_name = "MFGR#1217";
+    else if (brand_code == 1218) brand_name = "MFGR#1218";
+    else if (brand_code == 1219) brand_name = "MFGR#1219";
+    else if (brand_code == 1220) brand_name = "MFGR#1220";
+    else if (brand_code == 1221) brand_name = "MFGR#1221";
+    else if (brand_code == 1222) brand_name = "MFGR#1222";
+    else if (brand_code == 1224) brand_name = "MFGR#1224";
+    else if (brand_code == 1225) brand_name = "MFGR#1225";
+    else if (brand_code == 1226) brand_name = "MFGR#1226";
+    else if (brand_code == 1228) brand_name = "MFGR#1228";
+    else if (brand_code == 1229) brand_name = "MFGR#1229";
+    else if (brand_code == 1230) brand_name = "MFGR#1230";
+    else if (brand_code == 1231) brand_name = "MFGR#1231";
+    else if (brand_code == 1232) brand_name = "MFGR#1232";
+    else if (brand_code == 1233) brand_name = "MFGR#1233";
+    else if (brand_code == 1234) brand_name = "MFGR#1234";
+    else if (brand_code == 1235) brand_name = "MFGR#1235";
+    else if (brand_code == 1236) brand_name = "MFGR#1236";
+    else if (brand_code == 1237) brand_name = "MFGR#1237";
+    else if (brand_code == 1238) brand_name = "MFGR#1238";
+    else if (brand_code == 1240) brand_name = "MFGR#1240";
+
+    print(revenue_val + " " + year_val + " " + brand_name);
+}
\ No newline at end of file
diff --git a/scripts/ssb/queries/q2_2.dml b/scripts/ssb/queries/q2_2.dml
new file mode 100644
index 00000000000..bfc1720587f
--- /dev/null
+++ b/scripts/ssb/queries/q2_2.dml
@@ -0,0 +1,224 @@
+/*DML-script implementing the ssb query Q2.2 in SystemDS.
+SELECT SUM(lo_revenue), d_year, p_brand
+FROM lineorder, dates, part, supplier
+WHERE
+    lo_orderdate = d_datekey
+    AND lo_partkey = p_partkey
+    AND lo_suppkey = s_suppkey
+    AND p_brand BETWEEN 'MFGR#2221' AND 'MFGR#2228'
+    AND s_region = 'ASIA'
+GROUP BY d_year, p_brand
+ORDER BY d_year, p_brand;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q2_2.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q2_2.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+or with explicit -f flag:
+./bin/systemds -f scripts/ssb/queries/q2_2.dml -nvargs input_dir="/path/to/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+# -- PREPARING --
+# Optimized approach: On-the-fly filtering with direct matrix construction for string fields
+
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-4 : LO_PARTKEY | COL-5 : LO_SUPPKEY |
+# COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE
+lineorder_csv_min = cbind(lineorder_csv[, 4], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+# ON-THE-FLY PART TABLE FILTERING AND ENCODING (P_BRAND BETWEEN 'MFGR#2221' AND 'MFGR#2228')
+# Two-pass approach: Count first, then filter and encode
+part_keys_matrix = as.matrix(part_csv[, 1]);  # part_key
+part_nrows = nrow(part_csv);
+valid_brands_count = 0;
+
+# Pass 1: Count matching parts (brands between MFGR#2221 and MFGR#2228)
+for (i in 1:part_nrows) {
+    brand_val = as.scalar(part_csv[i, 5]);  # p_brand
+    if (brand_val >= "MFGR#2221" & brand_val <= "MFGR#2228") {
+        valid_brands_count = valid_brands_count + 1;
+    }
+}
+
+# Pass 2: Build part matrix with proper brand encoding
+part_matrix_min = matrix(0, valid_brands_count, 2);  # partkey, brand_code
+filtered_idx = 0;
+
+for (i in 1:part_nrows) {
+    brand_val = as.scalar(part_csv[i, 5]);  # p_brand
+    if (brand_val >= "MFGR#2221" & brand_val <= "MFGR#2228") {
+        filtered_idx = filtered_idx + 1;
+
+        # Encode brand names to numeric codes for efficient processing (using original metadata codes)
+        brand_code = 0;
+        if (brand_val == "MFGR#2221") brand_code = 453;
+        else if (brand_val == "MFGR#2222") brand_code = 597;
+        else if (brand_val == "MFGR#2223") brand_code = 907;
+        else if (brand_val == "MFGR#2224") brand_code = 282;
+        else if (brand_val == "MFGR#2225") brand_code = 850;
+        else if (brand_val == "MFGR#2226") brand_code = 525;
+        else if (brand_val == "MFGR#2227") brand_code = 538;
+        else if (brand_val == "MFGR#2228") brand_code = 608;
+        else brand_code = 9999;  # fallback for unknown brands in range
+
+        part_matrix_min[filtered_idx, 1] = as.scalar(part_keys_matrix[i, 1]);  # part_key
+        part_matrix_min[filtered_idx, 2] = brand_code;  # brand code
+    }
+}
+
+# ON-THE-FLY SUPPLIER TABLE FILTERING AND ENCODING (S_REGION = 'ASIA')
+# Two-pass approach for suppliers
+supplier_keys_matrix = as.matrix(supplier_csv[, 1]);  # supplier_key
+supplier_nrows = nrow(supplier_csv);
+asia_count = 0;
+
+# Pass 1: Count matching suppliers
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "ASIA") {
+        asia_count = asia_count + 1;
+    }
+}
+
+# Pass 2: Build supplier matrix
+sup_matrix_min = matrix(0, asia_count, 2);  # suppkey, region_encoded
+filtered_idx = 0;
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "ASIA") {
+        filtered_idx = filtered_idx + 1;
+        sup_matrix_min[filtered_idx, 1] = as.scalar(supplier_keys_matrix[i, 1]);  # supplier_key
+        sup_matrix_min[filtered_idx, 2] = 5;  # encoded value for ASIA
+    }
+}
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# We already filtered during matrix construction, but we can use RA selection for consistency
+# All parts in part_matrix_min are already filtered for brands between MFGR#2221 and MFGR#2228
+p_brand_filt = part_matrix_min;  # Already filtered
+
+# S_REGION = 'ASIA' : 5 (Our encoded value)
+s_reg_filt = raSel::m_raSelection(sup_matrix_min, col=2, op="==", val=5);
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED PART TABLE WHERE LO_PARTKEY = P_PARTKEY
+lo_part = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=p_brand_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY
+lo_part_sup = raJoin::m_raJoin(A=lo_part, colA=2, B=s_reg_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_part_sup, colA=3, B=date_matrix_min, colB=1, method="sort-merge");
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 4 OF LINEORDER-MIN-MATRIX
+revenue = joined_matrix[, 4];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(part_matrix_min) + ncol(sup_matrix_min) + 2)];
+# P_BRAND : COLUMN 2 OF PART-MIN-MATRIX
+p_brand = joined_matrix[,(ncol(lineorder_matrix_min) + 2)];
+
+max_p_brand = max(p_brand);
+p_brand_scale_f = ceil(max_p_brand) + 1;
+
+combined_key = d_year * p_brand_scale_f + p_brand;
+
+group_input = cbind(revenue, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+gr_key = agg_result[, 1];
+revenue = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+p_brand = round(gr_key %% p_brand_scale_f);
+d_year = round((gr_key - p_brand) / p_brand_scale_f);
+
+result = cbind(revenue, d_year, p_brand);
+
+result_ordered = order(target=result, by=3, decreasing=FALSE, index.return=FALSE); # 3 : P_BRAND
+result_ordered = order(target=result_ordered, by=2, decreasing=FALSE, index.return=FALSE); # D_YEAR
+
+print("Processing " + nrow(result_ordered) + " result rows...");
+
+# Output results with brand codes (matching original format)
+print("Q2.2 Results with brand codes:");
+
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Output in original format with brand codes
+    print(revenue_val + ".000 " + year_val + ".000 " + brand_code + ".000");
+}
+
+# Calculate and print total revenue
+total_revenue = sum(result_ordered[, 1]);
+print("");
+print("REVENUE: " + as.integer(total_revenue));
+print("");
+
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Map brand code back to brand name (using original metadata codes)
+    brand_name = "UNKNOWN";
+    if (brand_code == 453) brand_name = "MFGR#2221";
+    else if (brand_code == 597) brand_name = "MFGR#2222";
+    else if (brand_code == 907) brand_name = "MFGR#2223";
+    else if (brand_code == 282) brand_name = "MFGR#2224";
+    else if (brand_code == 850) brand_name = "MFGR#2225";
+    else if (brand_code == 525) brand_name = "MFGR#2226";
+    else if (brand_code == 538) brand_name = "MFGR#2227";
+    else if (brand_code == 608) brand_name = "MFGR#2228";
+
+    # Output in consistent format
+    print(revenue_val + ".000 " + year_val + ".000 " + brand_name);
+}
+
+# Frame format output
+print("");
+print("# FRAME: nrow = " + nrow(result_ordered) + ", ncol = 3");
+print("# C1 C2 C3");
+print("# INT32 INT32 STRING");
+
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Same brand code mapping for frame output (using original metadata codes)
+    brand_name = "UNKNOWN";
+    if (brand_code == 453) brand_name = "MFGR#2221";
+    else if (brand_code == 597) brand_name = "MFGR#2222";
+    else if (brand_code == 907) brand_name = "MFGR#2223";
+    else if (brand_code == 282) brand_name = "MFGR#2224";
+    else if (brand_code == 850) brand_name = "MFGR#2225";
+    else if (brand_code == 525) brand_name = "MFGR#2226";
+    else if (brand_code == 538) brand_name = "MFGR#2227";
+    else if (brand_code == 608) brand_name = "MFGR#2228";
+
+    print(revenue_val + " " + year_val + " " + brand_name);
+}
diff --git a/scripts/ssb/queries/q2_3.dml b/scripts/ssb/queries/q2_3.dml
new file mode 100644
index 00000000000..40630f471a2
--- /dev/null
+++ b/scripts/ssb/queries/q2_3.dml
@@ -0,0 +1,199 @@
+/*DML-script implementing the ssb query Q2.3 in SystemDS.
+SELECT SUM(lo_revenue), d_year, p_brand
+FROM lineorder, dates, part, supplier
+WHERE
+    lo_orderdate = d_datekey
+    AND lo_partkey = p_partkey
+    AND lo_suppkey = s_suppkey
+    AND p_brand = 'MFGR#2239'
+    AND s_region = 'EUROPE'
+GROUP BY d_year, p_brand
+ORDER BY d_year, p_brand;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q2_3.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q2_3.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+or with explicit -f flag:
+./bin/systemds -f scripts/ssb/queries/q2_3.dml -nvargs input_dir="/path/to/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- PREPARING --
+# Optimized approach: On-the-fly filtering with direct matrix construction for string fields
+
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-4 : LO_PARTKEY | COL-5 : LO_SUPPKEY |
+# COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE
+lineorder_csv_min = cbind(lineorder_csv[, 4], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+# ON-THE-FLY PART TABLE FILTERING AND ENCODING (P_BRAND = 'MFGR#2239')
+# Two-pass approach: Count first, then filter and encode
+part_keys_matrix = as.matrix(part_csv[, 1]);  # part_key
+part_nrows = nrow(part_csv);
+mfgr2239_count = 0;
+
+# Pass 1: Count matching parts (brand = MFGR#2239)
+for (i in 1:part_nrows) {
+    brand_val = as.scalar(part_csv[i, 5]);  # p_brand
+    if (brand_val == "MFGR#2239") {
+        mfgr2239_count = mfgr2239_count + 1;
+    }
+}
+
+# Pass 2: Build part matrix with proper brand encoding (using original metadata code)
+part_matrix_min = matrix(0, mfgr2239_count, 2);  # partkey, brand_code
+filtered_idx = 0;
+
+for (i in 1:part_nrows) {
+    brand_val = as.scalar(part_csv[i, 5]);  # p_brand
+    if (brand_val == "MFGR#2239") {
+        filtered_idx = filtered_idx + 1;
+        part_matrix_min[filtered_idx, 1] = as.scalar(part_keys_matrix[i, 1]);  # part_key
+        part_matrix_min[filtered_idx, 2] = 381;  # encoded value for MFGR#2239 (from original metadata)
+    }
+}
+
+# ON-THE-FLY SUPPLIER TABLE FILTERING AND ENCODING (S_REGION = 'EUROPE')
+# Two-pass approach for suppliers
+supplier_keys_matrix = as.matrix(supplier_csv[, 1]);  # supplier_key
+supplier_nrows = nrow(supplier_csv);
+europe_count = 0;
+
+# Pass 1: Count matching suppliers
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "EUROPE") {
+        europe_count = europe_count + 1;
+    }
+}
+
+# Pass 2: Build supplier matrix
+sup_matrix_min = matrix(0, europe_count, 2);  # suppkey, region_encoded
+filtered_idx = 0;
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "EUROPE") {
+        filtered_idx = filtered_idx + 1;
+        sup_matrix_min[filtered_idx, 1] = as.scalar(supplier_keys_matrix[i, 1]);  # supplier_key
+        sup_matrix_min[filtered_idx, 2] = 4;  # encoded value for EUROPE (from original metadata)
+    }
+}
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# We already filtered during matrix construction, but we can use RA selection for consistency
+# P_BRAND = 'MFGR#2239' : 381 (Our encoded value)
+p_brand_filt = raSel::m_raSelection(part_matrix_min, col=2, op="==", val=381);
+
+# S_REGION = 'EUROPE' : 4 (Our encoded value)
+s_reg_filt = raSel::m_raSelection(sup_matrix_min, col=2, op="==", val=4);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED PART TABLE WHERE LO_PARTKEY = P_PARTKEY
+lo_part = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=p_brand_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY
+lo_part_sup = raJoin::m_raJoin(A=lo_part, colA=2, B=s_reg_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_part_sup, colA=3, B=date_matrix_min, colB=1, method="sort-merge");
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 4 OF LINEORDER-MIN-MATRIX
+revenue = joined_matrix[, 4];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(part_matrix_min) + ncol(sup_matrix_min) + 2)];
+# P_BRAND : COLUMN 2 OF PART-MIN-MATRIX
+p_brand = joined_matrix[,(ncol(lineorder_matrix_min) + 2)];
+
+max_p_brand = max(p_brand);
+p_brand_scale_f = ceil(max_p_brand) + 1;
+
+combined_key = d_year * p_brand_scale_f + p_brand;
+
+group_input = cbind(revenue, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+gr_key = agg_result[, 1];
+revenue = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+p_brand = round(gr_key %% p_brand_scale_f);
+d_year = round((gr_key - p_brand) / p_brand_scale_f);
+
+result = cbind(revenue, d_year, p_brand);
+
+result_ordered = order(target=result, by=3, decreasing=FALSE, index.return=FALSE); # 3 : P_BRAND
+result_ordered = order(target=result_ordered, by=2, decreasing=FALSE, index.return=FALSE); # D_YEAR
+
+print("Processing " + nrow(result_ordered) + " result rows...");
+
+# Output results with brand codes (matching original format)
+print("Q2.3 Results with brand codes:");
+
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Output in original format with brand codes
+    print(revenue_val + ".000 " + year_val + ".000 " + brand_code + ".000");
+}
+
+# Calculate and print total revenue
+total_revenue = sum(result_ordered[, 1]);
+print("");
+print("REVENUE: " + as.integer(total_revenue));
+print("");
+
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Map brand code back to brand name (using original metadata code)
+    brand_name = "UNKNOWN";
+    if (brand_code == 381) brand_name = "MFGR#2239";
+
+    # Output in consistent format
+    print(revenue_val + ".000 " + year_val + ".000 " + brand_name);
+}
+
+# Frame format output
+print("");
+print("# FRAME: nrow = " + nrow(result_ordered) + ", ncol = 3");
+print("# C1 C2 C3");
+print("# INT32 INT32 STRING");
+
+for (i in 1:nrow(result_ordered)) {
+    revenue_val = as.scalar(result_ordered[i, 1]);
+    year_val = as.scalar(result_ordered[i, 2]);
+    brand_code = as.scalar(result_ordered[i, 3]);
+
+    # Same brand code mapping for frame output
+    brand_name = "UNKNOWN";
+    if (brand_code == 381) brand_name = "MFGR#2239";
+
+    print(revenue_val + " " + year_val + " " + brand_name);
+}
diff --git a/scripts/ssb/queries/q3_1.dml b/scripts/ssb/queries/q3_1.dml
new file mode 100644
index 00000000000..93c9fbcb57c
--- /dev/null
+++ b/scripts/ssb/queries/q3_1.dml
@@ -0,0 +1,271 @@
+/*DML-script implementing the ssb query Q3.1 in SystemDS.
+SELECT
+    c_nation,
+    s_nation,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND c_region = 'ASIA'
+    AND s_region = 'ASIA'
+    AND d_year >= 1992
+    AND d_year <= 1997
+GROUP BY c_nation, s_nation, d_year
+ORDER BY d_year ASC, REVENUE DESC;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q3_1.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q3_1.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+or with explicit -f flag:
+./bin/systemds -f scripts/ssb/queries/q3_1.dml -nvargs input_dir="/path/to/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+customer_csv = read(input_dir + "/customer.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- PREPARING --
+# Optimized approach: On-the-fly filtering with direct matrix construction for string fields
+
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-3 : LO_CUSTKEY | COL-5 : LO_SUPPKEY |
+# COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE
+lineorder_csv_min = cbind(lineorder_csv[, 3], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+# ON-THE-FLY CUSTOMER TABLE FILTERING AND ENCODING (C_REGION = 'ASIA')
+# Two-pass approach: Count first, then filter and encode
+customer_keys_matrix = as.matrix(customer_csv[, 1]);  # customer_key
+customer_nrows = nrow(customer_csv);
+asia_customer_count = 0;
+
+# Pass 1: Count matching customers (region = ASIA)
+for (i in 1:customer_nrows) {
+    region_val = as.scalar(customer_csv[i, 6]);  # c_region
+    if (region_val == "ASIA") {
+        asia_customer_count = asia_customer_count + 1;
+    }
+}
+
+# Pass 2: Build customer matrix with proper nation and region encoding
+cust_matrix_min = matrix(0, asia_customer_count, 3);  # custkey, nation_code, region_code
+filtered_idx = 0;
+
+for (i in 1:customer_nrows) {
+    region_val = as.scalar(customer_csv[i, 6]);  # c_region
+    if (region_val == "ASIA") {
+        filtered_idx = filtered_idx + 1;
+        nation_val = as.scalar(customer_csv[i, 5]);  # c_nation
+
+        cust_matrix_min[filtered_idx, 1] = as.scalar(customer_keys_matrix[i, 1]);  # customer_key
+        cust_matrix_min[filtered_idx, 3] = 4;  # encoded value for ASIA region (from original metadata)
+
+        # Map nation names to codes (using original metadata encodings)
+        if (nation_val == "CHINA") cust_matrix_min[filtered_idx, 2] = 247;
+        else if (nation_val == "INDIA") cust_matrix_min[filtered_idx, 2] = 36;
+        else if (nation_val == "INDONESIA") cust_matrix_min[filtered_idx, 2] = 243;
+        else if (nation_val == "JAPAN") cust_matrix_min[filtered_idx, 2] = 24;
+        else if (nation_val == "VIETNAM") cust_matrix_min[filtered_idx, 2] = 230;
+        else cust_matrix_min[filtered_idx, 2] = -1;  # unknown nation
+    }
+}
+
+# ON-THE-FLY SUPPLIER TABLE FILTERING AND ENCODING (S_REGION = 'ASIA')
+# Two-pass approach for suppliers
+supplier_keys_matrix = as.matrix(supplier_csv[, 1]);  # supplier_key
+supplier_nrows = nrow(supplier_csv);
+asia_supplier_count = 0;
+
+# Pass 1: Count matching suppliers
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "ASIA") {
+        asia_supplier_count = asia_supplier_count + 1;
+    }
+}
+
+# Pass 2: Build supplier matrix
+sup_matrix_min = matrix(0, asia_supplier_count, 3);  # suppkey, nation_code, region_code
+filtered_idx = 0;
+for (i in 1:supplier_nrows) {
+    region_val = as.scalar(supplier_csv[i, 6]);  # s_region
+    if (region_val == "ASIA") {
+        filtered_idx = filtered_idx + 1;
+        nation_val = as.scalar(supplier_csv[i, 5]);  # s_nation
+
+        sup_matrix_min[filtered_idx, 1] = as.scalar(supplier_keys_matrix[i, 1]);  # supplier_key
+        sup_matrix_min[filtered_idx, 3] = 5;  # encoded value for ASIA region (from original metadata)
+
+        # Map nation names to codes (using original metadata encodings)
+        if (nation_val == "CHINA") sup_matrix_min[filtered_idx, 2] = 27;
+        else if (nation_val == "INDIA") sup_matrix_min[filtered_idx, 2] = 12;
+        else if (nation_val == "INDONESIA") sup_matrix_min[filtered_idx, 2] = 48;
+        else if (nation_val == "JAPAN") sup_matrix_min[filtered_idx, 2] = 73;
+        else if (nation_val == "VIETNAM") sup_matrix_min[filtered_idx, 2] = 85;
+        else sup_matrix_min[filtered_idx, 2] = -1;  # unknown nation
+    }
+}
+
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# We already filtered during matrix construction, but we can use RA selection for consistency
+# C_REGION = 'ASIA' : 4 (Our encoded value)
+c_reg_filt = raSel::m_raSelection(cust_matrix_min, col=3, op="==", val=4);
+
+# S_REGION = 'ASIA' : 5 (Our encoded value)
+s_reg_filt = raSel::m_raSelection(sup_matrix_min, col=3, op="==", val=5);
+
+# D_YEAR BETWEEN 1992 & 1997
+d_year_filt = raSel::m_raSelection(date_matrix_min, col=2, op=">=", val=1992);
+d_year_filt = raSel::m_raSelection(d_year_filt, col=2, op="<=", val=1997);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED CUSTOMER TABLE WHERE LO_CUSTKEY = C_CUSTKEY
+lo_cust = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=c_reg_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY
+lo_cust_sup = raJoin::m_raJoin(A=lo_cust, colA=2, B=s_reg_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_cust_sup, colA=3, B=d_year_filt, colB=1, method="sort-merge");
+
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 4 OF LINEORDER-MIN-MATRIX
+revenue = joined_matrix[, 4];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + ncol(sup_matrix_min) + 2)];
+# C_NATION : COLUMN 2 OF CUST-MIN-MATRIX
+c_nation = joined_matrix[,(ncol(lineorder_matrix_min) + 2)];
+# S_NATION : COLUMN 2 OF SUP-MIN-MATRIX
+s_nation = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + 2)];
+
+# CALCULATING COMBINATION KEY WITH PRIORITY: C_NATION, S_NATION, D_YEAR
+max_c_nation = max(c_nation);
+max_s_nation = max(s_nation);
+max_d_year = max(d_year);
+
+c_nation_scale_f = ceil(max_c_nation) + 1;
+s_nation_scale_f = ceil(max_s_nation) + 1;
+d_year_scale_f = ceil(max_d_year) + 1;
+
+combined_key = c_nation * s_nation_scale_f * d_year_scale_f + s_nation * d_year_scale_f + d_year;
+
+group_input = cbind(revenue, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+key = agg_result[, 1];
+revenue = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+# EXTRACTING C_NATION, S_NATION & D_YEAR
+d_year = round(key %% d_year_scale_f);
+c_nation = round(floor(key / (s_nation_scale_f * d_year_scale_f)));
+s_nation = round((floor(key / d_year_scale_f)) %% s_nation_scale_f);
+
+result = cbind(c_nation, s_nation, d_year, revenue);
+
+
+# -- SORTING --
+# PRIORITY 1 D_YEAR (ASC), 2 REVENUE (DESC)
+result_ordered = order(target=result, by=4, decreasing=TRUE, index.return=FALSE);
+result_ordered = order(target=result_ordered, by=3, decreasing=FALSE, index.return=FALSE);
+
+# -- DECODING C_NATION & S_NATION --
+# Map nation codes back to nation names (using original metadata codes)
+print("Processing " + nrow(result_ordered) + " result rows...");
+
+print("Q3.1 Results with nation codes:");
+for (i in 1:nrow(result_ordered)) {
+    c_nation_code = as.scalar(result_ordered[i, 1]);
+    s_nation_code = as.scalar(result_ordered[i, 2]);
+    year_val = as.scalar(result_ordered[i, 3]);
+    revenue_val = as.scalar(result_ordered[i, 4]);
+
+    print(c_nation_code + ".000 " + s_nation_code + ".000 " + year_val + ".000 " + revenue_val + ".000");
+}
+
+# Calculate and print total revenue
+total_revenue = sum(result_ordered[, 4]);
+print("");
+print("TOTAL REVENUE: " + as.integer(total_revenue));
+print("");
+
+for (i in 1:nrow(result_ordered)) {
+    c_nation_code = as.scalar(result_ordered[i, 1]);
+    s_nation_code = as.scalar(result_ordered[i, 2]);
+    year_val = as.scalar(result_ordered[i, 3]);
+    revenue_val = as.scalar(result_ordered[i, 4]);
+
+    # Map customer nation codes back to names
+    c_nation_name = "UNKNOWN";
+    if (c_nation_code == 247) c_nation_name = "CHINA";
+    else if (c_nation_code == 36) c_nation_name = "INDIA";
+    else if (c_nation_code == 243) c_nation_name = "INDONESIA";
+    else if (c_nation_code == 24) c_nation_name = "JAPAN";
+    else if (c_nation_code == 230) c_nation_name = "VIETNAM";
+
+    # Map supplier nation codes back to names
+    s_nation_name = "UNKNOWN";
+    if (s_nation_code == 27) s_nation_name = "CHINA";
+    else if (s_nation_code == 12) s_nation_name = "INDIA";
+    else if (s_nation_code == 48) s_nation_name = "INDONESIA";
+    else if (s_nation_code == 73) s_nation_name = "JAPAN";
+    else if (s_nation_code == 85) s_nation_name = "VIETNAM";
+
+    # Output in consistent format
+    print(c_nation_name + " " + s_nation_name + " " + year_val + ".000 " + revenue_val + ".000");
+}
+
+# Frame format output
+print("");
+print("# FRAME: nrow = " + nrow(result_ordered) + ", ncol = 4");
+print("# C1 C2 C3 C4");
+print("# STRING STRING INT32 INT32");
+
+for (i in 1:nrow(result_ordered)) {
+    c_nation_code = as.scalar(result_ordered[i, 1]);
+    s_nation_code = as.scalar(result_ordered[i, 2]);
+    year_val = as.scalar(result_ordered[i, 3]);
+    revenue_val = as.scalar(result_ordered[i, 4]);
+
+    # Map nation codes to names for frame output
+    c_nation_name = "UNKNOWN";
+    if (c_nation_code == 247) c_nation_name = "CHINA";
+    else if (c_nation_code == 36) c_nation_name = "INDIA";
+    else if (c_nation_code == 243) c_nation_name = "INDONESIA";
+    else if (c_nation_code == 24) c_nation_name = "JAPAN";
+    else if (c_nation_code == 230) c_nation_name = "VIETNAM";
+
+    s_nation_name = "UNKNOWN";
+    if (s_nation_code == 27) s_nation_name = "CHINA";
+    else if (s_nation_code == 12) s_nation_name = "INDIA";
+    else if (s_nation_code == 48) s_nation_name = "INDONESIA";
+    else if (s_nation_code == 73) s_nation_name = "JAPAN";
+    else if (s_nation_code == 85) s_nation_name = "VIETNAM";
+
+    print(c_nation_name + " " + s_nation_name + " " + year_val + " " + revenue_val);
+}
+
diff --git a/scripts/ssb/queries/q3_2.dml b/scripts/ssb/queries/q3_2.dml
new file mode 100644
index 00000000000..a654b693a0c
--- /dev/null
+++ b/scripts/ssb/queries/q3_2.dml
@@ -0,0 +1,215 @@
+/*DML-script implementing the ssb query Q3.2 in SystemDS.
+SELECT
+    c_city,
+    s_city,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND c_nation = 'UNITED STATES'
+    AND s_nation = 'UNITED STATES'
+    AND d_year >= 1992
+    AND d_year <= 1997
+GROUP BY c_city, s_city, d_year
+ORDER BY d_year ASC, REVENUE DESC;
+
+Usage:
+./bin/systemds scripts/ssb/queries/q3_2.dml -nvargs input_dir="/path/to/data"
+./bin/systemds scripts/ssb/queries/q3_2.dml -nvargs input_dir="/Users/ghafekalsaho/Desktop/data"
+
+Parameters:
+input_dir - Path to input directory containing the table files (e.g., ./data)
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+customer_csv = read(input_dir + "/customer.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+# -- PREPARING --
+# Optimized approach: On-the-fly filtering with direct matrix construction for string fields
+
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-3 : LO_CUSTKEY | COL-5 : LO_SUPPKEY |
+# COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE
+lineorder_csv_min = cbind(lineorder_csv[, 3], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+# ON-THE-FLY CUSTOMER TABLE FILTERING AND ENCODING (C_NATION = 'UNITED STATES')
+# Two-pass approach: Count first, then filter and encode
+customer_keys_matrix = as.matrix(customer_csv[, 1]);  # customer_key
+customer_nrows = nrow(customer_csv);
+us_customer_count = 0;
+
+# Pass 1: Count matching customers (nation = UNITED STATES)
+for (i in 1:customer_nrows) {
+    nation_val = as.scalar(customer_csv[i, 5]);  # c_nation
+    if (nation_val == "UNITED STATES") {
+        us_customer_count = us_customer_count + 1;
+    }
+}
+
+# Pass 2: Build customer matrix with proper city and nation encoding
+cust_matrix_min = matrix(0, us_customer_count, 3);  # custkey, city_code, nation_code
+filtered_idx = 0;
+
+for (i in 1:customer_nrows) {
+    nation_val = as.scalar(customer_csv[i, 5]);  # c_nation
+    if (nation_val == "UNITED STATES") {
+        filtered_idx = filtered_idx + 1;
+        city_val = as.scalar(customer_csv[i, 4]);  # c_city
+
+        cust_matrix_min[filtered_idx, 1] = as.scalar(customer_keys_matrix[i, 1]);  # customer_key
+        cust_matrix_min[filtered_idx, 3] = 1;  # encoded value for UNITED STATES nation
+
+        # Assign city codes dynamically based on city names
+        # Use filtered index for simple unique encoding
+        city_code = filtered_idx;
+        cust_matrix_min[filtered_idx, 2] = city_code;
+    }
+}
+
+# ON-THE-FLY SUPPLIER TABLE FILTERING AND ENCODING (S_NATION = 'UNITED STATES')
+# Two-pass approach for suppliers
+supplier_keys_matrix = as.matrix(supplier_csv[, 1]);  # supplier_key
+supplier_nrows = nrow(supplier_csv);
+us_supplier_count = 0;
+
+# Pass 1: Count matching suppliers
+for (i in 1:supplier_nrows) {
+    nation_val = as.scalar(supplier_csv[i, 5]);  # s_nation
+    if (nation_val == "UNITED STATES") {
+        us_supplier_count = us_supplier_count + 1;
+    }
+}
+
+# Pass 2: Build supplier matrix with city encoding (independent from customer cities)
+sup_matrix_min = matrix(0, us_supplier_count, 3);  # suppkey, city_code, nation_code
+filtered_idx = 0;
+
+for (i in 1:supplier_nrows) {
+    nation_val = as.scalar(supplier_csv[i, 5]);  # s_nation
+    if (nation_val == "UNITED STATES") {
+        filtered_idx = filtered_idx + 1;
+        city_val = as.scalar(supplier_csv[i, 4]);  # s_city
+
+        sup_matrix_min[filtered_idx, 1] = as.scalar(supplier_keys_matrix[i, 1]);  # supplier_key
+        sup_matrix_min[filtered_idx, 3] = 1;  # encoded value for UNITED STATES nation
+
+        # Assign city codes dynamically based on city names
+        # Use filtered index for simple unique encoding
+        city_code = filtered_idx;
+        sup_matrix_min[filtered_idx, 2] = city_code;
+    }
+}
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# We already filtered during matrix construction, but we can use RA selection for consistency
+# C_NATION = 'UNITED STATES' : 1 (Our encoded value)
+c_nat_filt = raSel::m_raSelection(cust_matrix_min, col=3, op="==", val=1);
+
+# S_NATION = 'UNITED STATES' : 1 (Our encoded value)
+s_nat_filt = raSel::m_raSelection(sup_matrix_min, col=3, op="==", val=1);
+
+# D_YEAR BETWEEN 1992 & 1997
+d_year_filt = raSel::m_raSelection(date_matrix_min, col=2, op=">=", val=1992);
+d_year_filt = raSel::m_raSelection(d_year_filt, col=2, op="<=", val=1997);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED CUSTOMER TABLE WHERE LO_CUSTKEY = C_CUSTKEY
+lo_cust = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=c_nat_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY
+lo_cust_sup = raJoin::m_raJoin(A=lo_cust, colA=2, B=s_nat_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_cust_sup, colA=3, B=d_year_filt, colB=1, method="sort-merge");
+
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 4 OF LINEORDER-MIN-MATRIX (was 5, now 4 since we removed LO_PARTKEY)
+revenue = joined_matrix[, 4];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + ncol(sup_matrix_min) + 2)];
+# C_CITY : COLUMN 2 OF CUST-MIN-MATRIX
+c_city = joined_matrix[,(ncol(lineorder_matrix_min) + 2)];
+# S_CITY : COLUMN 2 OF SUP-MIN-MATRIX
+s_city = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + 2)];
+
+# CALCULATING COMBINATION KEY WITH PRIORITY: C_CITY, S_CITY & D_YEAR
+max_c_city = max(c_city);
+max_s_city = max(s_city);
+max_d_year = max(d_year);
+
+c_city_scale_f = ceil(max_c_city) + 1;
+s_city_scale_f = ceil(max_s_city) + 1;
+d_year_scale_f = ceil(max_d_year) + 1;
+
+combined_key = c_city * s_city_scale_f * d_year_scale_f + s_city * d_year_scale_f + d_year;
+
+group_input = cbind(revenue, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+key = agg_result[, 1];
+revenue = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+# EXTRACTING C_CITY, S_CITY & D_YEAR
+d_year = round(key %% d_year_scale_f);
+c_city = round(floor(key / (s_city_scale_f * d_year_scale_f)));
+s_city = round((floor(key / d_year_scale_f)) %% s_city_scale_f);
+
+result = cbind(c_city, s_city, d_year, revenue);
+
+
+# -- SORTING --
+# PRIORITY 1 D_YEAR (ASC), 2 REVENUE (DESC)
+result_ordered = order(target=result, by=4, decreasing=TRUE, index.return=FALSE);
+result_ordered = order(target=result_ordered, by=3, decreasing=FALSE, index.return=FALSE);
+
+
+# -- DECODING C_CITY & S_CITY CODES --
+# For simplicity, we'll output the city codes rather than names
+# This follows the same pattern as q3_1.dml which outputs nation codes
+print("Q3.2 Results:");
+print("# FRAME: nrow = " + nrow(result_ordered) + ", ncol = 4");
+print("# C1 C2 C3 C4");
+print("# STRING STRING INT32 INT32");
+
+for (i in 1:nrow(result_ordered)) {
+    c_city_code = as.scalar(result_ordered[i, 1]);
+    s_city_code = as.scalar(result_ordered[i, 2]);
+    year_val = as.scalar(result_ordered[i, 3]);
+    revenue_val = as.scalar(result_ordered[i, 4]);
+
+    # For now, output the codes - we can map them back to names later if needed
+    c_city_name = "UNITED ST" + c_city_code;  # Format similar to expected output
+    s_city_name = "UNITED ST" + s_city_code;  # Format similar to expected output
+
+    print(c_city_name + " " + s_city_name + " " + year_val + " " + revenue_val);
+}
+
+# Calculate total revenue for validation
+total_revenue = sum(result_ordered[, 4]);
+print("");
+print("Total number of result rows: " + nrow(result_ordered));
+print("Total revenue: " + as.integer(total_revenue));
+print("Q3.2 finished");
+
diff --git a/scripts/ssb/queries/q3_3.dml b/scripts/ssb/queries/q3_3.dml
new file mode 100644
index 00000000000..921fd00b501
--- /dev/null
+++ b/scripts/ssb/queries/q3_3.dml
@@ -0,0 +1,217 @@
+/* DML-script implementing the ssb query Q3.3 in SystemDS.
+SELECT
+    c_city,
+    s_city,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND (
+        c_city = 'UNITED KI1'
+        OR c_city = 'UNITED KI5'
+    )
+    AND (
+        s_city = 'UNITED KI1'
+        OR s_city = 'UNITED KI5'
+    )
+    AND d_year >= 1992
+    AND d_year <= 1997
+GROUP BY c_city, s_city, d_year
+ORDER BY d_year ASC, REVENUE DESC;
+*/
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+#part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+customer_csv = read(input_dir + "/customer.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- PREPARING --
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-3 : LO_CUSTKEY | COL-4 : LO_PARTKEY |
+# COL-5 : LO_SUPPKEY | COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE
+lineorder_csv_min = cbind(lineorder_csv[, 3], lineorder_csv[, 4], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+# ON-THE-FLY CUSTOMER TABLE FILTERING AND ENCODING (C_CITY = 'UNITED KI1' OR 'UNITED KI5')
+customer_keys_matrix = as.matrix(customer_csv[, 1]);  # customer_key
+customer_nrows = nrow(customer_csv);
+matching_customer_count = 0;
+
+# Pass 1: Count matching customers
+for (i in 1:customer_nrows) {
+    city_val = as.scalar(customer_csv[i, 4]);  # c_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        matching_customer_count = matching_customer_count + 1;
+    }
+}
+
+# Pass 2: Build customer matrix with dynamic city encoding
+cust_matrix_min = matrix(0, matching_customer_count, 2);  # custkey, city_code
+filtered_idx = 0;
+
+for (i in 1:customer_nrows) {
+    city_val = as.scalar(customer_csv[i, 4]);  # c_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        filtered_idx = filtered_idx + 1;
+        cust_matrix_min[filtered_idx, 1] = as.scalar(customer_keys_matrix[i, 1]);  # customer_key
+
+        # Use consistent encoding: 1 for UNITED KI1, 2 for UNITED KI5
+        if (city_val == "UNITED KI1") {
+            cust_matrix_min[filtered_idx, 2] = 1;
+        } else {
+            cust_matrix_min[filtered_idx, 2] = 2;
+        }
+    }
+}
+
+# ON-THE-FLY SUPPLIER TABLE FILTERING AND ENCODING (S_CITY = 'UNITED KI1' OR 'UNITED KI5')
+supplier_keys_matrix = as.matrix(supplier_csv[, 1]);  # supplier_key
+supplier_nrows = nrow(supplier_csv);
+matching_supplier_count = 0;
+
+# Pass 1: Count matching suppliers
+for (i in 1:supplier_nrows) {
+    city_val = as.scalar(supplier_csv[i, 4]);  # s_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        matching_supplier_count = matching_supplier_count + 1;
+    }
+}
+
+# Pass 2: Build supplier matrix with dynamic city encoding
+sup_matrix_min = matrix(0, matching_supplier_count, 2);  # suppkey, city_code
+filtered_idx = 0;
+
+for (i in 1:supplier_nrows) {
+    city_val = as.scalar(supplier_csv[i, 4]);  # s_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        filtered_idx = filtered_idx + 1;
+        sup_matrix_min[filtered_idx, 1] = as.scalar(supplier_keys_matrix[i, 1]);  # supplier_key
+
+        # Use consistent encoding: 1 for UNITED KI1, 2 for UNITED KI5
+        if (city_val == "UNITED KI1") {
+            sup_matrix_min[filtered_idx, 2] = 1;
+        } else {
+            sup_matrix_min[filtered_idx, 2] = 2;
+        }
+    }
+}
+
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# Since we already filtered during matrix construction, we can use the full matrices
+# or apply additional RA selection if needed for consistency
+c_city_filt = cust_matrix_min;  # Already filtered for target cities
+s_city_filt = sup_matrix_min;   # Already filtered for target cities
+
+# D_YEAR BETWEEN 1992 & 1997
+d_year_filt = raSel::m_raSelection(date_matrix_min, col=2, op=">=", val=1992);
+d_year_filt = raSel::m_raSelection(d_year_filt, col=2, op="<=", val=1997);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED CUSTOMER TABLE WHERE LO_CUSTKEY = C_CUSTKEY
+lo_cust = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=c_city_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY
+lo_cust_sup = raJoin::m_raJoin(A=lo_cust, colA=3, B=s_city_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_cust_sup, colA=4, B=d_year_filt, colB=1, method="sort-merge");
+#print(nrow(joined_matrix));
+
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 5 OF LINEORDER-MIN-MATRIX
+revenue = joined_matrix[, 5];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + ncol(sup_matrix_min) + 2)];
+# C_CITY : COLUMN 2 OF CUST-MIN-MATRIX
+c_city = joined_matrix[,(ncol(lineorder_matrix_min) + 2)];
+# S_CITY : COLUMN 2 OF CUST-MIN-MATRIX
+s_city = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + 2)];
+
+# CALCULATING COMBINATION KEY WITH PRIORITY: C_CITY, S_CITY & D_YEAR
+max_c_city = max(c_city);
+max_s_city = max(s_city);
+max_d_year = max(d_year);
+
+c_city_scale_f = ceil(max_c_city) + 1;
+s_city_scale_f = ceil(max_s_city) + 1;
+d_year_scale_f = ceil(max_d_year) + 1;
+
+combined_key = c_city * s_city_scale_f * d_year_scale_f + s_city * d_year_scale_f + d_year;
+
+group_input = cbind(revenue, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+key = agg_result[, 1];
+revenue = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+# EXTRACTING C_CITY, S_CITY & D_YEAR
+d_year = round(key %% d_year_scale_f);
+c_city = round(floor(key / (s_city_scale_f * d_year_scale_f)));
+s_city = round((floor(key / d_year_scale_f)) %% s_city_scale_f);
+
+result = cbind(c_city, s_city, d_year, revenue);
+
+
+# -- SORTING --
+# PRIORITY 1 D_YEAR (ASC), 2 REVENUE (DESC)
+result_ordered = order(target=result, by=4, decreasing=TRUE, index.return=FALSE);
+result_ordered = order(target=result_ordered, by=3, decreasing=FALSE, index.return=FALSE);
+
+
+# -- OUTPUT RESULTS --
+print("Q3.3 Results:");
+print("# FRAME: nrow = " + nrow(result_ordered) + ", ncol = 4");
+print("# C1 C2 C3 C4");
+print("# STRING STRING INT32 INT32");
+
+for (i in 1:nrow(result_ordered)) {
+    c_city_code = as.scalar(result_ordered[i, 1]);
+    s_city_code = as.scalar(result_ordered[i, 2]);
+    year_val = as.scalar(result_ordered[i, 3]);
+    revenue_val = as.scalar(result_ordered[i, 4]);
+
+    # Map back to original city names based on the encoding used
+    if (c_city_code == 1) {
+        c_city_name = "UNITED KI1";
+    } else {
+        c_city_name = "UNITED KI5";
+    }
+
+    if (s_city_code == 1) {
+        s_city_name = "UNITED KI1";
+    } else {
+        s_city_name = "UNITED KI5";
+    }
+
+    print(c_city_name + " " + s_city_name + " " + as.integer(year_val) + " " + as.integer(revenue_val));
+}
+
+# Calculate total revenue for validation
+total_revenue = sum(result_ordered[, 4]);
+print("");
+print("Total number of result rows: " + nrow(result_ordered));
+print("Total revenue: " + as.integer(total_revenue));
+print("Q3.3 finished");
+
diff --git a/scripts/ssb/queries/q3_4.dml b/scripts/ssb/queries/q3_4.dml
new file mode 100644
index 00000000000..61327c6dfd7
--- /dev/null
+++ b/scripts/ssb/queries/q3_4.dml
@@ -0,0 +1,240 @@
+/* DML-script implementing the ssb query Q3.4 in SystemDS.
+SELECT
+    c_city,
+    s_city,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND (
+        c_city = 'UNITED KI1'
+        OR c_city = 'UNITED KI5'
+    )
+    AND (
+        s_city = 'UNITED KI1'
+        OR s_city = 'UNITED KI5'
+    )
+    AND d_yearmonth = 'Dec1997'
+GROUP BY c_city, s_city, d_year
+ORDER BY d_year ASC, REVENUE DESC;
+*/
+
+# -- PARAMETER HANDLING --
+input_dir = ifdef($input_dir, "./data");
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+#part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+customer_csv = read(input_dir + "/customer.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- PREPARING --
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-3 : LO_CUSTKEY | COL-4 : LO_PARTKEY |
+# COL-5 : LO_SUPPKEY | COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE
+lineorder_csv_min = cbind(lineorder_csv[, 3], lineorder_csv[, 4], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+# ON-THE-FLY CUSTOMER TABLE FILTERING AND ENCODING (C_CITY = 'UNITED KI1' OR 'UNITED KI5')
+customer_keys_matrix = as.matrix(customer_csv[, 1]);  # customer_key
+customer_nrows = nrow(customer_csv);
+matching_customer_count = 0;
+
+# Pass 1: Count matching customers
+for (i in 1:customer_nrows) {
+    city_val = as.scalar(customer_csv[i, 4]);  # c_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        matching_customer_count = matching_customer_count + 1;
+    }
+}
+
+# Pass 2: Build customer matrix with dynamic city encoding
+cust_matrix_min = matrix(0, matching_customer_count, 2);  # custkey, city_code
+filtered_idx = 0;
+
+for (i in 1:customer_nrows) {
+    city_val = as.scalar(customer_csv[i, 4]);  # c_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        filtered_idx = filtered_idx + 1;
+        cust_matrix_min[filtered_idx, 1] = as.scalar(customer_keys_matrix[i, 1]);  # customer_key
+
+        # Use consistent encoding: 1 for UNITED KI1, 2 for UNITED KI5
+        if (city_val == "UNITED KI1") {
+            cust_matrix_min[filtered_idx, 2] = 1;
+        } else {
+            cust_matrix_min[filtered_idx, 2] = 2;
+        }
+    }
+}
+
+# ON-THE-FLY SUPPLIER TABLE FILTERING AND ENCODING (S_CITY = 'UNITED KI1' OR 'UNITED KI5')
+supplier_keys_matrix = as.matrix(supplier_csv[, 1]);  # supplier_key
+supplier_nrows = nrow(supplier_csv);
+matching_supplier_count = 0;
+
+# Pass 1: Count matching suppliers
+for (i in 1:supplier_nrows) {
+    city_val = as.scalar(supplier_csv[i, 4]);  # s_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        matching_supplier_count = matching_supplier_count + 1;
+    }
+}
+
+# Pass 2: Build supplier matrix with dynamic city encoding
+sup_matrix_min = matrix(0, matching_supplier_count, 2);  # suppkey, city_code
+filtered_idx = 0;
+
+for (i in 1:supplier_nrows) {
+    city_val = as.scalar(supplier_csv[i, 4]);  # s_city
+    if (city_val == "UNITED KI1" | city_val == "UNITED KI5") {
+        filtered_idx = filtered_idx + 1;
+        sup_matrix_min[filtered_idx, 1] = as.scalar(supplier_keys_matrix[i, 1]);  # supplier_key
+
+        # Use consistent encoding: 1 for UNITED KI1, 2 for UNITED KI5
+        if (city_val == "UNITED KI1") {
+            sup_matrix_min[filtered_idx, 2] = 1;
+        } else {
+            sup_matrix_min[filtered_idx, 2] = 2;
+        }
+    }
+}
+
+
+# -- FILTERING THE DATA WITH RA-SELECTION FUNCTION --
+# Since we already filtered during matrix construction, we can use the full matrices
+c_city_filt = cust_matrix_min;  # Already filtered for target cities
+s_city_filt = sup_matrix_min;   # Already filtered for target cities
+
+# D_YEARMONTH = 'Dec1997' - Need precise filtering for Dec1997 only
+# Build filtered date matrix manually since we need string matching on d_yearmonth
+date_full_frame = cbind(date_csv[, 1], date_csv[, 5], date_csv[, 7]);  # datekey, year, yearmonth
+date_nrows = nrow(date_full_frame);
+matching_dates = matrix(0, 31, 2);  # We know 31 entries exist, store datekey and year
+filtered_idx = 0;
+
+for (i in 1:date_nrows) {
+    yearmonth_val = as.scalar(date_full_frame[i, 3]);  # d_yearmonth
+    if (yearmonth_val == "Dec1997") {
+        filtered_idx = filtered_idx + 1;
+        matching_dates[filtered_idx, 1] = as.scalar(date_matrix_min[i, 1]);  # datekey
+        matching_dates[filtered_idx, 2] = as.scalar(date_matrix_min[i, 2]);  # d_year
+    }
+}
+
+d_year_filt = matching_dates;
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED CUSTOMER TABLE WHERE LO_CUSTKEY = C_CUSTKEY
+lo_cust = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=c_city_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY
+lo_cust_sup = raJoin::m_raJoin(A=lo_cust, colA=3, B=s_city_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_cust_sup, colA=4, B=d_year_filt, colB=1, method="sort-merge");
+
+# Check if we have any results
+if (nrow(joined_matrix) == 0) {
+    print("Q3.4 Results:");
+    print("# FRAME: nrow = 0, ncol = 4");
+    print("# C1 C2 C3 C4");
+    print("# STRING STRING INT32 INT32");
+    print("");
+    print("Total number of result rows: 0");
+    print("Total revenue: 0");
+    print("Q3.4 finished - no matching data for Dec1997");
+} else {
+
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 5 OF LINEORDER-MIN-MATRIX
+revenue = joined_matrix[, 5];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + ncol(sup_matrix_min) + 2)];
+# C_CITY : COLUMN 2 OF CUST-MIN-MATRIX
+c_city = joined_matrix[,(ncol(lineorder_matrix_min) + 2)];
+# S_CITY : COLUMN 2 OF CUST-MIN-MATRIX
+s_city = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_matrix_min) + 2)];
+
+# CALCULATING COMBINATION KEY WITH PRIORITY: C_CITY, S_CITY & D_YEAR
+max_c_city = max(c_city);
+max_s_city = max(s_city);
+max_d_year = max(d_year);
+
+c_city_scale_f = ceil(max_c_city) + 1;
+s_city_scale_f = ceil(max_s_city) + 1;
+d_year_scale_f = ceil(max_d_year) + 1;
+
+combined_key = c_city * s_city_scale_f * d_year_scale_f + s_city * d_year_scale_f + d_year;
+
+group_input = cbind(revenue, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+key = agg_result[, 1];
+revenue = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+# EXTRACTING C_CITY, S_CITY & D_YEAR
+d_year = round(key %% d_year_scale_f);
+c_city = round(floor(key / (s_city_scale_f * d_year_scale_f)));
+s_city = round((floor(key / d_year_scale_f)) %% s_city_scale_f);
+
+result = cbind(c_city, s_city, d_year, revenue);
+
+
+# -- SORTING --
+# PRIORITY 1 D_YEAR (ASC), 2 REVENUE (DESC)
+result_ordered = order(target=result, by=4, decreasing=TRUE, index.return=FALSE);
+result_ordered = order(target=result_ordered, by=3, decreasing=FALSE, index.return=FALSE);
+
+
+# -- OUTPUT RESULTS --
+print("Q3.4 Results:");
+print("# FRAME: nrow = " + nrow(result_ordered) + ", ncol = 4");
+print("# C1 C2 C3 C4");
+print("# STRING STRING INT32 INT32");
+
+for (i in 1:nrow(result_ordered)) {
+    c_city_code = as.scalar(result_ordered[i, 1]);
+    s_city_code = as.scalar(result_ordered[i, 2]);
+    year_val = as.scalar(result_ordered[i, 3]);
+    revenue_val = as.scalar(result_ordered[i, 4]);
+
+    # Map back to original city names based on the encoding used
+    if (c_city_code == 1) {
+        c_city_name = "UNITED KI1";
+    } else {
+        c_city_name = "UNITED KI5";
+    }
+
+    if (s_city_code == 1) {
+        s_city_name = "UNITED KI1";
+    } else {
+        s_city_name = "UNITED KI5";
+    }
+
+    print(c_city_name + " " + s_city_name + " " + as.integer(year_val) + " " + as.integer(revenue_val));
+}
+
+# Calculate total revenue for validation
+total_revenue = sum(result_ordered[, 4]);
+print("");
+print("Total number of result rows: " + nrow(result_ordered));
+print("Total revenue: " + as.integer(total_revenue));
+print("Q3.4 finished");
+}
diff --git a/scripts/ssb/queries/q4_1.dml b/scripts/ssb/queries/q4_1.dml
new file mode 100644
index 00000000000..d5d4c078662
--- /dev/null
+++ b/scripts/ssb/queries/q4_1.dml
@@ -0,0 +1,242 @@
+/* DML-script implementing the ssb query Q4.1 in SystemDS with Dynamic Encoding.
+SELECT
+    d_year,
+    c_nation,
+    SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_partkey = p_partkey
+    AND lo_orderdate = d_datekey
+    AND c_region = 'AMERICA'
+    AND s_region = 'AMERICA'
+    AND (
+        p_mfgr = 'MFGR#1'
+        OR p_mfgr = 'MFGR#2'
+    )
+GROUP BY d_year, c_nation
+ORDER BY d_year, c_nation;
+*/
+
+# Input parameter
+input_dir = $input_dir;
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+customer_csv = read(input_dir + "/customer.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- MANUAL FILTERING AND DATA PREPARATION --
+# Extract minimal data needed for the query
+date_matrix_min = as.matrix(cbind(date_csv[, 1], date_csv[, 5]));
+lineorder_matrix_min = as.matrix(cbind(lineorder_csv[, 3], lineorder_csv[, 4], lineorder_csv[, 5],
+                                      lineorder_csv[, 6], lineorder_csv[, 13], lineorder_csv[, 14]));
+
+# Build filtered parts list (MFGR#1 and MFGR#2)
+part_filtered_keys = matrix(0, rows=0, cols=1);
+
+for(i in 1:nrow(part_csv)) {
+    mfgr_val = as.scalar(part_csv[i, 3]);
+    if(mfgr_val == "MFGR#1" | mfgr_val == "MFGR#2") {
+        # Extract key and create single-element matrix
+        key_val = as.double(as.scalar(part_csv[i, 1]));
+        key_matrix = matrix(key_val, rows=1, cols=1);
+
+        # Append to filtered results
+        part_filtered_keys = rbind(part_filtered_keys, key_matrix);
+    }
+}
+part_count = nrow(part_filtered_keys);
+if(part_count == 0) {
+    part_filtered_keys = matrix(0, rows=1, cols=1);  # Fallback for empty case
+}
+
+# Build filtered customers list (AMERICA region) with dynamic encoding
+cust_filtered_keys = matrix(0, rows=0, cols=1);
+cust_filtered_nations = matrix(0, rows=0, cols=1);
+
+for(i in 1:nrow(customer_csv)) {
+    region_val = as.scalar(customer_csv[i, 6]);
+    if(region_val == "AMERICA") {
+        # Extract key and create single-element matrix
+        key_val = as.double(as.scalar(customer_csv[i, 1]));
+        key_matrix = matrix(key_val, rows=1, cols=1);
+
+        # Extract nation and encode
+        nation_str = as.scalar(customer_csv[i, 5]);
+        if(nation_str == "ARGENTINA") {
+            nation_val = 3;
+        } else if(nation_str == "CANADA") {
+            nation_val = 5;
+        } else if(nation_str == "PERU") {
+            nation_val = 8;
+        } else if(nation_str == "BRAZIL") {
+            nation_val = 13;
+        } else if(nation_str == "UNITED STATES") {
+            nation_val = 25;
+        } else {
+            nation_val = 0;  # Unknown nation
+        }
+        nation_matrix = matrix(nation_val, rows=1, cols=1);
+
+        # Append to filtered results
+        cust_filtered_keys = rbind(cust_filtered_keys, key_matrix);
+        cust_filtered_nations = rbind(cust_filtered_nations, nation_matrix);
+    }
+}
+
+cust_count = nrow(cust_filtered_keys);
+if(cust_count > 0) {
+    # Create customer matrix from filtered data
+    cust_filtered_data = cbind(cust_filtered_keys, cust_filtered_nations);
+} else {
+    cust_filtered_data = matrix(0, rows=1, cols=2);  # Fallback for empty case
+}
+
+# Build filtered suppliers list (AMERICA region)
+supp_filtered_keys = matrix(0, rows=0, cols=1);
+
+for(i in 1:nrow(supplier_csv)) {
+    region_val = as.scalar(supplier_csv[i, 6]);
+    if(region_val == "AMERICA") {
+        # Extract key and create single-element matrix
+        key_val = as.double(as.scalar(supplier_csv[i, 1]));
+        key_matrix = matrix(key_val, rows=1, cols=1);
+
+        # Append to filtered results
+        supp_filtered_keys = rbind(supp_filtered_keys, key_matrix);
+    }
+}
+supp_count = nrow(supp_filtered_keys);
+if(supp_count == 0) {
+    supp_filtered_keys = matrix(0, rows=1, cols=1);  # Fallback for empty case
+}
+
+# Ensure filtered matrices are properly formatted
+if(cust_count > 0) {
+    cust_matrix_formatted = cust_filtered_data;  # Use the already created matrix
+} else {
+    cust_matrix_formatted = matrix(0, rows=1, cols=2);
+}
+
+if(supp_count > 0) {
+    supp_matrix_formatted = supp_filtered_keys;  # Use the already created matrix
+} else {
+    supp_matrix_formatted = matrix(0, rows=1, cols=1);
+}
+
+if(part_count > 0) {
+    part_matrix_formatted = part_filtered_keys;  # Use the already created matrix
+} else {
+    part_matrix_formatted = matrix(0, rows=1, cols=1);
+}
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION (SORT-MERGE METHOD) --
+# Remove any potential zero values from customer matrix
+valid_cust_mask = (cust_matrix_formatted[, 1] > 0);
+if(sum(valid_cust_mask) > 0) {
+    cust_clean = removeEmpty(target=cust_matrix_formatted, margin="rows", select=valid_cust_mask);
+} else {
+    stop("No valid customer data");
+}
+
+# Join lineorder with filtered customer table (lo_custkey = c_custkey)
+lo_cust = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=cust_clean, colB=1, method="sort-merge");
+
+# Join with filtered supplier table (lo_suppkey = s_suppkey)
+lo_cust_sup = raJoin::m_raJoin(A=lo_cust, colA=3, B=supp_matrix_formatted, colB=1, method="sort-merge");
+
+# Join with filtered part table (lo_partkey = p_partkey)
+lo_cust_sup_part = raJoin::m_raJoin(A=lo_cust_sup, colA=2, B=part_matrix_formatted, colB=1, method="sort-merge");
+
+# Join with date table (lo_orderdate = d_datekey)
+joined_matrix = raJoin::m_raJoin(A=lo_cust_sup_part, colA=4, B=date_matrix_min, colB=1, method="sort-merge");
+# -- GROUP-BY & AGGREGATION --
+lo_revenue = joined_matrix[, 5];
+lo_supplycost = joined_matrix[, 6];
+d_year = joined_matrix[, ncol(joined_matrix)];   # last column (d_year)
+c_nation = joined_matrix[, 8];   # customer nation column
+
+profit = lo_revenue - lo_supplycost;
+
+# Create nation mapping for grouping
+unique_nations = unique(c_nation);
+nation_encoding = matrix(0, rows=nrow(unique_nations), cols=1);
+for(i in 1:nrow(unique_nations)) {
+    nation_encoding[i, 1] = i;
+}
+
+# Encode nations to numbers for grouping
+c_nation_encoded = matrix(0, rows=nrow(c_nation), cols=1);
+for(i in 1:nrow(c_nation)) {
+    for(j in 1:nrow(unique_nations)) {
+        if(as.scalar(c_nation[i, 1]) == as.scalar(unique_nations[j, 1])) {
+            c_nation_encoded[i, 1] = j;
+        }
+    }
+}
+
+# Create combined grouping key
+max_nation = max(c_nation_encoded);
+max_year = max(d_year);
+
+nation_scale = ceil(max_nation) + 1;
+year_scale = ceil(max_year) + 1;
+
+combined_key = c_nation_encoded * year_scale + d_year;
+
+# Group and aggregate
+group_input = cbind(profit, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+# Extract results
+key = agg_result[, 1];
+profit_sum = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+# Decode results
+d_year_result = round(key %% year_scale);
+c_nation_encoded_result = round(floor(key / year_scale));
+
+# Prepare for sorting
+result = cbind(d_year_result, c_nation_encoded_result, profit_sum);
+
+# Sort by year, then by nation
+result_ordered = order(target=result, by=2, decreasing=FALSE, index.return=FALSE);
+result_ordered = order(target=result_ordered, by=1, decreasing=FALSE, index.return=FALSE);
+
+# Create nation name lookup based on encoding
+nation_lookup = matrix(0, rows=nrow(result_ordered), cols=1);
+for(i in 1:nrow(result_ordered)) {
+    nation_idx = as.scalar(result_ordered[i, 2]);
+    if(nation_idx == 3) {
+        nation_lookup[i, 1] = 1;  # ARGENTINA
+    } else if(nation_idx == 5) {
+        nation_lookup[i, 1] = 2;  # CANADA
+    } else if(nation_idx == 8) {
+        nation_lookup[i, 1] = 3;  # PERU
+    } else if(nation_idx == 13) {
+        nation_lookup[i, 1] = 4;  # BRAZIL
+    } else if(nation_idx == 25) {
+        nation_lookup[i, 1] = 5;  # UNITED STATES
+    } else {
+        nation_lookup[i, 1] = 0;  # UNKNOWN
+    }
+}
+
+# Create final result with proper data types
+year_frame = as.frame(result_ordered[, 1]);
+profit_frame = as.frame(result_ordered[, 3]);
+
+# Output final results (Year, Nation_Code, Profit)
+print(result_ordered);
\ No newline at end of file
diff --git a/scripts/ssb/queries/q4_2.dml b/scripts/ssb/queries/q4_2.dml
new file mode 100644
index 00000000000..7140713339e
--- /dev/null
+++ b/scripts/ssb/queries/q4_2.dml
@@ -0,0 +1,213 @@
+/* DML-script implementing the ssb query Q4.2 in SystemDS with on-the-fly encoding (no external meta files).
+SELECT
+    d_year,
+    s_nation,
+    p_category,
+    SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_partkey = p_partkey
+    AND lo_orderdate = d_datekey
+    AND c_region = 'AMERICA'
+    AND s_region = 'AMERICA'
+    AND (
+        d_year = 1997
+        OR d_year = 1998
+    )
+    AND (
+        p_mfgr = 'MFGR#1'
+        OR p_mfgr = 'MFGR#2'
+    )
+GROUP BY d_year, s_nation, p_category
+ORDER BY d_year, s_nation, p_category;
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+## Input parameter
+input_dir = $input_dir;
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+customer_csv = read(input_dir + "/customer.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- PREPARING --
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-3 : LO_CUSTKEY | COL-4 : LO_PARTKEY |
+# COL-5 : LO_SUPPKEY | COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE | COL-14 : LO_SUPPLYCOST
+lineorder_csv_min = cbind(lineorder_csv[, 3], lineorder_csv[, 4], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13], lineorder_csv[, 14]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+## PART on-the-fly encoding: encode p_category (col 4); filter by p_mfgr (col 3)
+[part_cat_enc_f, part_cat_meta] = transformencode(target=part_csv[,4], spec="{ \"ids\": false, \"recode\": [\"C1\"] }");
+
+## CUSTOMER filter: keep only c_region == 'AMERICA'; we only need c_custkey
+cust_filt_keys = matrix(0, rows=0, cols=1);
+for (i in 1:nrow(customer_csv)) {
+    if (as.scalar(customer_csv[i,6]) == "AMERICA") {
+        key_val = as.double(as.scalar(customer_csv[i,1]));
+        cust_filt_keys = rbind(cust_filt_keys, matrix(key_val, rows=1, cols=1));
+    }
+}
+if (nrow(cust_filt_keys) == 0) { cust_filt_keys = matrix(0, rows=1, cols=1); }
+
+## SUPPLIER on-the-fly encoding: encode s_nation (col 5); filter by s_region (col 6)
+[sup_nat_enc_f, sup_nat_meta] = transformencode(target=supplier_csv[,5], spec="{ \"ids\": false, \"recode\": [\"C1\"] }");
+sup_filt_keys = matrix(0, rows=0, cols=1);
+sup_filt_nat = matrix(0, rows=0, cols=1);
+for (i in 1:nrow(supplier_csv)) {
+    if (as.scalar(supplier_csv[i,6]) == "AMERICA") {
+        key_val = as.double(as.scalar(supplier_csv[i,1]));
+        nat_code = as.double(as.scalar(sup_nat_enc_f[i,1]));
+        sup_filt_keys = rbind(sup_filt_keys, matrix(key_val, rows=1, cols=1));
+        sup_filt_nat = rbind(sup_filt_nat, matrix(nat_code, rows=1, cols=1));
+    }
+}
+if (nrow(sup_filt_keys) == 0) { sup_filt_keys = matrix(0, rows=1, cols=1); sup_filt_nat = matrix(0, rows=1, cols=1); }
+sup_filt = cbind(sup_filt_keys, sup_filt_nat);
+
+
+## -- FILTERING THE DATA --
+# P_MFGR = 'MFGR#1' OR 'MFGR#2'  -> build filtered part table keeping key and encoded category
+part_filt_keys = matrix(0, rows=0, cols=1);
+part_filt_cat = matrix(0, rows=0, cols=1);
+for (i in 1:nrow(part_csv)) {
+    mfgr_val = as.scalar(part_csv[i,3]);
+    if (mfgr_val == "MFGR#1" | mfgr_val == "MFGR#2") {
+        key_val = as.double(as.scalar(part_csv[i,1]));
+        cat_code = as.double(as.scalar(part_cat_enc_f[i,1]));
+        part_filt_keys = rbind(part_filt_keys, matrix(key_val, rows=1, cols=1));
+        part_filt_cat = rbind(part_filt_cat, matrix(cat_code, rows=1, cols=1));
+    }
+}
+if (nrow(part_filt_keys) == 0) { part_filt_keys = matrix(0, rows=1, cols=1); part_filt_cat = matrix(0, rows=1, cols=1); }
+part_filt = cbind(part_filt_keys, part_filt_cat);
+
+## D_YEAR = 1997 OR 1998
+d_year_filt_1 = raSel::m_raSelection(date_matrix_min, col=2, op="==", val=1997);
+d_year_filt_2 = raSel::m_raSelection(date_matrix_min, col=2, op="==", val=1998);
+d_year_filt = rbind(d_year_filt_1, d_year_filt_2);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+## -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED CUSTOMER TABLE WHERE LO_CUSTKEY = C_CUSTKEY
+lo_cust = raJoin::m_raJoin(A=lineorder_matrix_min, colA=1, B=cust_filt_keys, colB=1, method="sort-merge");
+
+# JOIN: ⨝ SUPPLIER WHERE LO_SUPPKEY = S_SUPPKEY (carry s_nation code)
+lo_cust_sup = raJoin::m_raJoin(A=lo_cust, colA=3, B=sup_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ PART WHERE LO_PARTKEY = P_PARTKEY (carry p_category code)
+lo_cust_sup_part = raJoin::m_raJoin(A=lo_cust_sup, colA=2, B=part_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+joined_matrix = raJoin::m_raJoin(A=lo_cust_sup_part, colA=4, B=d_year_filt, colB=1, method="sort-merge");
+
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 5 OF LINEORDER-MIN-MATRIX
+lo_revenue = joined_matrix[, 5];
+# LO_SUPPLYCOST : COLUMN 6 OF LINEORDER-MIN-MATRIX
+lo_supplycost = joined_matrix[, 6];
+# D_YEAR : COLUMN 2 OF DATE-MIN-MATRIX (last added 2nd col)
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_filt_keys) + ncol(sup_filt) + ncol(part_filt) + 2)];
+# S_NATION (encoded) : COLUMN 2 OF SUPPLIER-FILTERED MATRIX
+s_nation = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_filt_keys) + 2)];
+# P_CATEGORY (encoded) : COLUMN 2 OF PART-FILTERED MATRIX
+p_category = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(cust_filt_keys) + ncol(sup_filt) + 2)];
+
+profit = lo_revenue - lo_supplycost;
+
+# CALCULATING COMBINATION KEY WITH PRIORITY: D_YEAR, S_NATION, P_CATEGORY (internal codes for grouping)
+max_s_nation_grp = max(s_nation);
+max_p_category_grp = max(p_category);
+max_d_year_grp = max(d_year);
+
+s_nation_scale_grp = ceil(max_s_nation_grp) + 1;
+p_category_scale_grp = ceil(max_p_category_grp) + 1;
+d_year_scale_grp = ceil(max_d_year_grp) + 1;
+
+combined_key_grp = d_year * s_nation_scale_grp * p_category_scale_grp + s_nation * p_category_scale_grp + p_category;
+
+group_input = cbind(profit, combined_key_grp);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+key_grp = agg_result[, 1];
+profit_sum = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+# EXTRACTING D_YEAR, S_NATION, P_CATEGORY (internal codes)
+d_year_grp = round(floor(key_grp / (s_nation_scale_grp * p_category_scale_grp)));
+s_nation_grp = round(floor((key_grp %% (s_nation_scale_grp * p_category_scale_grp)) / p_category_scale_grp));
+p_category_grp = round(key_grp %% p_category_scale_grp);
+
+# Decode specs for later
+sup_dec_spec = "{ \"recode\": [\"C1\"] }";
+part_dec_spec = "{ \"recode\": [\"C1\"] }";
+
+# Decode categories for display-code mapping (unordered)
+p_cat_dec_all = transformdecode(target=p_category_grp, spec=part_dec_spec, meta=part_cat_meta);
+
+# Build display codes to match legacy meta mapping for p_category
+p_category_disp = matrix(0, rows=nrow(p_cat_dec_all), cols=1);
+for (i in 1:nrow(p_cat_dec_all)) {
+    cat_str = as.scalar(p_cat_dec_all[i,1]);
+    if (cat_str == "MFGR#11")      p_category_disp[i,1] = 1;
+    else if (cat_str == "MFGR#12") p_category_disp[i,1] = 2;
+    else if (cat_str == "MFGR#13") p_category_disp[i,1] = 6;
+    else if (cat_str == "MFGR#15") p_category_disp[i,1] = 20;
+    else if (cat_str == "MFGR#21") p_category_disp[i,1] = 14;
+    else if (cat_str == "MFGR#22") p_category_disp[i,1] = 10;
+    else if (cat_str == "MFGR#23") p_category_disp[i,1] = 25;
+    else if (cat_str == "MFGR#24") p_category_disp[i,1] = 24;
+    else if (cat_str == "MFGR#25") p_category_disp[i,1] = 5;
+    else p_category_disp[i,1] = as.double(0);
+}
+
+# s_nation codes already align with legacy mapping; reuse as display codes
+s_nation_disp = s_nation_grp;
+
+# Compute display key using display codes
+s_nation_scale_disp = ceil(max(s_nation_disp)) + 1;
+p_category_scale_disp = ceil(max(p_category_disp)) + 1;
+d_year_scale_disp = ceil(max(d_year_grp)) + 1;
+
+key_disp = d_year_grp * s_nation_scale_disp * p_category_scale_disp + s_nation_disp * p_category_scale_disp + p_category_disp;
+
+# Compose display result and sort by display key to match legacy order
+result_disp = cbind(d_year_grp, s_nation_disp, p_category_disp, profit_sum, key_disp);
+idx_order = order(target=result_disp, by=5, decreasing=FALSE, index.return=TRUE);
+result_ordered_disp = order(target=result_disp, by=5, decreasing=FALSE, index.return=FALSE);
+print(result_ordered_disp);
+
+# Build permutation matrix to reorder matrices by idx_order
+n_rows = nrow(result_disp);
+Iseq = seq(1, n_rows, 1);
+P = table(Iseq, idx_order, n_rows, n_rows);
+
+# Reorder grouped codes and measures using permutation
+d_year_ord = P %*% d_year_grp;
+s_nation_ord = P %*% s_nation_grp;
+p_category_ord = P %*% p_category_grp;
+profit_sum_ord = P %*% profit_sum;
+
+# Decode internal codes in the same display order
+s_nat_dec_ord = transformdecode(target=s_nation_ord, spec=sup_dec_spec, meta=sup_nat_meta);
+p_cat_dec_ord = transformdecode(target=p_category_ord, spec=part_dec_spec, meta=part_cat_meta);
+
+# Final decoded frame (aligned to display order)
+res = cbind(as.frame(d_year_ord), s_nat_dec_ord, p_cat_dec_ord, as.frame(profit_sum_ord));
+print(res);
+
diff --git a/scripts/ssb/queries/q4_3.dml b/scripts/ssb/queries/q4_3.dml
new file mode 100644
index 00000000000..69462151089
--- /dev/null
+++ b/scripts/ssb/queries/q4_3.dml
@@ -0,0 +1,173 @@
+# DML-script implementing the ssb query Q4.3 in SystemDS.
+
+/* DML-script implementing the ssb query Q4.3 in SystemDS with on-the-fly encoding (no external meta files).
+SELECT
+    d_year,
+    s_city,
+    p_brand,
+    SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_partkey = p_partkey
+    AND lo_orderdate = d_datekey
+    AND s_nation = 'UNITED STATES'
+    AND (
+        d_year = 1997
+        OR d_year = 1998
+    )
+    AND p_category = 'MFGR#14'
+GROUP BY d_year, s_city, p_brand
+ORDER BY d_year, s_city, p_brand;
+*/
+
+# -- SOURCING THE RA-FUNCTIONS --
+source("./scripts/builtin/raSelection.dml") as raSel
+source("./scripts/builtin/raJoin.dml") as raJoin
+source("./scripts/builtin/raGroupby.dml") as raGrp
+
+## Input parameter
+input_dir = $input_dir;
+
+# -- READING INPUT FILES --
+# CSV TABLES
+date_csv = read(input_dir + "/date.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+lineorder_csv = read(input_dir + "/lineorder.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+part_csv = read(input_dir + "/part.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+supplier_csv = read(input_dir + "/supplier.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+customer_csv = read(input_dir + "/customer.tbl", data_type="frame", format="csv", header=FALSE, sep="|");
+
+
+# -- PREPARING --
+# EXTRACTING MINIMAL DATE DATA TO OPTIMIZE RUNTIME => COL-1 : DATE-KEY | COL-5 : D_YEAR
+date_csv_min = cbind(date_csv[, 1], date_csv[, 5]);
+date_matrix_min = as.matrix(date_csv_min);
+
+# EXTRACTING MINIMAL LINEORDER DATA TO OPTIMIZE RUNTIME => COL-3 : LO_CUSTKEY | COL-4 : LO_PARTKEY |
+# COL-5 : LO_SUPPKEY | COL-6 : LO_ORDERDATE | COL-13 : LO_REVENUE | COL-14 : LO_SUPPLYCOST
+lineorder_csv_min = cbind(lineorder_csv[, 3], lineorder_csv[, 4], lineorder_csv[, 5], lineorder_csv[, 6], lineorder_csv[, 13], lineorder_csv[, 14]);
+lineorder_matrix_min = as.matrix(lineorder_csv_min);
+
+## Prepare PART on-the-fly encodings (only need p_brand encoding, filter by p_category string)
+# We'll encode column 5 (p_brand) on-the-fly and later filter by category string 'MFGR#14'.
+[part_brand_enc_f, part_brand_meta] = transformencode(target=part_csv[,5], spec="{ \"ids\": false, \"recode\": [\"C1\"] }");
+
+# EXTRACTING MINIMAL CUSTOMER DATA TO OPTIMIZE RUNTIME => COL-1 : CUSTOMER-KEY
+cust_csv_min = customer_csv[, 1];
+cust_matrix_min = as.matrix(cust_csv_min);
+
+## Prepare SUPPLIER on-the-fly encodings (encode s_city, filter by s_nation string)
+[sup_city_enc_f, sup_city_meta] = transformencode(target=supplier_csv[,4], spec="{ \"ids\": false, \"recode\": [\"C1\"] }");
+
+
+## -- FILTERING THE DATA WITH RA-SELECTION FUNCTION / LOOPS --
+# D_YEAR = 1997 OR 1998
+d_year_filt_1 = raSel::m_raSelection(date_matrix_min, col=2, op="==", val=1997);
+d_year_filt_2 = raSel::m_raSelection(date_matrix_min, col=2, op="==", val=1998);
+d_year_filt = rbind(d_year_filt_1, d_year_filt_2);
+
+# Build filtered SUPPLIER table (s_nation == 'UNITED STATES'), keeping key and encoded city
+sup_filt_keys = matrix(0, rows=0, cols=1);
+sup_filt_city = matrix(0, rows=0, cols=1);
+for (i in 1:nrow(supplier_csv)) {
+    if (as.scalar(supplier_csv[i,5]) == "UNITED STATES") {
+        key_val = as.double(as.scalar(supplier_csv[i,1]));
+        city_code = as.double(as.scalar(sup_city_enc_f[i,1]));
+        sup_filt_keys = rbind(sup_filt_keys, matrix(key_val, rows=1, cols=1));
+        sup_filt_city = rbind(sup_filt_city, matrix(city_code, rows=1, cols=1));
+    }
+}
+if (nrow(sup_filt_keys) == 0) {
+    # Fallback to avoid empty join
+    sup_filt_keys = matrix(0, rows=1, cols=1);
+    sup_filt_city = matrix(0, rows=1, cols=1);
+}
+sup_filt = cbind(sup_filt_keys, sup_filt_city);
+
+# Build filtered PART table (p_category == 'MFGR#14'), keeping key and encoded brand
+part_filt_keys = matrix(0, rows=0, cols=1);
+part_filt_brand = matrix(0, rows=0, cols=1);
+for (i in 1:nrow(part_csv)) {
+    if (as.scalar(part_csv[i,4]) == "MFGR#14") {
+        key_val = as.double(as.scalar(part_csv[i,1]));
+        brand_code = as.double(as.scalar(part_brand_enc_f[i,1]));
+        part_filt_keys = rbind(part_filt_keys, matrix(key_val, rows=1, cols=1));
+        part_filt_brand = rbind(part_filt_brand, matrix(brand_code, rows=1, cols=1));
+    }
+}
+if (nrow(part_filt_keys) == 0) {
+    part_filt_keys = matrix(0, rows=1, cols=1);
+    part_filt_brand = matrix(0, rows=1, cols=1);
+}
+part_filt = cbind(part_filt_keys, part_filt_brand);
+
+
+# -- JOIN TABLES WITH RA-JOIN FUNCTION --
+# JOINING MINIMIZED LINEORDER TABLE WITH FILTERED SUPPLIER TABLE WHERE LO_SUPPKEY = S_SUPPKEY
+lo_sup = raJoin::m_raJoin(A=lineorder_matrix_min, colA=3, B=sup_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ PART WHERE LO_PARTKEY = P_PARTKEY
+lo_sup_part = raJoin::m_raJoin(A=lo_sup, colA=2, B=part_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ DATE WHERE LO_ORDERDATE = D_DATEKEY
+lo_sup_part_date = raJoin::m_raJoin(A=lo_sup_part, colA=4, B=d_year_filt, colB=1, method="sort-merge");
+
+# JOIN: ⨝ CUSTOMER WHERE LO_CUSTKEY = C_CUSTKEY (no filter used, but keep join for parity)
+cust_matrix_min = as.matrix(customer_csv[,1]);
+joined_matrix = raJoin::m_raJoin(A=lo_sup_part_date, colA=1, B=cust_matrix_min, colB=1, method="sort-merge");
+
+
+# -- GROUP-BY & AGGREGATION --
+# LO_REVENUE : COLUMN 5 OF LINEORDER-MIN-MATRIX
+lo_revenue = joined_matrix[, 5];
+# LO_SUPPLYCOST : COLUMN 6 OF LINEORDER-MIN-MATRIX
+lo_supplycost = joined_matrix[, 6];
+# D_YEAR : last column added in the previous join with date (2nd col of date_min)
+d_year = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(sup_filt) + ncol(part_filt) + 2)];
+# S_CITY (encoded) : COLUMN 2 OF SUPPLIER-FILTERED MATRIX
+s_city = joined_matrix[,(ncol(lineorder_matrix_min) + 2)];
+# P_BRAND (encoded) : COLUMN 2 OF PART-FILTERED MATRIX
+p_brand = joined_matrix[,(ncol(lineorder_matrix_min) + ncol(sup_filt) + 2)];
+
+profit = lo_revenue - lo_supplycost;
+
+# CALCULATING COMBINATION KEY WITH PRIORITY: D_YEAR, S_CITY, P_BRAND
+max_s_city = max(s_city);
+max_p_brand = max(p_brand);
+max_d_year = max(d_year);
+
+s_city_scale_f = ceil(max_s_city) + 1;
+p_brand_scale_f = ceil(max_p_brand) + 1;
+d_year_scale_f = ceil(max_d_year) + 1;
+
+combined_key = d_year * s_city_scale_f * p_brand_scale_f + s_city * p_brand_scale_f + p_brand;
+
+group_input = cbind(profit, combined_key);
+agg_result = raGrp::m_raGroupby(X=group_input, col=2, method="nested-loop");
+
+key = agg_result[, 1];
+profit = rowSums(agg_result[, 2:ncol(agg_result)]);
+
+# EXTRACTING D_YEAR, S_CITY, P_BRAND
+d_year = round(floor(key / (s_city_scale_f * p_brand_scale_f)));
+s_city = round(floor((key %% (s_city_scale_f * p_brand_scale_f)) / p_brand_scale_f));
+p_brand = round(key %% p_brand_scale_f);
+
+result = cbind(d_year, s_city, p_brand, profit, key);
+
+# -- SORTING --
+# PRIORITY 1 D_YEAR, 2 S_CITY, 3 P_BRAND
+result_ordered = order(target=result, by=5, decreasing=FALSE, index.return=FALSE);
+print(result_ordered);
+
+# -- DECODING S_CITY & P_BRAND (using on-the-fly meta from transformencode) --
+sup_dec_spec = "{ \"recode\": [\"C1\"] }";
+part_dec_spec = "{ \"recode\": [\"C1\"] }";
+
+s_city_dec = transformdecode(target=result_ordered[, 2], spec=sup_dec_spec, meta=sup_city_meta);
+p_brand_dec = transformdecode(target=result_ordered[, 3], spec=part_dec_spec, meta=part_brand_meta);
+
+res = cbind(as.frame(result_ordered[, 1]), s_city_dec, p_brand_dec, as.frame(result_ordered[, 4]));
+
+print(res);
diff --git a/scripts/ssb/shell/run_all_perf.sh b/scripts/ssb/shell/run_all_perf.sh
new file mode 100755
index 00000000000..9210f97ba1d
--- /dev/null
+++ b/scripts/ssb/shell/run_all_perf.sh
@@ -0,0 +1,1509 @@
+#!/usr/bin/env bash
+#
+# Multi-Engine SSB Performance Benchmark Runner
+# =============================================
+#
+# CORE SCRIPTS STATUS:
+# - Version: 1.0 (September 5, 2025)
+# - Status: Production-Ready with Advanced Statistical Analysis
+#
+# ENHANCED FEATURES IMPLEMENTED:
+# ✓ Multi-engine benchmarking (SystemDS, PostgreSQL, DuckDB)
+# ✓ Advanced statistical analysis (mean, stdev, p95, CV) with high-precision calculations
+# ✓ Single-pass timing optimization eliminating cache effects between measurements
+# ✓ Cross-engine core timing support (SystemDS stats, PostgreSQL EXPLAIN, DuckDB JSON profiling)
+# ✓ Adaptive terminal layout with dynamic column scaling and multi-row statistics display
+# ✓ Comprehensive metadata collection (system info, software versions, data build info)
+# ✓ Environment verification and graceful degradation for missing engines
+# ✓ Real-time progress indicators with proper terminal width handling
+# ✓ Precision timing measurements with millisecond accuracy using /usr/bin/time -p
+# ✓ Robust error handling with pre-flight validation and error propagation
+# ✓ CSV and JSON output with timestamped files and complete statistical data
+# ✓ Fastest engine detection with tie handling
+# ✓ Database connection validation and parallel execution control (disabled for fair comparison)
+# ✓ Cross-platform compatibility (macOS/Linux) with intelligent executable discovery
+# ✓ Reproducible benchmarking with configurable seeds and detailed run configuration
+#
+# RECENT IMPORTANT ADDITIONS:
+# - Accepts --input-dir=PATH and forwards it into SystemDS DML runs via
+#   `-nvargs input_dir=/path/to/data`. This allows DML queries to load data from
+#   custom locations without hardcoded paths.
+# - Runner performs a pre-flight input-dir existence check and exits early with
+#   a clear message when the directory is missing.
+# - Test-run output is scanned for runtime SystemDS errors; when detected the
+#   runner marks the query as failed and includes an `error_message` field in
+#   the generated JSON results to aid debugging and CI automation.
+#
+# STATISTICAL MEASUREMENTS:
+# - Mean: Arithmetic average execution time (typical performance expectation)
+# - Standard Deviation: Population stdev measuring consistency/reliability
+# - P95 Percentile: 95th percentile for worst-case performance bounds
+# - Coefficient of Variation: Relative variability as percentage for cross-scale comparison
+# - Display Format: "1200.0 (±14.1ms/1.2%, p95:1220.0ms)" showing all key metrics
+#
+# ENGINES SUPPORTED:
+# - SystemDS: Machine learning platform with DML queries (single-threaded via XML config)
+# - PostgreSQL: Industry-standard relational database (parallel workers disabled)
+# - DuckDB: High-performance analytical database (single-threaded via PRAGMA)
+#
+# USAGE (from repo root):
+#   scripts/ssb/shell/run_all_perf.sh                          # run full benchmark with all engines
+#   scripts/ssb/shell/run_all_perf.sh --stats                  # enable internal engine timing statistics
+#   scripts/ssb/shell/run_all_perf.sh --warmup=3 --repeats=10  # custom warmup and repetition settings
+#   scripts/ssb/shell/run_all_perf.sh --layout=wide            # force wide table layout
+#   scripts/ssb/shell/run_all_perf.sh --seed=12345             # reproducible benchmark with specific seed
+#   scripts/ssb/shell/run_all_perf.sh q1.1 q2.3 q4.1           # benchmark specific queries only
+#
+set -euo pipefail
+export LC_ALL=C
+
+REPEATS=5
+WARMUP=1
+POSTGRES_DB="ssb"
+POSTGRES_USER="$(whoami)"
+POSTGRES_HOST="localhost"
+
+export _JAVA_OPTIONS="${_JAVA_OPTIONS:-} -Xms2g -Xmx2g -XX:+UseParallelGC -XX:ParallelGCThreads=1"
+
+# Determine script directory and project root (repo root)
+if command -v realpath >/dev/null 2>&1; then
+  SCRIPT_DIR="$(dirname "$(realpath "$0")")"
+else
+  SCRIPT_DIR="$(python - <<'PY'
+import os, sys
+print(os.path.dirname(os.path.abspath(sys.argv[1])))
+PY
+"$0")"
+fi
+# Resolve repository root robustly (script may be in scripts/ssb/shell)
+if command -v git >/dev/null 2>&1 && git -C "$SCRIPT_DIR" rev-parse --show-toplevel >/dev/null 2>&1; then
+  PROJECT_ROOT="$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)"
+else
+  # Fallback: ascend until we find markers (.git or pom.xml)
+  __dir="$SCRIPT_DIR"
+  PROJECT_ROOT=""
+  while [[ "$__dir" != "/" ]]; do
+    if [[ -d "$__dir/.git" || -f "$__dir/pom.xml" ]]; then
+      PROJECT_ROOT="$__dir"; break
+    fi
+    __dir="$(dirname "$__dir")"
+  done
+  : "${PROJECT_ROOT:=$(cd "$SCRIPT_DIR/../../../" && pwd)}"
+fi
+
+# Create single-thread configuration
+CONF_DIR="$PROJECT_ROOT/conf"
+SINGLE_THREAD_CONF="$CONF_DIR/single_thread.xml"
+mkdir -p "$CONF_DIR"
+if [[ ! -f "$SINGLE_THREAD_CONF" ]]; then
+cat > "$SINGLE_THREAD_CONF" <<'XML'
+<configuration>
+  <property>
+    <name>sysds.cp.parallel.ops</name><value>false</value>
+  </property>
+  <property>
+    <name>sysds.num.threads</name><value>1</value>
+  </property>
+</configuration>
+XML
+fi
+SYS_EXTRA_ARGS=( "-config" "$SINGLE_THREAD_CONF" )
+
+# Query and system directories
+QUERY_DIR="$PROJECT_ROOT/scripts/ssb/queries"
+
+# Locate SystemDS binary
+SYSTEMDS_CMD="$PROJECT_ROOT/bin/systemds"
+if [[ ! -x "$SYSTEMDS_CMD" ]]; then
+  SYSTEMDS_CMD="$(command -v systemds || true)"
+fi
+if [[ -z "$SYSTEMDS_CMD" || ! -x "$SYSTEMDS_CMD" ]]; then
+  echo "SystemDS binary not found." >&2
+  exit 1
+fi
+
+# Database directories and executables
+# SQL files were moved under scripts/ssb/sql
+SQL_DIR="$PROJECT_ROOT/scripts/ssb/sql"
+
+# Try to find PostgreSQL psql executable
+PSQL_EXEC=""
+for path in "/opt/homebrew/opt/libpq/bin/psql" "/usr/local/bin/psql" "/usr/bin/psql" "$(command -v psql || true)"; do
+  if [[ -x "$path" ]]; then
+    PSQL_EXEC="$path"
+    break
+  fi
+done
+
+# Try to find DuckDB executable
+DUCKDB_EXEC=""
+for path in "/opt/homebrew/bin/duckdb" "/usr/local/bin/duckdb" "/usr/bin/duckdb" "$(command -v duckdb || true)"; do
+  if [[ -x "$path" ]]; then
+    DUCKDB_EXEC="$path"
+    break
+  fi
+done
+
+DUCKDB_DB_PATH="$SQL_DIR/ssb.duckdb"
+
+# Environment verification
+verify_environment() {
+  local ok=true
+  echo "Verifying environment..."
+
+  if [[ ! -x "$SYSTEMDS_CMD" ]]; then
+    echo "✗ SystemDS binary missing ($SYSTEMDS_CMD)" >&2
+    ok=false
+  else
+    echo "✓ SystemDS binary found: $SYSTEMDS_CMD"
+  fi
+
+  if [[ -z "$PSQL_EXEC" || ! -x "$PSQL_EXEC" ]]; then
+    echo "✗ psql not found (tried common paths)" >&2
+    echo "  PostgreSQL benchmarks will be skipped" >&2
+    PSQL_EXEC=""
+  else
+    echo "✓ psql found: $PSQL_EXEC"
+    if ! "$PSQL_EXEC" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" -c "SELECT 1" >/dev/null 2>&1; then
+      echo "✗ Could not connect to PostgreSQL database ($POSTGRES_DB)" >&2
+      echo "  PostgreSQL benchmarks will be skipped" >&2
+      PSQL_EXEC=""
+    else
+      echo "✓ PostgreSQL database connection successful"
+    fi
+  fi
+
+  if [[ -z "$DUCKDB_EXEC" || ! -x "$DUCKDB_EXEC" ]]; then
+    echo "✗ DuckDB not found (tried common paths)" >&2
+    echo "  DuckDB benchmarks will be skipped" >&2
+    DUCKDB_EXEC=""
+  else
+    echo "✓ DuckDB found: $DUCKDB_EXEC"
+    if [[ ! -f "$DUCKDB_DB_PATH" ]]; then
+      echo "✗ DuckDB database missing ($DUCKDB_DB_PATH)" >&2
+      echo "  DuckDB benchmarks will be skipped" >&2
+      DUCKDB_EXEC=""
+    elif ! "$DUCKDB_EXEC" "$DUCKDB_DB_PATH" -c "SELECT 1" >/dev/null 2>&1; then
+      echo "✗ DuckDB database could not be opened" >&2
+      echo "  DuckDB benchmarks will be skipped" >&2
+      DUCKDB_EXEC=""
+    else
+      echo "✓ DuckDB database accessible"
+    fi
+  fi
+
+  if [[ ! -x "$SYSTEMDS_CMD" ]]; then
+    echo "Error: SystemDS is required but not found" >&2
+    exit 1
+  fi
+
+  echo ""
+}
+
+# Convert seconds to milliseconds
+sec_to_ms() {
+  awk -v sec="$1" 'BEGIN{printf "%.1f", sec * 1000}'
+}
+
+# Statistical functions for multiple measurements
+calculate_statistics() {
+  local values=("$@")
+  local n=${#values[@]}
+
+  if [[ $n -eq 0 ]]; then
+    echo "0|0|0"
+    return
+  fi
+
+  if [[ $n -eq 1 ]]; then
+    # mean|stdev|p95
+    printf '%.1f|0.0|%.1f\n' "${values[0]}" "${values[0]}"
+    return
+  fi
+
+  # Compute mean and population stdev with higher precision in a single awk pass
+  local mean_stdev
+  mean_stdev=$(printf '%s\n' "${values[@]}" | awk '
+    { x[NR]=$1; s+=$1 }
+    END {
+      n=NR; if(n==0){ printf "0|0"; exit }
+      m=s/n;
+      ss=0; for(i=1;i<=n;i++){ d=x[i]-m; ss+=d*d }
+      stdev=sqrt(ss/n);
+      printf "%.6f|%.6f", m, stdev
+    }')
+
+  local mean=$(echo "$mean_stdev" | cut -d'|' -f1)
+  local stdev=$(echo "$mean_stdev" | cut -d'|' -f2)
+
+  # Calculate p95 (nearest-rank: ceil(0.95*n))
+  local sorted_values=($(printf '%s\n' "${values[@]}" | sort -n))
+  local p95_index=$(awk -v n="$n" 'BEGIN{ idx = int(0.95*n + 0.999999); if(idx<1) idx=1; if(idx>n) idx=n; print idx-1 }')
+  local p95=${sorted_values[$p95_index]}
+
+  # Format to one decimal place
+  printf '%.1f|%.1f|%.1f\n' "$mean" "$stdev" "$p95"
+}
+
+# Format statistics for display
+format_statistics() {
+  local mean="$1"
+  local stdev="$2"
+  local p95="$3"
+  local repeats="$4"
+
+  if [[ $repeats -eq 1 ]]; then
+    echo "$mean"
+  else
+    # Calculate coefficient of variation (CV) as percentage
+    local cv_percent=0
+    if [[ $(awk -v mean="$mean" 'BEGIN{print (mean > 0)}') -eq 1 ]]; then
+      cv_percent=$(awk -v stdev="$stdev" -v mean="$mean" 'BEGIN{printf "%.1f", (stdev * 100) / mean}')
+    fi
+    echo "$mean (±${stdev}ms/${cv_percent}%, p95:${p95}ms)"
+  fi
+}
+
+# Format only the stats line (without the mean), e.g., "(±10.2ms/0.6%, p95:1740.0ms)"
+format_stats_only() {
+  local mean="$1"
+  local stdev="$2"
+  local p95="$3"
+  local repeats="$4"
+
+  if [[ $repeats -eq 1 ]]; then
+    echo ""
+    return
+  fi
+  # Only for numeric means
+  if ! [[ "$mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+    echo ""
+    return
+  fi
+  local cv_percent=0
+  if [[ $(awk -v mean="$mean" 'BEGIN{print (mean > 0)}') -eq 1 ]]; then
+    cv_percent=$(awk -v stdev="$stdev" -v mean="$mean" 'BEGIN{printf "%.1f", (stdev * 100) / mean}')
+  fi
+  echo "(±${stdev}ms/${cv_percent}%, p95:${p95}ms)"
+}
+
+# Format only the CV line (±stdev/CV%)
+format_cv_only() {
+  local mean="$1"; local stdev="$2"; local repeats="$3"
+  if [[ $repeats -eq 1 ]]; then echo ""; return; fi
+  if ! [[ "$mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo ""; return; fi
+  local cv_percent=0
+  if [[ $(awk -v mean="$mean" 'BEGIN{print (mean > 0)}') -eq 1 ]]; then
+    cv_percent=$(awk -v stdev="$stdev" -v mean="$mean" 'BEGIN{printf "%.1f", (stdev * 100) / mean}')
+  fi
+  echo "±${stdev}ms/${cv_percent}%"
+}
+
+# Format only the p95 line
+format_p95_only() {
+  local p95="$1"; local repeats="$2"
+  if [[ $repeats -eq 1 ]]; then echo ""; return; fi
+  if ! [[ "$p95" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo ""; return; fi
+  echo "p95:${p95}ms"
+}
+
+# Column widths for wide layout - optimized for 125-char terminals
+WIDE_COL_WIDTHS=(8 14 14 12 16 12 12 18)
+
+# Draw a grid line like +----------+----------------+...
+grid_line_wide() {
+  local parts=("+")
+  for w in "${WIDE_COL_WIDTHS[@]}"; do
+    parts+=("$(printf '%*s' "$((w+2))" '' | tr ' ' '-')+")
+  done
+  printf '%s\n' "${parts[*]}" | tr -d ' '
+}
+
+# Print a grid row with vertical separators using the wide layout widths
+grid_row_wide() {
+  local -a cells=("$@")
+  local cols=${#WIDE_COL_WIDTHS[@]}
+  while [[ ${#cells[@]} -lt $cols ]]; do
+    cells+=("")
+  done
+
+  # Build a printf format string that right-aligns numeric and statistic-like cells
+  # (numbers, lines starting with ± or p95, or containing p95/±) while leaving the
+  # first column (query) left-aligned for readability.
+  local fmt=""
+  for i in $(seq 0 $((cols-1))); do
+    local w=${WIDE_COL_WIDTHS[i]}
+    if [[ $i -eq 0 ]]; then
+      # Query name: left-align
+      fmt+="| %-${w}s"
+    else
+      local cell="${cells[i]}"
+      # Heuristic: right-align if the cell is a plain number or contains statistic markers
+      if [[ "$cell" =~ ^[[:space:]]*[0-9]+(\.[0-9]+)?[[:space:]]*$ ]] || [[ "$cell" == ±* ]] || [[ "$cell" == *'±'* ]] || [[ "$cell" == p95* ]] || [[ "$cell" == *'p95'* ]] || [[ "$cell" == \(* ]]; then
+        fmt+=" | %${w}s"
+      else
+        fmt+=" | %-${w}s"
+      fi
+    fi
+  done
+  fmt+=" |\n"
+
+  printf "$fmt" "${cells[@]}"
+}
+
+# Time a command and return real time in ms
+time_command_ms() {
+  local out
+  # Properly capture stderr from /usr/bin/time while suppressing stdout of the command
+  out=$({ /usr/bin/time -p "$@" > /dev/null; } 2>&1)
+  local real_sec=$(echo "$out" | awk '/^real /{print $2}')
+  if [[ -z "$real_sec" || ! "$real_sec" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+    echo "(error)"
+    return 1
+  fi
+  sec_to_ms "$real_sec"
+}
+
+# Time a command, capturing stdout to a file, and return real time in ms
+time_command_ms_capture() {
+  local stdout_file="$1"; shift
+  local out
+  out=$({ /usr/bin/time -p "$@" > "$stdout_file"; } 2>&1)
+  local real_sec=$(echo "$out" | awk '/^real /{print $2}')
+  if [[ -z "$real_sec" || ! "$real_sec" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+    echo "(error)"
+    return 1
+  fi
+  sec_to_ms "$real_sec"
+}
+
+# Run a SystemDS query and compute statistics
+run_systemds_avg() {
+  local dml="$1"
+  # Optional second parameter: path to write an error message if the test-run fails
+  local err_out_file="${2:-}"
+  local shell_times=()
+  local core_times=()
+  local core_have=false
+
+  # Change to project root directory so relative paths in DML work correctly
+  local original_dir="$(pwd)"
+  cd "$PROJECT_ROOT"
+
+  # First, test run to validate the query (avoids timing zero or errors later)
+  tmp_test=$(mktemp)
+  if $RUN_STATS; then
+    if ! "$SYSTEMDS_CMD" "$dml" -stats "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}" > "$tmp_test" 2>&1; then
+      err_msg=$(sed -n '1,200p' "$tmp_test" | tr '\n' ' ')
+      echo "Error: SystemDS test run failed for $dml: $err_msg" >&2
+      # Write error message to provided error file for JSON capture
+      if [[ -n "$err_out_file" ]]; then printf '%s' "$err_msg" > "$err_out_file" || true; fi
+      rm -f "$tmp_test"
+      echo "(error)|0|0|(n/a)|0|0"
+      cd "$original_dir"; return
+    fi
+    err_msg=$(sed -n '/An Error Occurred :/,$ p' "$tmp_test" | sed -n '1,200p' | tr '\n' ' ')
+    if [[ -n "$err_msg" ]]; then
+      echo "Error: SystemDS reported runtime error for $dml: $err_msg" >&2
+      if [[ -n "$err_out_file" ]]; then printf '%s' "$err_msg" > "$err_out_file" || true; fi
+      rm -f "$tmp_test"
+      echo "(error)|0|0|(n/a)|0|0"
+      cd "$original_dir"; return
+    fi
+  else
+    if ! "$SYSTEMDS_CMD" "$dml" "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}" > "$tmp_test" 2>&1; then
+      err_msg=$(sed -n '1,200p' "$tmp_test" | tr '\n' ' ')
+      echo "Error: SystemDS test run failed for $dml: $err_msg" >&2
+      if [[ -n "$err_out_file" ]]; then printf '%s' "$err_msg" > "$err_out_file" || true; fi
+      rm -f "$tmp_test"
+      echo "(error)|0|0|(n/a)|0|0"
+      cd "$original_dir"; return
+    fi
+    err_msg=$(sed -n '/An Error Occurred :/,$ p' "$tmp_test" | sed -n '1,200p' | tr '\n' ' ')
+    if [[ -n "$err_msg" ]]; then
+      echo "Error: SystemDS reported runtime error for $dml: $err_msg" >&2
+      if [[ -n "$err_out_file" ]]; then printf '%s' "$err_msg" > "$err_out_file" || true; fi
+      rm -f "$tmp_test"
+      echo "(error)|0|0|(n/a)|0|0"
+      cd "$original_dir"; return
+    fi
+  fi
+  rm -f "$tmp_test"
+
+  # Warmup runs
+  for ((w=1; w<=WARMUP; w++)); do
+    if $RUN_STATS; then
+      "$SYSTEMDS_CMD" "$dml" -stats "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}" > /dev/null 2>&1 || true
+    else
+      "$SYSTEMDS_CMD" "$dml" "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}" > /dev/null 2>&1 || true
+    fi
+  done
+
+  # Timed runs - collect all measurements
+  for ((i=1; i<=REPEATS; i++)); do
+    if $RUN_STATS; then
+      local shell_ms
+      local temp_file
+      temp_file=$(mktemp)
+      shell_ms=$(time_command_ms_capture "$temp_file" "$SYSTEMDS_CMD" "$dml" -stats "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}") || {
+        rm -f "$temp_file"; cd "$original_dir"; echo "(error)|0|0|(n/a)|0|0"; return; }
+      shell_times+=("$shell_ms")
+
+      # Extract SystemDS internal timing from the same run
+      local internal_sec
+      internal_sec=$(awk '/Total execution time:/ {print $4}' "$temp_file" | tail -1 || true)
+      rm -f "$temp_file"
+      if [[ -n "$internal_sec" ]] && [[ "$internal_sec" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+        local core_ms
+        core_ms=$(awk -v sec="$internal_sec" 'BEGIN{printf "%.1f", sec * 1000}')
+        core_times+=("$core_ms")
+        core_have=true
+      fi
+    else
+      local shell_ms
+      shell_ms=$(time_command_ms "$SYSTEMDS_CMD" "$dml" "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}") || { cd "$original_dir"; echo "(error)|0|0|(n/a)|0|0"; return; }
+      shell_times+=("$shell_ms")
+    fi
+  done
+
+  # Return to original directory
+  cd "$original_dir"
+
+  # Calculate statistics for shell times
+  local shell_stats
+  shell_stats=$(calculate_statistics "${shell_times[@]}")
+
+  # Calculate statistics for core times if available
+  local core_stats
+  if $RUN_STATS && $core_have && [[ ${#core_times[@]} -gt 0 ]]; then
+    core_stats=$(calculate_statistics "${core_times[@]}")
+  else
+    core_stats="(n/a)|0|0"
+  fi
+
+  echo "$shell_stats|$core_stats"
+}
+
+# Run a PostgreSQL query and compute statistics
+run_psql_avg_ms() {
+  local sql_file="$1"
+
+  # Check if PostgreSQL is available
+  if [[ -z "$PSQL_EXEC" ]]; then
+    echo "(unavailable)|0|0|(n/a)|0|0"
+    return
+  fi
+
+  # Test run first
+  "$PSQL_EXEC" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" \
+    -v ON_ERROR_STOP=1 -q \
+    -c "SET max_parallel_workers=0; SET max_parallel_maintenance_workers=0; SET max_parallel_workers_per_gather=0; SET parallel_leader_participation=off;" \
+    -f "$sql_file" >/dev/null 2>/dev/null || {
+      echo "(error)|0|0|(n/a)|0|0"
+      return
+    }
+
+  local shell_times=()
+  local core_times=()
+  local core_have=false
+
+  for ((i=1; i<=REPEATS; i++)); do
+    # Wall-clock shell time
+    local ms
+    ms=$(time_command_ms "$PSQL_EXEC" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" \
+      -v ON_ERROR_STOP=1 -q \
+      -c "SET max_parallel_workers=0; SET max_parallel_maintenance_workers=0; SET max_parallel_workers_per_gather=0; SET parallel_leader_participation=off;" \
+      -f "$sql_file" 2>/dev/null) || {
+        echo "(error)|0|0|(n/a)|0|0"
+        return
+      }
+    shell_times+=("$ms")
+
+    # Core execution time using EXPLAIN ANALYZE (if --stats enabled)
+    if $RUN_STATS; then
+      local tmp_explain
+      tmp_explain=$(mktemp)
+
+      # Create EXPLAIN ANALYZE version of the query
+      echo "SET max_parallel_workers=0; SET max_parallel_maintenance_workers=0; SET max_parallel_workers_per_gather=0; SET parallel_leader_participation=off;" > "$tmp_explain"
+      echo "EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)" >> "$tmp_explain"
+      cat "$sql_file" >> "$tmp_explain"
+
+      # Execute EXPLAIN ANALYZE and extract execution time
+      local explain_output core_ms
+      explain_output=$("$PSQL_EXEC" -U "$POSTGRES_USER" -h "$POSTGRES_HOST" -d "$POSTGRES_DB" \
+        -v ON_ERROR_STOP=1 -q -f "$tmp_explain" 2>/dev/null || true)
+
+      if [[ -n "$explain_output" ]]; then
+        # Extract "Execution Time: X.XXX ms" from EXPLAIN ANALYZE output
+        local exec_time_ms
+        exec_time_ms=$(echo "$explain_output" | grep -oE "Execution Time: [0-9]+\.[0-9]+" | grep -oE "[0-9]+\.[0-9]+" | head -1 || true)
+
+        if [[ -n "$exec_time_ms" ]] && [[ "$exec_time_ms" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+          core_ms=$(awk -v ms="$exec_time_ms" 'BEGIN{printf "%.1f", ms}')
+          core_times+=("$core_ms")
+          core_have=true
+        fi
+      fi
+
+      rm -f "$tmp_explain"
+    fi
+  done
+
+  # Build outputs
+  local shell_stats core_stats
+  shell_stats=$(calculate_statistics "${shell_times[@]}")
+  if $RUN_STATS && $core_have && [[ ${#core_times[@]} -gt 0 ]]; then
+    core_stats=$(calculate_statistics "${core_times[@]}")
+  else
+    core_stats="(n/a)|0|0"
+  fi
+  echo "$shell_stats|$core_stats"
+}
+
+# Run a DuckDB query and compute statistics
+run_duckdb_avg_ms() {
+  local sql_file="$1"
+
+  # Check if DuckDB is available
+  if [[ -z "$DUCKDB_EXEC" ]]; then
+    echo "(unavailable)|0|0|(n/a)|0|0"
+    return
+  fi
+
+  # Test run with minimal setup (no profiling)
+  local tmp_test
+  tmp_test=$(mktemp)
+  printf 'PRAGMA threads=1;\n' > "$tmp_test"
+  cat "$sql_file" >> "$tmp_test"
+  "$DUCKDB_EXEC" "$DUCKDB_DB_PATH" < "$tmp_test" >/dev/null 2>&1 || {
+    rm -f "$tmp_test"
+    echo "(error)|0|0|(n/a)|0|0"
+    return
+  }
+  rm -f "$tmp_test"
+
+  local shell_times=()
+  local core_times=()
+  local core_have=false
+
+  for ((i=1; i<=REPEATS; i++)); do
+    local tmp_sql iter_json
+    tmp_sql=$(mktemp)
+    if $RUN_STATS; then
+      # Enable JSON profiling per-run and write to a temporary file
+      iter_json=$(mktemp -t duckprof.XXXXXX).json
+      cat > "$tmp_sql" <<SQL
+PRAGMA threads=1;
+PRAGMA enable_profiling=json;
+PRAGMA profiling_output='$iter_json';
+SQL
+    else
+      echo 'PRAGMA threads=1;' > "$tmp_sql"
+    fi
+    cat "$sql_file" >> "$tmp_sql"
+
+    # Wall-clock shell time
+    local ms
+    ms=$(time_command_ms "$DUCKDB_EXEC" "$DUCKDB_DB_PATH" < "$tmp_sql") || {
+      rm -f "$tmp_sql" ${iter_json:+"$iter_json"}
+      echo "(error)|0|0|(n/a)|0|0"
+      return
+    }
+    shell_times+=("$ms")
+
+    # Parse core latency from JSON profile if available
+    if $RUN_STATS && [[ -n "${iter_json:-}" && -f "$iter_json" ]]; then
+      local core_sec
+      if command -v jq >/dev/null 2>&1; then
+        core_sec=$(jq -r '.latency // empty' "$iter_json" 2>/dev/null || true)
+      else
+        core_sec=$(grep -oE '"latency"\s*:\s*[0-9.]+' "$iter_json" 2>/dev/null | sed -E 's/.*:\s*//' | head -1 || true)
+      fi
+      if [[ -n "$core_sec" ]] && [[ "$core_sec" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+        local core_ms
+        core_ms=$(awk -v s="$core_sec" 'BEGIN{printf "%.1f", s*1000}')
+        core_times+=("$core_ms")
+        core_have=true
+      fi
+    fi
+
+    rm -f "$tmp_sql" ${iter_json:+"$iter_json"}
+  done
+
+  # Build outputs
+  local shell_stats core_stats
+  shell_stats=$(calculate_statistics "${shell_times[@]}")
+  if $RUN_STATS && $core_have && [[ ${#core_times[@]} -gt 0 ]]; then
+    core_stats=$(calculate_statistics "${core_times[@]}")
+  else
+    core_stats="(n/a)|0|0"
+  fi
+  echo "$shell_stats|$core_stats"
+}
+
+# Help function
+show_help() {
+  cat << 'EOF'
+Multi-Engine SSB Performance Benchmark Runner v1.0
+
+USAGE (from repo root):
+  scripts/ssb/shell/run_all_perf.sh [OPTIONS] [QUERIES...]
+
+OPTIONS:
+  -stats, --stats         Enable SystemDS internal statistics collection
+  -warmup=N, --warmup=N   Set number of warmup runs (default: 1)
+  -repeats=N, --repeats=N Set number of timing repetitions (default: 5)
+  -seed=N, --seed=N       Set random seed for reproducible results (default: auto-generated)
+  -stacked, --stacked     Use stacked, multi-line layout (best for narrow terminals)
+  -layout=MODE, --layout=MODE Set layout: auto|wide|stacked (default: auto)
+                          Note: --layout=stacked is equivalent to --stacked
+                                --layout=wide forces wide table layout
+  -input-dir=PATH, --input-dir=PATH Specify custom data directory (default: $PROJECT_ROOT/data)
+  -output-dir=PATH, --output-dir=PATH Specify custom output directory (default: $PROJECT_ROOT/scripts/ssb/shell/ssbOutputData/PerformanceData)
+  -h, -help, --help, --h  Show this help message
+  -v, -version, --version, --v Show version information
+
+QUERIES:
+  If no queries are specified, all available SSB queries (q*.dml) will be executed.
+  To run specific queries, provide their names (with or without .dml extension):
+    scripts/ssb/shell/run_all_perf.sh q1.1 q2.3 q4.1
+
+EXAMPLES (from repo root):
+  scripts/ssb/shell/run_all_perf.sh                          # Run full benchmark with all engines
+  scripts/ssb/shell/run_all_perf.sh --warmup=3 --repeats=10  # Custom warmup and repetition settings
+  scripts/ssb/shell/run_all_perf.sh -warmup=3 -repeats=10    # Same with single dashes
+  scripts/ssb/shell/run_all_perf.sh --stats                  # Enable SystemDS internal timing
+  scripts/ssb/shell/run_all_perf.sh --layout=wide            # Force wide table layout
+  scripts/ssb/shell/run_all_perf.sh --stacked                # Force stacked layout for narrow terminals
+  scripts/ssb/shell/run_all_perf.sh q1.1 q2.3                # Benchmark specific queries only
+  scripts/ssb/shell/run_all_perf.sh --seed=12345             # Reproducible benchmark run
+  scripts/ssb/shell/run_all_perf.sh --input-dir=/path/to/data  # Custom data directory
+  scripts/ssb/shell/run_all_perf.sh -input-dir=/path/to/data   # Same as above (single dash)
+  scripts/ssb/shell/run_all_perf.sh --output-dir=/tmp/results  # Custom output directory
+  scripts/ssb/shell/run_all_perf.sh -output-dir=/tmp/results   # Same as above (single dash)
+
+ENGINES:
+  - SystemDS: Machine learning platform with DML queries
+  - PostgreSQL: Industry-standard relational database (if available)
+  - DuckDB: High-performance analytical database (if available)
+
+OUTPUT:
+  Results are saved in CSV and JSON formats with comprehensive metadata:
+  - Performance timing statistics (mean, stdev, p95)
+  - Engine comparison and fastest detection
+  - System information and run configuration
+
+STATISTICAL OUTPUT FORMAT:
+  1824 (±10, p95:1840)
+    │     │       └── 95th percentile (worst-case bound)
+    │     └── Standard deviation (consistency measure)
+    └── Mean execution time (typical performance)
+
+For more information, see the documentation in scripts/ssb/README.md
+EOF
+}
+
+# Parse arguments
+RUN_STATS=false
+QUERIES=()
+SEED=""
+LAYOUT="auto"
+INPUT_DIR=""
+OUTPUT_DIR=""
+
+# Support both --opt=value and --opt value forms
+EXPECT_OPT=""
+for arg in "$@"; do
+  if [[ -n "$EXPECT_OPT" ]]; then
+    case "$EXPECT_OPT" in
+      seed)
+        SEED="$arg"
+        EXPECT_OPT=""
+        continue
+        ;;
+      input-dir)
+        INPUT_DIR="$arg"
+        EXPECT_OPT=""
+        continue
+        ;;
+      output-dir)
+        OUTPUT_DIR="$arg"
+        EXPECT_OPT=""
+        continue
+        ;;
+      warmup)
+        WARMUP="$arg"
+        if ! [[ "$WARMUP" =~ ^[0-9]+$ ]] || [[ "$WARMUP" -lt 0 ]]; then
+          echo "Error: --warmup requires a non-negative integer (e.g., --warmup 2)" >&2
+          exit 1
+        fi
+        EXPECT_OPT=""
+        continue
+        ;;
+      repeats)
+        REPEATS="$arg"
+        if ! [[ "$REPEATS" =~ ^[0-9]+$ ]] || [[ "$REPEATS" -lt 1 ]]; then
+          echo "Error: --repeats requires a positive integer (e.g., --repeats 5)" >&2
+          exit 1
+        fi
+        EXPECT_OPT=""
+        continue
+        ;;
+      layout)
+        LAYOUT="$arg"
+        if [[ "$LAYOUT" != "auto" && "$LAYOUT" != "wide" && "$LAYOUT" != "stacked" ]]; then
+          echo "Error: --layout requires one of: auto, wide, stacked (e.g., --layout wide)" >&2
+          exit 1
+        fi
+        EXPECT_OPT=""
+        continue
+        ;;
+    esac
+  fi
+
+  if [[ "$arg" == "--help" || "$arg" == "-help" || "$arg" == "-h" || "$arg" == "--h" ]]; then
+    show_help
+    exit 0
+  elif [[ "$arg" == "--version" || "$arg" == "-version" || "$arg" == "-v" || "$arg" == "--v" ]]; then
+    echo "Multi-Engine SSB Performance Benchmark Runner v1.0"
+    echo "First Public Release: September 5, 2025"
+    exit 0
+  elif [[ "$arg" == "--stats" || "$arg" == "-stats" ]]; then
+    RUN_STATS=true
+  elif [[ "$arg" == --seed=* || "$arg" == -seed=* ]]; then
+    SEED="${arg#*seed=}"
+  elif [[ "$arg" == "--seed" || "$arg" == "-seed" ]]; then
+    EXPECT_OPT="seed"
+  elif [[ "$arg" == --warmup=* || "$arg" == -warmup=* ]]; then
+    WARMUP="${arg#*warmup=}"
+    if ! [[ "$WARMUP" =~ ^[0-9]+$ ]] || [[ "$WARMUP" -lt 0 ]]; then
+      echo "Error: -warmup/--warmup requires a non-negative integer (e.g., -warmup=2)" >&2
+      exit 1
+    fi
+  elif [[ "$arg" == --input-dir=* || "$arg" == -input-dir=* ]]; then
+    INPUT_DIR="${arg#*input-dir=}"
+  elif [[ "$arg" == "--input-dir" || "$arg" == "-input-dir" ]]; then
+    EXPECT_OPT="input-dir"
+  elif [[ "$arg" == --output-dir=* || "$arg" == -output-dir=* ]]; then
+    OUTPUT_DIR="${arg#*output-dir=}"
+  elif [[ "$arg" == "--output-dir" || "$arg" == "-output-dir" ]]; then
+    EXPECT_OPT="output-dir"
+  elif [[ "$arg" == "--warmup" || "$arg" == "-warmup" ]]; then
+    EXPECT_OPT="warmup"
+  elif [[ "$arg" == --repeats=* || "$arg" == -repeats=* ]]; then
+    REPEATS="${arg#*repeats=}"
+    if ! [[ "$REPEATS" =~ ^[0-9]+$ ]] || [[ "$REPEATS" -lt 1 ]]; then
+      echo "Error: -repeats/--repeats requires a positive integer (e.g., -repeats=5)" >&2
+      exit 1
+    fi
+  elif [[ "$arg" == "--repeats" || "$arg" == "-repeats" ]]; then
+    EXPECT_OPT="repeats"
+  elif [[ "$arg" == "--stacked" || "$arg" == "-stacked" ]]; then
+    LAYOUT="stacked"
+  elif [[ "$arg" == --layout=* || "$arg" == -layout=* ]]; then
+    LAYOUT="${arg#*layout=}"
+    if [[ "$LAYOUT" != "auto" && "$LAYOUT" != "wide" && "$LAYOUT" != "stacked" ]]; then
+      echo "Error: -layout/--layout requires one of: auto, wide, stacked (e.g., --layout=wide)" >&2
+      exit 1
+    fi
+  elif [[ "$arg" == "--layout" || "$arg" == "-layout" ]]; then
+    EXPECT_OPT="layout"
+  else
+    # Check if argument looks like an unrecognized option (starts with dash)
+    if [[ "$arg" == -* ]]; then
+      echo "Error: Unrecognized option '$arg'" >&2
+      echo "Use --help or -h to see available options." >&2
+      exit 1
+    else
+      # Treat as query name
+      QUERIES+=( "$(echo "$arg" | tr '.' '_')" )
+    fi
+  fi
+ done
+
+# If the last option expected a value but none was provided
+if [[ -n "$EXPECT_OPT" ]]; then
+  case "$EXPECT_OPT" in
+    seed) echo "Error: -seed/--seed requires a value (e.g., -seed=12345)" >&2 ;;
+    warmup) echo "Error: -warmup/--warmup requires a value (e.g., -warmup=2)" >&2 ;;
+    repeats) echo "Error: -repeats/--repeats requires a value (e.g., -repeats=5)" >&2 ;;
+    layout) echo "Error: -layout/--layout requires a value (e.g., -layout=wide)" >&2 ;;
+  esac
+  exit 1
+fi
+
+# Generate seed if not provided
+if [[ -z "$SEED" ]]; then
+  SEED=$((RANDOM * 32768 + RANDOM))
+fi
+if [[ ${#QUERIES[@]} -eq 0 ]]; then
+  for f in "$QUERY_DIR"/q*.dml; do
+    [[ -e "$f" ]] || continue
+    bname="$(basename "$f")"
+    QUERIES+=( "${bname%.dml}" )
+  done
+fi
+
+# Set data directory
+if [[ -z "$INPUT_DIR" ]]; then
+  INPUT_DIR="$PROJECT_ROOT/data"
+fi
+
+# Set output directory
+if [[ -z "$OUTPUT_DIR" ]]; then
+  OUTPUT_DIR="$PROJECT_ROOT/scripts/ssb/shell/ssbOutputData/PerformanceData"
+fi
+
+# Normalize paths by removing trailing slashes
+INPUT_DIR="${INPUT_DIR%/}"
+OUTPUT_DIR="${OUTPUT_DIR%/}"
+
+# Pass input directory to DML scripts via SystemDS named arguments
+NVARGS=( -nvargs "input_dir=${INPUT_DIR}" )
+
+# Validate data directory
+if [[ ! -d "$INPUT_DIR" ]]; then
+  echo "Error: Data directory '$INPUT_DIR' does not exist." >&2
+  echo "Please ensure the directory exists or specify a different path with -input-dir." >&2
+  exit 1
+fi
+
+# Ensure output directory exists
+mkdir -p "$OUTPUT_DIR"
+
+# Metadata collection functions
+collect_system_metadata() {
+  local timestamp hostname systemds_version jdk_version postgres_version duckdb_version cpu_info ram_info
+
+  # Basic system info
+  timestamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
+  hostname=$(hostname 2>/dev/null || echo "unknown")
+
+  # SystemDS version
+  if [[ -x "$SYSTEMDS_CMD" ]]; then
+    # Try to get version from pom.xml first
+    if [[ -f "$PROJECT_ROOT/pom.xml" ]]; then
+      systemds_version=$(grep -A1 '<groupId>org.apache.systemds</groupId>' "$PROJECT_ROOT/pom.xml" | grep '<version>' | sed 's/.*<version>\(.*\)<\/version>.*/\1/' | head -1 2>/dev/null || echo "unknown")
+    else
+      systemds_version="unknown"
+    fi
+
+    # If pom.xml method failed, try alternative methods
+    if [[ "$systemds_version" == "unknown" ]]; then
+      # Try to extract from SystemDS JAR manifest
+      if [[ -f "$PROJECT_ROOT/target/systemds.jar" ]]; then
+        systemds_version=$(unzip -p "$PROJECT_ROOT/target/systemds.jar" META-INF/MANIFEST.MF 2>/dev/null | grep "Implementation-Version" | cut -d: -f2 | tr -d ' ' || echo "unknown")
+      else
+        # Try to find any SystemDS JAR and extract version
+        local jar_file=$(find "$PROJECT_ROOT" -name "systemds*.jar" | head -1 2>/dev/null)
+        if [[ -n "$jar_file" ]]; then
+          systemds_version=$(unzip -p "$jar_file" META-INF/MANIFEST.MF 2>/dev/null | grep "Implementation-Version" | cut -d: -f2 | tr -d ' ' || echo "unknown")
+        else
+          systemds_version="unknown"
+        fi
+      fi
+    fi
+  else
+    systemds_version="unknown"
+  fi
+
+  # JDK version
+  if command -v java >/dev/null 2>&1; then
+    jdk_version=$(java -version 2>&1 | grep -v "Picked up" | head -1 | sed 's/.*"\(.*\)".*/\1/' || echo "unknown")
+  else
+    jdk_version="unknown"
+  fi
+
+  # PostgreSQL version
+  if command -v psql >/dev/null 2>&1; then
+    postgres_version=$(psql --version 2>/dev/null | head -1 || echo "not available")
+  else
+    postgres_version="not available"
+  fi
+
+  # DuckDB version
+  if command -v duckdb >/dev/null 2>&1; then
+    duckdb_version=$(duckdb --version 2>/dev/null || echo "not available")
+  else
+    duckdb_version="not available"
+  fi
+
+  # System resources
+  if [[ "$(uname)" == "Darwin" ]]; then
+    # macOS
+    cpu_info=$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo "unknown")
+    ram_info=$(( $(sysctl -n hw.memsize 2>/dev/null || echo 0) / 1024 / 1024 / 1024 ))GB
+  else
+    # Linux
+    cpu_info=$(grep "model name" /proc/cpuinfo | head -1 | cut -d: -f2- | sed 's/^ *//' 2>/dev/null || echo "unknown")
+    ram_info=$(( $(grep MemTotal /proc/meminfo | awk '{print $2}' 2>/dev/null || echo 0) / 1024 / 1024 ))GB
+  fi
+
+  # Store metadata globally
+  RUN_TIMESTAMP="$timestamp"
+  RUN_HOSTNAME="$hostname"
+  RUN_SYSTEMDS_VERSION="$systemds_version"
+  RUN_JDK_VERSION="$jdk_version"
+  RUN_POSTGRES_VERSION="$postgres_version"
+  RUN_DUCKDB_VERSION="$duckdb_version"
+  RUN_CPU_INFO="$cpu_info"
+  RUN_RAM_INFO="$ram_info"
+}
+
+collect_data_metadata() {
+  # Check for SSB data directory and get basic stats
+  local ssb_data_dir="$INPUT_DIR"
+  local json_parts=()
+  local display_parts=()
+
+  if [[ -d "$ssb_data_dir" ]]; then
+    # Try to get row counts from data files (if they exist)
+    for table in customer part supplier date; do
+      local file="$ssb_data_dir/${table}.tbl"
+      if [[ -f "$file" ]]; then
+        local count=$(wc -l < "$file" 2>/dev/null | tr -d ' ' || echo "0")
+        json_parts+=("    \"$table\": \"$count\"")
+        display_parts+=("$table:$count")
+      fi
+    done
+    # Check for any lineorder*.tbl file (SSB fact table)
+    local lineorder_file=$(find "$ssb_data_dir" -name "lineorder*.tbl" -type f | head -1)
+    if [[ -n "$lineorder_file" && -f "$lineorder_file" ]]; then
+      local count=$(wc -l < "$lineorder_file" 2>/dev/null | tr -d ' ' || echo "0")
+      json_parts+=("    \"lineorder\": \"$count\"")
+      display_parts+=("lineorder:$count")
+    fi
+  fi
+
+  if [[ ${#json_parts[@]} -eq 0 ]]; then
+    RUN_DATA_INFO='"No data files found"'
+    RUN_DATA_DISPLAY="No data files found"
+  else
+    # Join array elements with commas and newlines, wrap in braces for JSON
+    local formatted_json="{\n"
+    for i in "${!json_parts[@]}"; do
+      formatted_json+="${json_parts[$i]}"
+      if [[ $i -lt $((${#json_parts[@]} - 1)) ]]; then
+        formatted_json+=",\n"
+      else
+        formatted_json+="\n"
+      fi
+    done
+    formatted_json+="  }"
+    RUN_DATA_INFO="$formatted_json"
+
+    # Join with spaces for display
+    local IFS=" "
+    RUN_DATA_DISPLAY="${display_parts[*]}"
+  fi
+}
+
+print_metadata_header() {
+  echo "=================================================================================="
+  echo "                      MULTI-ENGINE PERFORMANCE BENCHMARK METADATA"
+  echo "=================================================================================="
+  echo "Timestamp:       $RUN_TIMESTAMP"
+  echo "Hostname:        $RUN_HOSTNAME"
+  echo "Seed:            $SEED"
+  echo
+  echo "Software Versions:"
+  echo "  SystemDS:      $RUN_SYSTEMDS_VERSION"
+  echo "  JDK:           $RUN_JDK_VERSION"
+  echo "  PostgreSQL:    $RUN_POSTGRES_VERSION"
+  echo "  DuckDB:        $RUN_DUCKDB_VERSION"
+  echo
+  echo "System Resources:"
+  echo "  CPU:           $RUN_CPU_INFO"
+  echo "  RAM:           $RUN_RAM_INFO"
+  echo
+  echo "Data Build Info:"
+  echo "  SSB Data:      $RUN_DATA_DISPLAY"
+  echo
+  echo "Run Configuration:"
+  echo "  Statistics:    $(if $RUN_STATS; then echo "enabled"; else echo "disabled"; fi)"
+  echo "  Queries:       ${#QUERIES[@]} selected"
+  echo "  Warmup Runs:   $WARMUP"
+  echo "  Repeat Runs:   $REPEATS"
+  echo "=================================================================================="
+  echo
+}
+
+# Progress indicator function
+progress_indicator() {
+  local query_name="$1"
+  local stage="$2"
+  # Use terminal width for proper clearing, fallback to 120 chars if tput fails
+  local term_width
+  term_width=$(tput cols 2>/dev/null || echo 120)
+  local spaces=$(printf "%*s" "$term_width" "")
+  echo -ne "\r$spaces\r$query_name: Running $stage..."
+}
+
+# Clear progress line function
+clear_progress() {
+  local term_width
+  term_width=$(tput cols 2>/dev/null || echo 120)
+  local spaces=$(printf "%*s" "$term_width" "")
+  echo -ne "\r$spaces\r"
+}
+
+# Main execution
+# Collect metadata
+collect_system_metadata
+collect_data_metadata
+
+# Print metadata header
+print_metadata_header
+
+verify_environment
+echo
+echo "NOTE (macOS): You cannot drop OS caches like Linux (sync; echo 3 > /proc/sys/vm/drop_caches)."
+echo "We mitigate with warm-up runs and repeated averages to ensure consistent measurements."
+echo
+echo "INTERPRETATION GUIDE:"
+echo "- SystemDS Shell (ms): Total execution time including JVM startup, I/O, and computation"
+echo "- SystemDS Core (ms):  Pure computation time excluding JVM overhead (only with --stats)"
+echo "- PostgreSQL (ms):     Single-threaded execution time with parallel workers disabled"
+echo "- PostgreSQL Core (ms): Query execution time from EXPLAIN ANALYZE (only with --stats)"
+echo "- DuckDB (ms):         Single-threaded execution time with threads=1 pragma"
+echo "- DuckDB Core (ms):    Engine-internal latency from JSON profiling (with --stats)"
+echo "- (missing):           SQL file not found for this query"
+echo "- (n/a):               Core timing unavailable (run with --stats flag for internal timing)"
+echo
+echo "NOTE: All engines use single-threaded execution for fair comparison."
+echo "      Multiple runs with averaging provide statistical reliability."
+echo
+echo "Single-threaded execution; warm-up runs: $WARMUP, timed runs: $REPEATS"
+echo "Row 1 shows mean (ms); Row 2 shows ±stdev/CV; Row 3 shows p95 (ms)."
+echo "Core execution times available for all engines with --stats flag."
+echo
+term_width=$(tput cols 2>/dev/null || echo 120)
+if [[ "$LAYOUT" == "auto" ]]; then
+  if [[ $term_width -ge 140 ]]; then
+    LAYOUT_MODE="wide"
+  else
+    LAYOUT_MODE="stacked"
+  fi
+else
+  LAYOUT_MODE="$LAYOUT"
+fi
+
+# If the user requested wide layout but the terminal is too narrow, fall back to stacked
+if [[ "$LAYOUT_MODE" == "wide" ]]; then
+  # compute total printable width: sum(widths) + 3*cols + 1 (accounting for separators)
+  sumw=0
+  for w in "${WIDE_COL_WIDTHS[@]}"; do sumw=$((sumw + w)); done
+  cols=${#WIDE_COL_WIDTHS[@]}
+  total_width=$((sumw + 3*cols + 1))
+  if [[ $total_width -gt $term_width ]]; then
+    # Try to scale columns down proportionally to fit terminal width
+    reserved=$((3*cols + 1))
+    avail=$((term_width - reserved))
+  if [[ $avail -le 0 ]]; then
+    :
+  else
+      # Minimum sensible widths per column (keep labels readable)
+      MIN_COL_WIDTHS=(6 8 8 6 10 6 6 16)
+      # Start with proportional distribution
+      declare -a new_widths=()
+      for w in "${WIDE_COL_WIDTHS[@]}"; do
+        nw=$(( w * avail / sumw ))
+        if [[ $nw -lt 1 ]]; then nw=1; fi
+        new_widths+=("$nw")
+      done
+      # Enforce minimums
+      sum_new=0
+      for i in "${!new_widths[@]}"; do
+        if [[ ${new_widths[i]} -lt ${MIN_COL_WIDTHS[i]:-4} ]]; then
+          new_widths[i]=${MIN_COL_WIDTHS[i]:-4}
+        fi
+        sum_new=$((sum_new + new_widths[i]))
+      done
+      # If even minimums exceed available, fallback to stacked
+      if [[ $sum_new -gt $avail ]]; then
+        :
+      else
+        # Distribute remaining columns' widths left-to-right
+        rem=$((avail - sum_new))
+        i=0
+        while [[ $rem -gt 0 ]]; do
+          new_widths[i]=$((new_widths[i] + 1))
+          rem=$((rem - 1))
+          i=$(( (i + 1) % cols ))
+        done
+        # Replace WIDE_COL_WIDTHS with the scaled values for printing
+        WIDE_COL_WIDTHS=("${new_widths[@]}")
+        # Recompute total_width for logging
+        sumw=0
+        for w in "${WIDE_COL_WIDTHS[@]}"; do sumw=$((sumw + w)); done
+        total_width=$((sumw + reserved))
+        echo "Info: scaled wide layout to fit terminal ($term_width cols): table width $total_width"
+      fi
+    fi
+  fi
+fi
+
+if [[ "$LAYOUT_MODE" == "wide" ]]; then
+  grid_line_wide
+  grid_row_wide \
+    "Query" \
+    "SysDS Shell" "SysDS Core" \
+    "PostgreSQL" "PostgreSQL Core" \
+    "DuckDB" "DuckDB Core" \
+    "Fastest"
+  grid_row_wide "" "mean" "mean" "mean" "mean" "mean" "mean" ""
+  grid_row_wide "" "±/CV" "±/CV" "±/CV" "±/CV" "±/CV" "±/CV" ""
+  grid_row_wide "" "p95" "p95" "p95" "p95" "p95" "p95" ""
+  grid_line_wide
+else
+  echo "================================================================================"
+  echo "Stacked layout (use --layout=wide for table view)."
+  echo "Row 1 shows mean (ms); Row 2 shows (±stdev/CV, p95)."
+  echo "--------------------------------------------------------------------------------"
+fi
+# Prepare output file paths and write CSV header with comprehensive metadata
+# Ensure results directory exists and create timestamped filenames
+RESULT_DIR="$OUTPUT_DIR"
+mkdir -p "$RESULT_DIR"
+RESULT_BASENAME="ssb_results_$(date -u +%Y%m%dT%H%M%SZ)"
+RESULT_CSV="$RESULT_DIR/${RESULT_BASENAME}.csv"
+RESULT_JSON="$RESULT_DIR/${RESULT_BASENAME}.json"
+
+{
+  echo "# Multi-Engine Performance Benchmark Results"
+  echo "# Timestamp: $RUN_TIMESTAMP"
+  echo "# Hostname: $RUN_HOSTNAME"
+  echo "# Seed: $SEED"
+  echo "# SystemDS: $RUN_SYSTEMDS_VERSION"
+  echo "# JDK: $RUN_JDK_VERSION"
+  echo "# PostgreSQL: $RUN_POSTGRES_VERSION"
+  echo "# DuckDB: $RUN_DUCKDB_VERSION"
+  echo "# CPU: $RUN_CPU_INFO"
+  echo "# RAM: $RUN_RAM_INFO"
+  echo "# Data: $RUN_DATA_DISPLAY"
+  echo "# Warmup: $WARMUP, Repeats: $REPEATS"
+  echo "# Statistics: $(if $RUN_STATS; then echo "enabled"; else echo "disabled"; fi)"
+  echo "#"
+  echo "query,systemds_shell_display,systemds_shell_mean,systemds_shell_stdev,systemds_shell_p95,systemds_core_display,systemds_core_mean,systemds_core_stdev,systemds_core_p95,postgres_display,postgres_mean,postgres_stdev,postgres_p95,postgres_core_display,postgres_core_mean,postgres_core_stdev,postgres_core_p95,duckdb_display,duckdb_mean,duckdb_stdev,duckdb_p95,duckdb_core_display,duckdb_core_mean,duckdb_core_stdev,duckdb_core_p95,fastest"
+} > "$RESULT_CSV"
+for base in "${QUERIES[@]}"; do
+  # Show progress indicator for SystemDS
+  progress_indicator "$base" "SystemDS"
+
+  dml_path="$QUERY_DIR/${base}.dml"
+  # Parse SystemDS results: shell_mean|shell_stdev|shell_p95|core_mean|core_stdev|core_p95
+  # Capture potential SystemDS test-run error messages for JSON reporting
+  tmp_err_msg=$(mktemp)
+  systemds_result="$(run_systemds_avg "$dml_path" "$tmp_err_msg")"
+  # Read any captured error message
+  sysds_err_text="$(sed -n '1,200p' "$tmp_err_msg" 2>/dev/null | tr '\n' ' ' || true)"
+  rm -f "$tmp_err_msg"
+  IFS='|' read -r sd_shell_mean sd_shell_stdev sd_shell_p95 sd_core_mean sd_core_stdev sd_core_p95 <<< "$systemds_result"
+
+  # Format SystemDS results for display
+  if [[ "$sd_shell_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+    sd_shell_display=$(format_statistics "$sd_shell_mean" "$sd_shell_stdev" "$sd_shell_p95" "$REPEATS")
+  else
+  sd_shell_display="$sd_shell_mean"
+    sd_shell_stdev="0"
+    sd_shell_p95="0"
+  fi
+  if [[ "$sd_core_mean" == "(n/a)" ]]; then
+    sd_core_display="(n/a)"
+  else
+    sd_core_display=$(format_statistics "$sd_core_mean" "$sd_core_stdev" "$sd_core_p95" "$REPEATS")
+  fi
+
+  sql_name="${base//_/.}.sql"
+  sql_path="$SQL_DIR/$sql_name"
+  pg_display="(missing)"
+  duck_display="(missing)"
+
+  if [[ -n "$PSQL_EXEC" && -f "$sql_path" ]]; then
+    progress_indicator "$base" "PostgreSQL"
+    pg_result="$(run_psql_avg_ms "$sql_path")"
+    IFS='|' read -r pg_mean pg_stdev pg_p95 pg_core_mean pg_core_stdev pg_core_p95 <<< "$pg_result"
+    if [[ "$pg_mean" == "(unavailable)" || "$pg_mean" == "(error)" ]]; then
+      pg_display="$pg_mean"
+      pg_core_display="$pg_mean"
+      pg_stdev="0"
+      pg_p95="0"
+      pg_core_mean="(n/a)"
+      pg_core_stdev="0"
+      pg_core_p95="0"
+    else
+      pg_display=$(format_statistics "$pg_mean" "$pg_stdev" "$pg_p95" "$REPEATS")
+      if [[ "$pg_core_mean" != "(n/a)" ]]; then
+        pg_core_display=$(format_statistics "$pg_core_mean" "$pg_core_stdev" "$pg_core_p95" "$REPEATS")
+      else
+        pg_core_display="(n/a)"
+      fi
+    fi
+  elif [[ -z "$PSQL_EXEC" ]]; then
+    pg_display="(unavailable)"
+    pg_core_display="(unavailable)"
+    pg_mean="(unavailable)"
+    pg_core_mean="(unavailable)"
+    pg_stdev="0"
+    pg_p95="0"
+    pg_core_stdev="0"
+    pg_core_p95="0"
+  else
+    pg_display="(missing)"
+    pg_core_display="(missing)"
+    pg_mean="(missing)"
+    pg_core_mean="(missing)"
+    pg_stdev="0"
+    pg_p95="0"
+    pg_core_stdev="0"
+    pg_core_p95="0"
+  fi
+
+  if [[ -n "$DUCKDB_EXEC" && -f "$sql_path" ]]; then
+    progress_indicator "$base" "DuckDB"
+    duck_result="$(run_duckdb_avg_ms "$sql_path")"
+    IFS='|' read -r duck_mean duck_stdev duck_p95 duck_core_mean duck_core_stdev duck_core_p95 <<< "$duck_result"
+    if [[ "$duck_mean" == "(unavailable)" || "$duck_mean" == "(error)" ]]; then
+      duck_display="$duck_mean"
+      duck_stdev="0"
+      duck_p95="0"
+      duck_core_display="(n/a)"
+      duck_core_mean="(n/a)"
+      duck_core_stdev="0"
+      duck_core_p95="0"
+    else
+      duck_display=$(format_statistics "$duck_mean" "$duck_stdev" "$duck_p95" "$REPEATS")
+      if [[ "$duck_core_mean" == "(n/a)" ]]; then
+        duck_core_display="(n/a)"
+      else
+        duck_core_display=$(format_statistics "$duck_core_mean" "$duck_core_stdev" "$duck_core_p95" "$REPEATS")
+      fi
+    fi
+  elif [[ -z "$DUCKDB_EXEC" ]]; then
+    duck_display="(unavailable)"
+    duck_mean="(unavailable)"
+    duck_stdev="0"
+    duck_p95="0"
+    duck_core_display="(unavailable)"
+    duck_core_mean="(unavailable)"
+    duck_core_stdev="0"
+    duck_core_p95="0"
+  else
+    duck_display="(missing)"
+    duck_mean="(missing)"
+    duck_stdev="0"
+    duck_p95="0"
+    duck_core_display="(missing)"
+    duck_core_mean="(missing)"
+    duck_core_stdev="0"
+    duck_core_p95="0"
+  fi
+
+  # Determine fastest engine based on mean values
+  fastest=""
+  min_ms=999999999
+  for engine in systemds pg duck; do
+    val=""
+    eng_name=""
+    case "$engine" in
+      systemds) val="$sd_shell_mean"; eng_name="SystemDS";;
+      pg) val="$pg_mean"; eng_name="PostgreSQL";;
+      duck) val="$duck_mean"; eng_name="DuckDB";;
+    esac
+    # Check if value is a valid number (including decimal)
+    if [[ "$val" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+      # Use awk for floating point comparison
+      if [[ $(awk -v val="$val" -v min="$min_ms" 'BEGIN{print (val < min)}') -eq 1 ]]; then
+        min_ms=$(awk -v val="$val" 'BEGIN{printf "%.1f", val}')
+        fastest="$eng_name"
+      elif [[ $(awk -v val="$val" -v min="$min_ms" 'BEGIN{print (val == min)}') -eq 1 ]] && [[ -n "$fastest" ]]; then
+        fastest="$fastest+$eng_name"  # Show ties
+      fi
+    fi
+  done
+  [[ -z "$fastest" ]] && fastest="(n/a)"
+
+  # Determine SystemDS per-query status and include any error message captured
+  systemds_status="success"
+  systemds_error_message=null
+  if [[ "$sd_shell_mean" == "(error)" ]] || [[ -n "$sysds_err_text" ]]; then
+    systemds_status="error"
+    if [[ -n "$sysds_err_text" ]]; then
+      # Escape quotes for JSON embedding
+      esc=$(printf '%s' "$sysds_err_text" | sed -e 's/"/\\"/g')
+      systemds_error_message="\"$esc\""
+    else
+      systemds_error_message="\"SystemDS reported an error during test-run\""
+    fi
+  fi
+
+  # Prepare mean-only and stats-only cells
+  # Means: use numeric mean when available; otherwise use existing display label (unavailable/missing)
+  sd_shell_mean_cell=$([[ "$sd_shell_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] && echo "$sd_shell_mean" || echo "$sd_shell_display")
+  sd_core_mean_cell=$([[ "$sd_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] && echo "$sd_core_mean" || echo "$sd_core_display")
+  pg_mean_cell=$([[ "$pg_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] && echo "$pg_mean" || echo "$pg_display")
+  pg_core_mean_cell=$([[ "$pg_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] && echo "$pg_core_mean" || echo "$pg_core_display")
+  duck_mean_cell=$([[ "$duck_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] && echo "$duck_mean" || echo "$duck_display")
+  duck_core_mean_cell=$([[ "$duck_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]] && echo "$duck_core_mean" || echo "$duck_core_display")
+
+  # Stats lines split: CV and p95
+  sd_shell_cv_cell=$(format_cv_only "$sd_shell_mean" "$sd_shell_stdev" "$REPEATS")
+  sd_core_cv_cell=$(format_cv_only "$sd_core_mean" "$sd_core_stdev" "$REPEATS")
+  pg_cv_cell=$(format_cv_only "$pg_mean" "$pg_stdev" "$REPEATS")
+  pg_core_cv_cell=$(format_cv_only "$pg_core_mean" "$pg_core_stdev" "$REPEATS")
+  duck_cv_cell=$(format_cv_only "$duck_mean" "$duck_stdev" "$REPEATS")
+  duck_core_cv_cell=$(format_cv_only "$duck_core_mean" "$duck_core_stdev" "$REPEATS")
+
+  sd_shell_p95_cell=$(format_p95_only "$sd_shell_p95" "$REPEATS")
+  sd_core_p95_cell=$(format_p95_only "$sd_core_p95" "$REPEATS")
+  pg_p95_cell=$(format_p95_only "$pg_p95" "$REPEATS")
+  pg_core_p95_cell=$(format_p95_only "$pg_core_p95" "$REPEATS")
+  duck_p95_cell=$(format_p95_only "$duck_p95" "$REPEATS")
+  duck_core_p95_cell=$(format_p95_only "$duck_core_p95" "$REPEATS")
+
+  # Clear progress line and display final results
+  clear_progress
+  if [[ "$LAYOUT_MODE" == "wide" ]]; then
+    # Three-line table style with grid separators
+    grid_row_wide \
+      "$base" \
+      "$sd_shell_mean_cell" "$sd_core_mean_cell" \
+      "$pg_mean_cell" "$pg_core_mean_cell" \
+      "$duck_mean_cell" "$duck_core_mean_cell" \
+      "$fastest"
+    grid_row_wide \
+      "" \
+      "$sd_shell_cv_cell" "$sd_core_cv_cell" \
+      "$pg_cv_cell" "$pg_core_cv_cell" \
+      "$duck_cv_cell" "$duck_core_cv_cell" \
+      ""
+    grid_row_wide \
+      "" \
+      "$sd_shell_p95_cell" "$sd_core_p95_cell" \
+      "$pg_p95_cell" "$pg_core_p95_cell" \
+      "$duck_p95_cell" "$duck_core_p95_cell" \
+      ""
+    grid_line_wide
+  else
+    # Stacked layout for narrow terminals
+    echo "Query  : $base    Fastest: $fastest"
+    printf '  %-20s %s\n' "SystemDS Shell:" "$sd_shell_mean_cell"
+    [[ -n "$sd_shell_cv_cell" ]] && printf '  %-20s %s\n' "" "$sd_shell_cv_cell"
+    [[ -n "$sd_shell_p95_cell" ]] && printf '  %-20s %s\n' "" "$sd_shell_p95_cell"
+    printf '  %-20s %s\n' "SystemDS Core:" "$sd_core_mean_cell"
+    [[ -n "$sd_core_cv_cell" ]] && printf '  %-20s %s\n' "" "$sd_core_cv_cell"
+    [[ -n "$sd_core_p95_cell" ]] && printf '  %-20s %s\n' "" "$sd_core_p95_cell"
+    printf '  %-20s %s\n' "PostgreSQL:" "$pg_mean_cell"
+    [[ -n "$pg_cv_cell" ]] && printf '  %-20s %s\n' "" "$pg_cv_cell"
+    [[ -n "$pg_p95_cell" ]] && printf '  %-20s %s\n' "" "$pg_p95_cell"
+    printf '  %-20s %s\n' "PostgreSQL Core:" "$pg_core_mean_cell"
+    [[ -n "$pg_core_cv_cell" ]] && printf '  %-20s %s\n' "" "$pg_core_cv_cell"
+    [[ -n "$pg_core_p95_cell" ]] && printf '  %-20s %s\n' "" "$pg_core_p95_cell"
+    printf '  %-20s %s\n' "DuckDB:" "$duck_mean_cell"
+    [[ -n "$duck_cv_cell" ]] && printf '  %-20s %s\n' "" "$duck_cv_cell"
+    [[ -n "$duck_p95_cell" ]] && printf '  %-20s %s\n' "" "$duck_p95_cell"
+    printf '  %-20s %s\n' "DuckDB Core:" "$duck_core_mean_cell"
+    [[ -n "$duck_core_cv_cell" ]] && printf '  %-20s %s\n' "" "$duck_core_cv_cell"
+    [[ -n "$duck_core_p95_cell" ]] && printf '  %-20s %s\n' "" "$duck_core_p95_cell"
+    echo "--------------------------------------------------------------------------------"
+  fi
+
+  # Write comprehensive data to CSV
+  echo "$base,\"$sd_shell_display\",$sd_shell_mean,$sd_shell_stdev,$sd_shell_p95,\"$sd_core_display\",$sd_core_mean,$sd_core_stdev,$sd_core_p95,\"$pg_display\",$pg_mean,$pg_stdev,$pg_p95,\"$pg_core_display\",$pg_core_mean,$pg_core_stdev,$pg_core_p95,\"$duck_display\",$duck_mean,$duck_stdev,$duck_p95,\"$duck_core_display\",$duck_core_mean,$duck_core_stdev,$duck_core_p95,$fastest" >> "$RESULT_CSV"
+
+  # Build JSON entry for this query
+  json_entry=$(cat <<JSON_ENTRY
+    {
+      "query": "$base",
+      "systemds": {
+        "shell": {
+          "display": "$sd_shell_display",
+          "mean_ms": $(if [[ "$sd_shell_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$sd_shell_mean"; else echo "null"; fi),
+          "stdev_ms": $(if [[ "$sd_shell_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$sd_shell_stdev"; else echo "null"; fi),
+          "p95_ms": $(if [[ "$sd_shell_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$sd_shell_p95"; else echo "null"; fi)
+        },
+          "core": {
+          "display": "$sd_core_display",
+          "mean_ms": $(if [[ "$sd_core_mean" == "(n/a)" ]]; then echo "null"; else echo "$sd_core_mean"; fi),
+          "stdev_ms": $(if [[ "$sd_core_mean" == "(n/a)" ]]; then echo "null"; else echo "$sd_core_stdev"; fi),
+          "p95_ms": $(if [[ "$sd_core_mean" == "(n/a)" ]]; then echo "null"; else echo "$sd_core_p95"; fi)
+        }
+        "status": "$systemds_status",
+        "error_message": $systemds_error_message
+      },
+      "postgresql": {
+        "display": "$pg_display",
+        "mean_ms": $(if [[ "$pg_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$pg_mean"; else echo "null"; fi),
+        "stdev_ms": $(if [[ "$pg_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$pg_stdev"; else echo "null"; fi),
+        "p95_ms": $(if [[ "$pg_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$pg_p95"; else echo "null"; fi)
+      },
+      "postgresql_core": {
+        "display": "$pg_core_display",
+        "mean_ms": $(if [[ "$pg_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$pg_core_mean"; else echo "null"; fi),
+        "stdev_ms": $(if [[ "$pg_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$pg_core_stdev"; else echo "null"; fi),
+        "p95_ms": $(if [[ "$pg_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$pg_core_p95"; else echo "null"; fi)
+      },
+      "duckdb": {
+        "display": "$duck_display",
+        "mean_ms": $(if [[ "$duck_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$duck_mean"; else echo "null"; fi),
+        "stdev_ms": $(if [[ "$duck_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$duck_stdev"; else echo "null"; fi),
+        "p95_ms": $(if [[ "$duck_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$duck_p95"; else echo "null"; fi)
+      },
+      "duckdb_core": {
+        "display": "$duck_core_display",
+        "mean_ms": $(if [[ "$duck_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$duck_core_mean"; else echo "null"; fi),
+        "stdev_ms": $(if [[ "$duck_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$duck_core_stdev"; else echo "null"; fi),
+        "p95_ms": $(if [[ "$duck_core_mean" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then echo "$duck_core_p95"; else echo "null"; fi)
+      },
+      "fastest_engine": "$fastest"
+    }
+JSON_ENTRY
+)
+  RESULTS_JSON_ARRAY+=("$json_entry")
+ done
+ echo "==================================================================================================================================================================="
+echo
+
+# Generate comprehensive JSON file with metadata and results
+{
+  echo "{"
+  echo "  \"benchmark_metadata\": {"
+  echo "    \"benchmark_type\": \"multi_engine_performance\","
+  echo "    \"timestamp\": \"$RUN_TIMESTAMP\","
+  echo "    \"hostname\": \"$RUN_HOSTNAME\","
+  echo "    \"seed\": $SEED,"
+  echo "    \"software_versions\": {"
+  echo "      \"systemds\": \"$RUN_SYSTEMDS_VERSION\","
+  echo "      \"jdk\": \"$RUN_JDK_VERSION\","
+  echo "      \"postgresql\": \"$RUN_POSTGRES_VERSION\","
+  echo "      \"duckdb\": \"$RUN_DUCKDB_VERSION\""
+  echo "    },"
+  echo "    \"system_resources\": {"
+  echo "      \"cpu\": \"$RUN_CPU_INFO\","
+  echo "      \"ram\": \"$RUN_RAM_INFO\""
+  echo "    },"
+  echo -e "    \"data_build_info\": $RUN_DATA_INFO,"
+  echo "    \"run_configuration\": {"
+  echo "      \"statistics_enabled\": $(if $RUN_STATS; then echo "true"; else echo "false"; fi),"
+  echo "      \"queries_selected\": ${#QUERIES[@]},"
+  echo "      \"warmup_runs\": $WARMUP,"
+  echo "      \"repeat_runs\": $REPEATS"
+  echo "    }"
+  echo "  },"
+  echo "  \"results\": ["
+
+  # Output results array
+  for i in "${!RESULTS_JSON_ARRAY[@]}"; do
+    echo "${RESULTS_JSON_ARRAY[$i]}"
+    if [[ $i -lt $((${#RESULTS_JSON_ARRAY[@]} - 1)) ]]; then
+      echo "    ,"
+    fi
+  done
+
+  echo "  ]"
+  echo "}"
+} > "$RESULT_JSON"
+
+echo "Results saved to $RESULT_CSV"
+echo "Results saved to $RESULT_JSON"
diff --git a/scripts/ssb/shell/run_ssb.sh b/scripts/ssb/shell/run_ssb.sh
new file mode 100755
index 00000000000..e15e2159a23
--- /dev/null
+++ b/scripts/ssb/shell/run_ssb.sh
@@ -0,0 +1,856 @@
+#!/usr/bin/env bash
+#
+# SystemDS Star Schema Benchmark (SSB) Runner
+# ===========================================
+#
+# CORE SCRIPTS STATUS:
+# - Version: 1.0 (September 5, 2025)
+# - Status: Production-Ready with Advanced User Experience
+# - First Public Release: September 5, 2025
+#
+# FEATURES IMPLEMENTED:
+# ✓ Basic SSB query execution with SystemDS 3.4.0-SNAPSHOT
+# ✓ Single-threaded configuration for consistent benchmarking
+# ✓ Progress indicators with real-time updates
+# ✓ Comprehensive timing measurements using /usr/bin/time
+# ✓ Query result extraction (scalar and table formats)
+# ✓ Success/failure tracking with detailed reporting
+# ✓ Query summary table with execution status
+# ✓ "See below" notation with result reprinting (NEW)
+# ✓ Long table outputs displayed after summary (NEW)
+# ✓ Error handling with timeout protection
+# ✓ Cross-platform compatibility (macOS/Linux)
+#
+# RECENT IMPORTANT ADDITIONS:
+# - Accepts --input-dir=PATH and forwards it into DML runs as a SystemDS named
+#   argument: -nvargs input_dir=/path/to/data (DML can use sys.vinput_dir or
+#   the named argument to locate data files instead of hardcoded `data/`).
+# - Fast-fail on missing input directory: the runner verifies the provided
+#   input path exists and exits with a clear error message if not.
+# - Runtime SystemDS error detection: test-run output is scanned for runtime
+#   error blocks (e.g., "An Error Occurred : ..."). Queries with runtime
+#   failures are reported as `status: "error"` and include `error_message`
+#   in generated JSON metadata for easier debugging and CI integration.
+#
+# MAJOR FEATURES IN v1.0 (First Public Release):
+# - Complete SSB query execution with SystemDS 3.4.0-SNAPSHOT
+# - Enhanced "see below" notation with result reprinting
+# - Long table outputs displayed after summary for better UX
+# - Eliminated need to scroll back through terminal output
+# - Maintained array alignment for consistent result tracking
+# - JSON metadata contains complete query results, not "see below"
+# - Added --out-dir option for custom output directory
+# - Multi-format output: TXT, CSV, JSON for each query result
+# - Structured output directory with comprehensive run.json metadata file
+#
+# DEPENDENCIES:
+# - SystemDS binary (3.4.0-SNAPSHOT or later)
+# - Single-threaded configuration file (auto-generated)
+# - SSB query files in scripts/ssb/queries/
+# - Bash 4.0+ with timeout support
+#
+# USAGE (from repo root):
+#   scripts/ssb/shell/run_ssb.sh                    # run all SSB queries
+#   scripts/ssb/shell/run_ssb.sh q1.1 q2.3          # run specific queries
+#   scripts/ssb/shell/run_ssb.sh --stats            # enable internal statistics
+#   scripts/ssb/shell/run_ssb.sh q3.1 --stats       # run specific query with stats
+#   scripts/ssb/shell/run_ssb.sh --seed=12345       # run with specific seed for reproducibility
+#   scripts/ssb/shell/run_ssb.sh --out-dir=/path    # specify output directory for results
+#
+set -euo pipefail
+export LC_ALL=C
+
+# Determine script directory and project root (repo root)
+if command -v realpath >/dev/null 2>&1; then
+  SCRIPT_DIR="$(dirname "$(realpath "$0")")"
+else
+  SCRIPT_DIR="$(python - <<'PY'
+import os, sys
+print(os.path.dirname(os.path.abspath(sys.argv[1])))
+PY
+"$0")"
+fi
+if command -v git >/dev/null 2>&1 && git -C "$SCRIPT_DIR" rev-parse --show-toplevel >/dev/null 2>&1; then
+  PROJECT_ROOT="$(git -C "$SCRIPT_DIR" rev-parse --show-toplevel)"
+else
+  __dir="$SCRIPT_DIR"
+  PROJECT_ROOT=""
+  while [[ "$__dir" != "/" ]]; do
+    if [[ -d "$__dir/.git" || -f "$__dir/pom.xml" ]]; then
+      PROJECT_ROOT="$__dir"; break
+    fi
+    __dir="$(dirname "$__dir")"
+  done
+  : "${PROJECT_ROOT:=$(cd "$SCRIPT_DIR/../../../" && pwd)}"
+fi
+
+# Locate SystemDS executable
+SYSTEMDS_CMD="$PROJECT_ROOT/bin/systemds"
+if [[ ! -x "$SYSTEMDS_CMD" ]]; then
+  SYSTEMDS_CMD="$(command -v systemds || true)"
+fi
+if [[ -z "$SYSTEMDS_CMD" || ! -x "$SYSTEMDS_CMD" ]]; then
+  echo "Error: could not find SystemDS executable." >&2
+  echo "       Tried: $PROJECT_ROOT/bin/systemds and PATH" >&2
+  exit 1
+fi
+
+# Ensure single-threaded configuration file exists
+CONF_DIR="$PROJECT_ROOT/conf"
+SINGLE_THREAD_CONF="$CONF_DIR/single_thread.xml"
+mkdir -p "$CONF_DIR"
+if [[ ! -f "$SINGLE_THREAD_CONF" ]]; then
+cat > "$SINGLE_THREAD_CONF" <<'XML'
+<configuration>
+  <property>
+    <name>sysds.cp.parallel.ops</name><value>false</value>
+  </property>
+  <property>
+    <name>sysds.num.threads</name><value>1</value>
+  </property>
+</configuration>
+XML
+fi
+SYS_EXTRA_ARGS=( "-config" "$SINGLE_THREAD_CONF" )
+
+# Query directory
+QUERY_DIR="$PROJECT_ROOT/scripts/ssb/queries"
+
+# Verify query directory exists
+if [[ ! -d "$QUERY_DIR" ]]; then
+  echo "Error: Query directory not found: $QUERY_DIR" >&2
+  exit 1
+fi
+
+# Help function
+show_help() {
+  cat << 'EOF'
+SystemDS Star Schema Benchmark (SSB) Runner v1.0
+
+USAGE (from repo root):
+  scripts/ssb/shell/run_ssb.sh [OPTIONS] [QUERIES...]
+
+OPTIONS:
+  --stats, -stats         Enable SystemDS internal statistics collection
+  --seed=N, -seed=N      Set random seed for reproducible results (default: auto-generated)
+  --output-dir=PATH, -output-dir=PATH  Specify custom output directory (default: $PROJECT_ROOT/scripts/ssb/shell/ssbOutputData/QueryData)
+  --input-dir=PATH, -input-dir=PATH  Specify custom data directory (default: $PROJECT_ROOT/data)
+  --help, -help, -h, --h Show this help message
+  --version, -version, -v, --v  Show version information
+
+QUERIES:
+  If no queries are specified, all available SSB queries (q*.dml) will be executed.
+  To run specific queries, provide their names (with or without .dml extension):
+    ./run_ssb.sh q1.1 q2.3 q4.1
+
+EXAMPLES (from repo root):
+  scripts/ssb/shell/run_ssb.sh                          # Run all SSB queries
+  scripts/ssb/shell/run_ssb.sh --stats                  # Run all queries with statistics
+  scripts/ssb/shell/run_ssb.sh -stats                   # Same as above (single dash)
+  scripts/ssb/shell/run_ssb.sh q1.1 q2.3                # Run specific queries only
+  scripts/ssb/shell/run_ssb.sh --seed=12345 --stats     # Reproducible run with statistics
+  scripts/ssb/shell/run_ssb.sh -seed=12345 -stats       # Same as above (single dash)
+  scripts/ssb/shell/run_ssb.sh --output-dir=/tmp/results   # Custom output directory
+  scripts/ssb/shell/run_ssb.sh -output-dir=/tmp/results    # Same as above (single dash)
+  scripts/ssb/shell/run_ssb.sh --input-dir=/path/to/data  # Custom data directory
+  scripts/ssb/shell/run_ssb.sh -input-dir=/path/to/data   # Same as above (single dash)
+
+OUTPUT:
+  Results are saved in multiple formats:
+  - TXT: Human-readable format
+  - CSV: Machine-readable data format
+  - JSON: Structured format with metadata
+  - run.json: Complete run metadata and results
+
+For more information, see the documentation in scripts/ssb/README.md
+EOF
+}
+
+# Parse arguments
+RUN_STATS=false
+QUERIES=()
+SEED=""
+OUT_DIR=""
+INPUT_DIR=""
+for arg in "$@"; do
+  if [[ "$arg" == "--help" || "$arg" == "-help" || "$arg" == "-h" || "$arg" == "--h" ]]; then
+    show_help
+    exit 0
+  elif [[ "$arg" == "--version" || "$arg" == "-version" || "$arg" == "-v" || "$arg" == "--v" ]]; then
+    echo "SystemDS Star Schema Benchmark (SSB) Runner v1.0"
+    echo "First Public Release: September 5, 2025"
+    exit 0
+  elif [[ "$arg" == "--stats" || "$arg" == "-stats" ]]; then
+    RUN_STATS=true
+  elif [[ "$arg" == --seed=* || "$arg" == -seed=* ]]; then
+    if [[ "$arg" == --seed=* ]]; then
+      SEED="${arg#--seed=}"
+    else
+      SEED="${arg#-seed=}"
+    fi
+  elif [[ "$arg" == "--seed" || "$arg" == "-seed" ]]; then
+    echo "Error: --seed/-seed requires a value (e.g., --seed=12345 or -seed=12345)" >&2
+    exit 1
+  elif [[ "$arg" == --output-dir=* || "$arg" == -output-dir=* ]]; then
+    if [[ "$arg" == --output-dir=* ]]; then
+      OUT_DIR="${arg#--output-dir=}"
+    else
+      OUT_DIR="${arg#-output-dir=}"
+    fi
+  elif [[ "$arg" == "--output-dir" || "$arg" == "-output-dir" ]]; then
+    echo "Error: --output-dir/-output-dir requires a value (e.g., --output-dir=/path/to/output or -output-dir=/path/to/output)" >&2
+    exit 1
+  elif [[ "$arg" == --input-dir=* || "$arg" == -input-dir=* ]]; then
+    if [[ "$arg" == --input-dir=* ]]; then
+      INPUT_DIR="${arg#--input-dir=}"
+    else
+      INPUT_DIR="${arg#-input-dir=}"
+    fi
+  elif [[ "$arg" == "--input-dir" || "$arg" == "-input-dir" ]]; then
+    echo "Error: --input-dir/-input-dir requires a value (e.g., --input-dir=/path/to/data or -input-dir=/path/to/data)" >&2
+    exit 1
+  else
+    # Check if argument looks like an unrecognized option (starts with dash)
+    if [[ "$arg" == -* ]]; then
+      echo "Error: Unrecognized option '$arg'" >&2
+      echo "Use --help or -h to see available options." >&2
+      exit 1
+    else
+      # Treat as query name
+      name="$(echo "$arg" | tr '.' '_')"
+      QUERIES+=( "$name.dml" )
+    fi
+  fi
+done
+
+# Set default output directory if not provided
+if [[ -z "$OUT_DIR" ]]; then
+  OUT_DIR="$PROJECT_ROOT/scripts/ssb/shell/ssbOutputData/QueryData"
+fi
+
+# Set default input data directory if not provided
+if [[ -z "$INPUT_DIR" ]]; then
+  INPUT_DIR="$PROJECT_ROOT/data"
+fi
+
+# Normalize paths by removing trailing slashes
+INPUT_DIR="${INPUT_DIR%/}"
+OUT_DIR="${OUT_DIR%/}"
+
+# Ensure output directory exists
+mkdir -p "$OUT_DIR"
+
+# Pass input directory to DML scripts via SystemDS named arguments
+NVARGS=( -nvargs "input_dir=${INPUT_DIR}" )
+
+# Validate input data directory exists
+if [[ ! -d "$INPUT_DIR" ]]; then
+  echo "Error: Input data directory '$INPUT_DIR' does not exist." >&2
+  echo "Please create the directory or specify a valid path with --input-dir=PATH" >&2
+  exit 1
+fi
+
+# Generate seed if not provided
+if [[ -z "$SEED" ]]; then
+  SEED=$((RANDOM * 32768 + RANDOM))
+fi
+
+# Discover queries if none provided
+shopt -s nullglob
+if [[ ${#QUERIES[@]} -eq 0 ]]; then
+  for f in "$QUERY_DIR"/q*.dml; do
+    if [[ -f "$f" ]]; then
+      QUERIES+=("$(basename "$f")")
+    fi
+  done
+  if [[ ${#QUERIES[@]} -eq 0 ]]; then
+    echo "Error: No query files found in $QUERY_DIR" >&2
+    exit 1
+  fi
+fi
+shopt -u nullglob
+
+# Metadata collection functions
+collect_system_metadata() {
+  local timestamp hostname systemds_version jdk_version cpu_info ram_info
+
+  # Basic system info
+  timestamp=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
+  hostname=$(hostname 2>/dev/null || echo "unknown")
+
+  # SystemDS version
+  if [[ -x "$SYSTEMDS_CMD" ]]; then
+    # Try to get version from pom.xml first
+    if [[ -f "$PROJECT_ROOT/pom.xml" ]]; then
+      systemds_version=$(grep -A1 '<groupId>org.apache.systemds</groupId>' "$PROJECT_ROOT/pom.xml" | grep '<version>' | sed 's/.*<version>\(.*\)<\/version>.*/\1/' | head -1 2>/dev/null || echo "unknown")
+    else
+      systemds_version="unknown"
+    fi
+
+    # If pom.xml method failed, try alternative methods
+    if [[ "$systemds_version" == "unknown" ]]; then
+      # Try to extract from SystemDS JAR manifest
+      if [[ -f "$PROJECT_ROOT/target/systemds.jar" ]]; then
+        systemds_version=$(unzip -p "$PROJECT_ROOT/target/systemds.jar" META-INF/MANIFEST.MF 2>/dev/null | grep "Implementation-Version" | cut -d: -f2 | tr -d ' ' || echo "unknown")
+      else
+        # Try to find any SystemDS JAR and extract version
+        local jar_file=$(find "$PROJECT_ROOT" -name "systemds*.jar" | head -1 2>/dev/null)
+        if [[ -n "$jar_file" ]]; then
+          systemds_version=$(unzip -p "$jar_file" META-INF/MANIFEST.MF 2>/dev/null | grep "Implementation-Version" | cut -d: -f2 | tr -d ' ' || echo "unknown")
+        else
+          systemds_version="unknown"
+        fi
+      fi
+    fi
+  else
+    systemds_version="unknown"
+  fi
+
+  # JDK version
+  if command -v java >/dev/null 2>&1; then
+    jdk_version=$(java -version 2>&1 | head -1 | sed 's/.*"\(.*\)".*/\1/' || echo "unknown")
+  else
+    jdk_version="unknown"
+  fi
+
+  # System resources
+  if [[ "$(uname)" == "Darwin" ]]; then
+    # macOS
+    cpu_info=$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo "unknown")
+    ram_info=$(( $(sysctl -n hw.memsize 2>/dev/null || echo 0) / 1024 / 1024 / 1024 ))GB
+  else
+    # Linux
+    cpu_info=$(grep "model name" /proc/cpuinfo | head -1 | cut -d: -f2- | sed 's/^ *//' 2>/dev/null || echo "unknown")
+    ram_info=$(( $(grep MemTotal /proc/meminfo | awk '{print $2}' 2>/dev/null || echo 0) / 1024 / 1024 ))GB
+  fi
+
+  # Store metadata globally
+  RUN_TIMESTAMP="$timestamp"
+  RUN_HOSTNAME="$hostname"
+  RUN_SYSTEMDS_VERSION="$systemds_version"
+  RUN_JDK_VERSION="$jdk_version"
+  RUN_CPU_INFO="$cpu_info"
+  RUN_RAM_INFO="$ram_info"
+}
+
+collect_data_metadata() {
+  # Check for SSB data directory and get basic stats
+  local ssb_data_dir="$INPUT_DIR"
+  local json_parts=()
+  local display_parts=()
+
+  if [[ -d "$ssb_data_dir" ]]; then
+    # Try to get row counts from data files (if they exist)
+    for table in customer part supplier date; do
+      local file="$ssb_data_dir/${table}.tbl"
+      if [[ -f "$file" ]]; then
+        local count=$(wc -l < "$file" 2>/dev/null | tr -d ' ' || echo "0")
+        json_parts+=("    \"$table\": \"$count\"")
+        display_parts+=("$table:$count")
+      fi
+    done
+    # Check for any lineorder*.tbl file (SSB fact table)
+    local lineorder_file=$(find "$ssb_data_dir" -name "lineorder*.tbl" -type f | head -1)
+    if [[ -n "$lineorder_file" && -f "$lineorder_file" ]]; then
+      local count=$(wc -l < "$lineorder_file" 2>/dev/null | tr -d ' ' || echo "0")
+      json_parts+=("    \"lineorder\": \"$count\"")
+      display_parts+=("lineorder:$count")
+    fi
+  fi
+
+  if [[ ${#json_parts[@]} -eq 0 ]]; then
+    RUN_DATA_INFO='"No data files found"'
+    RUN_DATA_DISPLAY="No data files found"
+  else
+    # Join array elements with commas and newlines, wrap in braces for JSON
+    local formatted_json="{\n"
+    for i in "${!json_parts[@]}"; do
+      formatted_json+="${json_parts[$i]}"
+      if [[ $i -lt $((${#json_parts[@]} - 1)) ]]; then
+        formatted_json+=",\n"
+      else
+        formatted_json+="\n"
+      fi
+    done
+    formatted_json+="  }"
+    RUN_DATA_INFO="$formatted_json"
+
+    # Join with spaces for display
+    local IFS=" "
+    RUN_DATA_DISPLAY="${display_parts[*]}"
+  fi
+}
+
+# Output format functions
+create_output_structure() {
+  local run_id="$1"
+  local base_dir="$OUT_DIR/ssb_run_$run_id"
+
+  # Create output directory structure
+  mkdir -p "$base_dir"/{txt,csv,json}
+
+  # Set global variables for output paths
+  OUTPUT_BASE_DIR="$base_dir"
+  OUTPUT_TXT_DIR="$base_dir/txt"
+  OUTPUT_CSV_DIR="$base_dir/csv"
+  OUTPUT_JSON_DIR="$base_dir/json"
+  OUTPUT_METADATA_FILE="$base_dir/run.json"
+}
+
+save_query_result_txt() {
+  local query_name="$1"
+  local result_data="$2"
+  local output_file="$OUTPUT_TXT_DIR/${query_name}.txt"
+
+  {
+    echo "========================================="
+    echo "SSB Query: $query_name"
+    echo "========================================="
+    echo "Timestamp: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+    echo "Seed: $SEED"
+    echo ""
+    echo "Result:"
+    echo "---------"
+    echo "$result_data"
+    echo ""
+    echo "========================================="
+  } > "$output_file"
+}
+
+save_query_result_csv() {
+  local query_name="$1"
+  local result_data="$2"
+  local output_file="$OUTPUT_CSV_DIR/${query_name}.csv"
+
+  # Check if result is a single scalar value (including negative numbers and scientific notation)
+  if [[ "$result_data" =~ ^-?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?$ ]]; then
+    # Scalar result
+    {
+      echo "query,result"
+      echo "$query_name,$result_data"
+    } > "$output_file"
+  else
+    # Table result - try to convert to CSV format
+    {
+      echo "# SSB Query: $query_name"
+      echo "# Timestamp: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+      echo "# Seed: $SEED"
+      # Convert space-separated table data to CSV
+      echo "$result_data" | sed 's/  */,/g' | sed 's/^,//g' | sed 's/,$//g'
+    } > "$output_file"
+  fi
+}
+
+save_query_result_json() {
+  local query_name="$1"
+  local result_data="$2"
+  local output_file="$OUTPUT_JSON_DIR/${query_name}.json"
+
+  # Escape quotes and special characters for JSON
+  local escaped_result=$(echo "$result_data" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n' ' ')
+
+  {
+    echo "{"
+    echo "  \"query\": \"$query_name\","
+    echo "  \"timestamp\": \"$(date -u '+%Y-%m-%d %H:%M:%S UTC')\","
+    echo "  \"seed\": $SEED,"
+    echo "  \"result\": \"$escaped_result\","
+    echo "  \"metadata\": {"
+    echo "    \"systemds_version\": \"$RUN_SYSTEMDS_VERSION\","
+    echo "    \"hostname\": \"$RUN_HOSTNAME\""
+    echo "  }"
+    echo "}"
+  } > "$output_file"
+}
+
+save_all_formats() {
+  local query_name="$1"
+  local result_data="$2"
+
+  save_query_result_txt "$query_name" "$result_data"
+  save_query_result_csv "$query_name" "$result_data"
+  save_query_result_json "$query_name" "$result_data"
+}
+
+# Collect metadata
+collect_system_metadata
+collect_data_metadata
+
+# Create output directory structure with timestamp-based run ID
+RUN_ID="$(date +%Y%m%d_%H%M%S)"
+create_output_structure "$RUN_ID"
+
+# Execute queries
+count=0
+failed=0
+SUCCESSFUL_QUERIES=()  # Array to track successfully executed queries
+ALL_RUN_QUERIES=()     # Array to track all queries that were attempted (in order)
+QUERY_STATUS=()        # Array to track status: "success" or "error"
+QUERY_ERROR_MSG=()     # Array to store error messages for failed queries
+QUERY_RESULTS=()       # Array to track query results for display
+QUERY_FULL_RESULTS=()  # Array to track complete query results for JSON
+QUERY_STATS=()         # Array to track SystemDS statistics for JSON
+QUERY_TIMINGS=()       # Array to track execution timing statistics
+LONG_OUTPUTS=()        # Array to store long table outputs for display after summary
+
+# Progress indicator function
+progress_indicator() {
+  local query_name="$1"
+  local current="$2"
+  local total="$3"
+  echo -ne "\r[$current/$total] Running: $query_name                                                   "
+}
+
+for q in "${QUERIES[@]}"; do
+  dml="$QUERY_DIR/$q"
+  if [[ ! -f "$dml" ]]; then
+    echo "Warning: query file '$dml' not found; skipping." >&2
+    continue
+  fi
+
+  # Show progress
+  progress_indicator "$q" "$((count + failed + 1))" "${#QUERIES[@]}"
+
+  # Change to project root directory so relative paths in DML work correctly
+  cd "$PROJECT_ROOT"
+
+  # Clear progress line before showing output
+  echo -ne "\r                                                                                   \r"
+  echo "[$((count + failed + 1))/${#QUERIES[@]}] Running: $q"
+
+  # Record attempted query
+  ALL_RUN_QUERIES+=("$q")
+
+  if $RUN_STATS; then
+    # Capture output to extract result
+    temp_output=$(mktemp)
+  if "$SYSTEMDS_CMD" "$dml" -stats "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}" | tee "$temp_output"; then
+      # Even when SystemDS exits 0, the DML can emit runtime errors. Detect common error markers.
+      error_msg=$(sed -n '/An Error Occurred :/,$ p' "$temp_output" | sed -n '1,200p' | tr '\n' ' ' | sed 's/^ *//;s/ *$//')
+      if [[ -n "$error_msg" ]]; then
+        echo "Error: Query $q reported runtime error" >&2
+        echo "$error_msg" >&2
+        failed=$((failed+1))
+        QUERY_STATUS+=("error")
+        QUERY_ERROR_MSG+=("$error_msg")
+        # Maintain array alignment
+        QUERY_STATS+=("")
+        QUERY_RESULTS+=("N/A")
+        QUERY_FULL_RESULTS+=("N/A")
+        LONG_OUTPUTS+=("")
+      else
+        count=$((count+1))
+        SUCCESSFUL_QUERIES+=("$q")  # Track successful query
+        QUERY_STATUS+=("success")
+      # Extract result - try multiple patterns with timeouts to prevent hanging:
+      # 1. Simple scalar pattern like "REVENUE: 687752409"
+      result=$(timeout 5s grep -E "^[A-Z_]+:\s*[0-9]+" "$temp_output" | tail -1 | awk '{print $2}' 2>/dev/null || true)
+      full_result="$result"  # For scalar results, display and full results are the same
+
+      # 2. If no scalar pattern, check for table output and get row count
+      if [[ -z "$result" ]]; then
+        # Look for frame info like "# FRAME: nrow = 53, ncol = 3"
+        nrows=$(timeout 5s grep "# FRAME: nrow =" "$temp_output" | awk '{print $5}' | tr -d ',' 2>/dev/null || true)
+        if [[ -n "$nrows" ]]; then
+          result="${nrows} rows (see below)"
+          # Extract and store the long output for later display (excluding statistics)
+          long_output=$(grep -v "^#" "$temp_output" | grep -v "WARNING" | grep -v "WARN" | grep -v "^$" | sed '/^SystemDS Statistics:/,$ d')
+          LONG_OUTPUTS+=("$long_output")
+          # For JSON, store the actual table data
+          full_result="$long_output"
+        else
+          # Count actual data rows (lines with numbers, excluding headers and comments) - limit to prevent hanging
+          nrows=$(timeout 5s grep -E "^[0-9]" "$temp_output" | sed '/^SystemDS Statistics:/,$ d' | head -1000 | wc -l | tr -d ' ' 2>/dev/null || echo "0")
+          if [[ "$nrows" -gt 0 ]]; then
+            result="${nrows} rows (see below)"
+            # Extract and store the long output for later display (excluding statistics)
+            long_output=$(grep -E "^[0-9]" "$temp_output" | sed '/^SystemDS Statistics:/,$ d' | head -1000)
+            LONG_OUTPUTS+=("$long_output")
+            # For JSON, store the actual table data
+            full_result="$long_output"
+          else
+            result="N/A"
+            full_result="N/A"
+            LONG_OUTPUTS+=("")  # Empty placeholder to maintain array alignment
+          fi
+        fi
+      else
+        LONG_OUTPUTS+=("")  # Empty placeholder for scalar results to maintain array alignment
+      fi
+      QUERY_RESULTS+=("$result")  # Track query result for display
+      QUERY_FULL_RESULTS+=("$full_result")  # Track complete query result for JSON
+
+      # Save result in all formats
+      query_name_clean="${q%.dml}"
+
+      # Extract and store statistics for JSON (preserving newlines)
+      stats_output=$(sed -n '/^SystemDS Statistics:/,$ p' "$temp_output")
+  QUERY_STATS+=("$stats_output")  # Track statistics for JSON
+
+      save_all_formats "$query_name_clean" "$full_result"
+      fi
+    else
+      echo "Error: Query $q failed" >&2
+      failed=$((failed+1))
+      QUERY_STATUS+=("error")
+      QUERY_ERROR_MSG+=("Query execution failed (non-zero exit)")
+      # Add empty stats entry for failed queries to maintain array alignment
+      QUERY_STATS+=("")
+    fi
+    rm -f "$temp_output"
+  else
+    # Capture output to extract result
+    temp_output=$(mktemp)
+  if "$SYSTEMDS_CMD" "$dml" "${SYS_EXTRA_ARGS[@]}" "${NVARGS[@]}" | tee "$temp_output"; then
+      # Detect runtime errors in output even if command returned 0
+      error_msg=$(sed -n '/An Error Occurred :/,$ p' "$temp_output" | sed -n '1,200p' | tr '\n' ' ' | sed 's/^ *//;s/ *$//')
+      if [[ -n "$error_msg" ]]; then
+        echo "Error: Query $q reported runtime error" >&2
+        echo "$error_msg" >&2
+        failed=$((failed+1))
+        QUERY_STATUS+=("error")
+        QUERY_ERROR_MSG+=("$error_msg")
+        QUERY_STATS+=("")
+        QUERY_RESULTS+=("N/A")
+        QUERY_FULL_RESULTS+=("N/A")
+        LONG_OUTPUTS+=("")
+      else
+        count=$((count+1))
+        SUCCESSFUL_QUERIES+=("$q")  # Track successful query
+        QUERY_STATUS+=("success")
+      # Extract result - try multiple patterns with timeouts to prevent hanging:
+      # 1. Simple scalar pattern like "REVENUE: 687752409"
+      result=$(timeout 5s grep -E "^[A-Z_]+:\s*[0-9]+" "$temp_output" | tail -1 | awk '{print $2}' 2>/dev/null || true)
+      full_result="$result"  # For scalar results, display and full results are the same
+
+      # 2. If no scalar pattern, check for table output and get row count
+      if [[ -z "$result" ]]; then
+        # Look for frame info like "# FRAME: nrow = 53, ncol = 3"
+        nrows=$(timeout 5s grep "# FRAME: nrow =" "$temp_output" | awk '{print $5}' | tr -d ',' 2>/dev/null || true)
+        if [[ -n "$nrows" ]]; then
+          result="${nrows} rows (see below)"
+          # Extract and store the long output for later display
+          long_output=$(grep -v "^#" "$temp_output" | grep -v "WARNING" | grep -v "WARN" | grep -v "^$" | tail -n +1)
+          LONG_OUTPUTS+=("$long_output")
+          # For JSON, store the actual table data
+          full_result="$long_output"
+        else
+          # Count actual data rows (lines with numbers, excluding headers and comments) - limit to prevent hanging
+          nrows=$(timeout 5s grep -E "^[0-9]" "$temp_output" | head -1000 | wc -l | tr -d ' ' 2>/dev/null || echo "0")
+          if [[ "$nrows" -gt 0 ]]; then
+            result="${nrows} rows (see below)"
+            # Extract and store the long output for later display
+            long_output=$(grep -E "^[0-9]" "$temp_output" | head -1000)
+            LONG_OUTPUTS+=("$long_output")
+            # For JSON, store the actual table data
+            full_result="$long_output"
+          else
+            result="N/A"
+            full_result="N/A"
+            LONG_OUTPUTS+=("")  # Empty placeholder to maintain array alignment
+          fi
+        fi
+      else
+        LONG_OUTPUTS+=("")  # Empty placeholder for scalar results to maintain array alignment
+      fi
+      QUERY_RESULTS+=("$result")  # Track query result for display
+      QUERY_FULL_RESULTS+=("$full_result")  # Track complete query result for JSON
+
+  # Add empty stats entry for non-stats runs to maintain array alignment
+  QUERY_STATS+=("")
+
+      # Save result in all formats
+      query_name_clean="${q%.dml}"
+      save_all_formats "$query_name_clean" "$full_result"
+      fi
+    else
+      echo "Error: Query $q failed" >&2
+      failed=$((failed+1))
+      QUERY_STATUS+=("error")
+      QUERY_ERROR_MSG+=("Query execution failed (non-zero exit)")
+      # Add empty stats entry for failed queries to maintain array alignment
+      QUERY_STATS+=("")
+    fi
+    rm -f "$temp_output"
+  fi
+done
+
+# Summary
+echo ""
+echo "========================================="
+echo "SSB benchmark completed!"
+echo "Total queries executed: $count"
+if [[ $failed -gt 0 ]]; then
+  echo "Failed queries: $failed"
+fi
+if $RUN_STATS; then
+  echo "Statistics: enabled"
+else
+  echo "Statistics: disabled"
+fi
+
+# Display run metadata summary
+echo ""
+echo "========================================="
+echo "RUN METADATA SUMMARY"
+echo "========================================="
+echo "Timestamp:       $RUN_TIMESTAMP"
+echo "Hostname:        $RUN_HOSTNAME"
+echo "Seed:            $SEED"
+echo ""
+echo "Software Versions:"
+echo "  SystemDS:      $RUN_SYSTEMDS_VERSION"
+echo "  JDK:           $RUN_JDK_VERSION"
+echo ""
+echo "System Resources:"
+echo "  CPU:           $RUN_CPU_INFO"
+echo "  RAM:           $RUN_RAM_INFO"
+echo ""
+echo "Data Build Info:"
+echo "  SSB Data:      $RUN_DATA_DISPLAY"
+echo "========================================="
+
+# Generate metadata JSON file (include all attempted queries with status and error messages)
+{
+  echo "{"
+  echo "  \"benchmark_type\": \"ssb_systemds\","
+  echo "  \"timestamp\": \"$RUN_TIMESTAMP\","
+  echo "  \"hostname\": \"$RUN_HOSTNAME\","
+  echo "  \"seed\": $SEED,"
+  echo "  \"software_versions\": {"
+  echo "    \"systemds\": \"$RUN_SYSTEMDS_VERSION\","
+  echo "    \"jdk\": \"$RUN_JDK_VERSION\""
+  echo "  },"
+  echo "  \"system_resources\": {"
+  echo "    \"cpu\": \"$RUN_CPU_INFO\","
+  echo "    \"ram\": \"$RUN_RAM_INFO\""
+  echo "  },"
+  echo -e "  \"data_build_info\": $RUN_DATA_INFO,"
+  echo "  \"run_configuration\": {"
+  echo "    \"statistics_enabled\": $(if $RUN_STATS; then echo "true"; else echo "false"; fi),"
+  echo "    \"queries_selected\": ${#QUERIES[@]},"
+  echo "    \"queries_executed\": $count,"
+  echo "    \"queries_failed\": $failed"
+  echo "  },"
+  echo "  \"results\": ["
+  for i in "${!ALL_RUN_QUERIES[@]}"; do
+    query="${ALL_RUN_QUERIES[$i]}"
+    status="${QUERY_STATUS[$i]:-error}"
+    error_msg="${QUERY_ERROR_MSG[$i]:-}"
+    # Find matching full_result and stats by searching SUCCESSFUL_QUERIES index
+    full_result=""
+    stats_result=""
+    if [[ "$status" == "success" ]]; then
+      # Find index in SUCCESSFUL_QUERIES
+      for j in "${!SUCCESSFUL_QUERIES[@]}"; do
+        if [[ "${SUCCESSFUL_QUERIES[$j]}" == "$query" ]]; then
+          full_result="${QUERY_FULL_RESULTS[$j]}"
+          stats_result="${QUERY_STATS[$j]}"
+          break
+        fi
+      done
+    fi
+    # Escape quotes and newlines for JSON
+    escaped_result=$(echo "$full_result" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n' ' ')
+    escaped_error=$(echo "$error_msg" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | tr '\n' ' ')
+
+    echo "    {"
+    echo "      \"query\": \"${query%.dml}\","
+    echo "      \"status\": \"$status\","
+    echo "      \"error_message\": \"$escaped_error\","
+    echo "      \"result\": \"$escaped_result\""
+    if [[ -n "$stats_result" ]]; then
+      echo "      ,\"stats\": ["
+      echo "$stats_result" | sed 's/\\/\\\\/g' | sed 's/"/\\"/g' | sed 's/\t/    /g' | awk '
+        BEGIN { first = 1 }
+        {
+          if (!first) printf ",\n"
+          printf "        \"%s\"", $0
+          first = 0
+        }
+        END { if (!first) printf "\n" }
+      '
+      echo "      ]"
+    fi
+    if [[ $i -lt $((${#ALL_RUN_QUERIES[@]} - 1)) ]]; then
+      echo "    },"
+    else
+      echo "    }"
+    fi
+  done
+  echo "  ]"
+  echo "}"
+} > "$OUTPUT_METADATA_FILE"
+
+echo ""
+echo "Metadata saved to $OUTPUT_METADATA_FILE"
+echo "Output directory: $OUTPUT_BASE_DIR"
+echo "  - TXT files: $OUTPUT_TXT_DIR"
+echo "  - CSV files: $OUTPUT_CSV_DIR"
+echo "  - JSON files: $OUTPUT_JSON_DIR"
+
+# Detailed per-query summary (show status and error messages if any)
+if [[ ${#ALL_RUN_QUERIES[@]} -gt 0 ]]; then
+  echo ""
+  echo "==================================================="
+  echo "QUERIES SUMMARY"
+  echo "==================================================="
+  printf "%-4s %-15s %-30s %s\n" "No." "Query" "Result" "Status"
+  echo "---------------------------------------------------"
+  for i in "${!ALL_RUN_QUERIES[@]}"; do
+    query="${ALL_RUN_QUERIES[$i]}"
+    query_display="${query%.dml}"  # Remove .dml extension for display
+    status="${QUERY_STATUS[$i]:-error}"
+    if [[ "$status" == "success" ]]; then
+      # Find index in SUCCESSFUL_QUERIES to fetch result
+      result=""
+      for j in "${!SUCCESSFUL_QUERIES[@]}"; do
+        if [[ "${SUCCESSFUL_QUERIES[$j]}" == "$query" ]]; then
+          result="${QUERY_RESULTS[$j]}"
+          break
+        fi
+      done
+      printf "%-4d %-15s %-30s %s\n" "$((i+1))" "$query_display" "$result" "✓ Success"
+    else
+      err="${QUERY_ERROR_MSG[$i]:-Unknown error}"
+      printf "%-4d %-15s %-30s %s\n" "$((i+1))" "$query_display" "N/A" "ERROR: ${err}"
+    fi
+  done
+echo "==================================================="
+fi
+
+# Display long outputs for queries that had table results
+if [[ ${#SUCCESSFUL_QUERIES[@]} -gt 0 ]]; then
+  # Check if we have any long outputs to display
+  has_long_outputs=false
+  for i in "${!LONG_OUTPUTS[@]}"; do
+    if [[ -n "${LONG_OUTPUTS[$i]}" ]]; then
+      has_long_outputs=true
+      break
+    fi
+  done
+
+  if $has_long_outputs; then
+    echo ""
+    echo "========================================="
+    echo "DETAILED QUERY RESULTS"
+    echo "========================================="
+    for i in "${!SUCCESSFUL_QUERIES[@]}"; do
+      if [[ -n "${LONG_OUTPUTS[$i]}" ]]; then
+        query="${SUCCESSFUL_QUERIES[$i]}"
+        query_display="${query%.dml}"  # Remove .dml extension for display
+        echo ""
+        echo "[$((i+1))] Results for $query_display:"
+        echo "----------------------------------------"
+        echo "${LONG_OUTPUTS[$i]}"
+        echo "----------------------------------------"
+      fi
+    done
+    echo "========================================="
+  fi
+fi
+
+# Exit with appropriate code
+if [[ $failed -gt 0 ]]; then
+  exit 1
+fi
diff --git a/scripts/ssb/sql/q1.1.sql b/scripts/ssb/sql/q1.1.sql
new file mode 100644
index 00000000000..02e3844d12c
--- /dev/null
+++ b/scripts/ssb/sql/q1.1.sql
@@ -0,0 +1,7 @@
+SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+    lo_orderdate = d_datekey
+    AND d_year = 1993
+    AND lo_discount BETWEEN 1 AND 3
+    AND lo_quantity < 25;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q1.2.sql b/scripts/ssb/sql/q1.2.sql
new file mode 100644
index 00000000000..834d73f623f
--- /dev/null
+++ b/scripts/ssb/sql/q1.2.sql
@@ -0,0 +1,7 @@
+SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+    lo_orderdate = d_datekey
+    AND d_yearmonth = 'Jan1994'
+    AND lo_discount BETWEEN 4 AND 6
+    AND lo_quantity BETWEEN 26 AND 35;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q1.3.sql b/scripts/ssb/sql/q1.3.sql
new file mode 100644
index 00000000000..7a09490b840
--- /dev/null
+++ b/scripts/ssb/sql/q1.3.sql
@@ -0,0 +1,9 @@
+SELECT
+    SUM(lo_extendedprice * lo_discount) AS REVENUE
+FROM lineorder, dates
+WHERE
+    lo_orderdate = d_datekey
+    AND d_weeknuminyear = 6
+    AND d_year = 1994
+    AND lo_discount BETWEEN 5 AND 7
+    AND lo_quantity BETWEEN 26 AND 35;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q2.1.sql b/scripts/ssb/sql/q2.1.sql
new file mode 100644
index 00000000000..f455ff9e935
--- /dev/null
+++ b/scripts/ssb/sql/q2.1.sql
@@ -0,0 +1,10 @@
+SELECT SUM(lo_revenue), d_year, p_brand
+FROM lineorder, dates, part, supplier
+WHERE
+    lo_orderdate = d_datekey
+    AND lo_partkey = p_partkey
+    AND lo_suppkey = s_suppkey
+    AND p_category = 'MFGR#12'
+    AND s_region = 'AMERICA'
+GROUP BY d_year, p_brand
+ORDER BY p_brand;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q2.2.sql b/scripts/ssb/sql/q2.2.sql
new file mode 100644
index 00000000000..e28d55153c2
--- /dev/null
+++ b/scripts/ssb/sql/q2.2.sql
@@ -0,0 +1,10 @@
+SELECT SUM(lo_revenue), d_year, p_brand
+FROM lineorder, dates, part, supplier
+WHERE
+    lo_orderdate = d_datekey
+    AND lo_partkey = p_partkey
+    AND lo_suppkey = s_suppkey
+    AND p_brand BETWEEN 'MFGR#2221' AND 'MFGR#2228'
+    AND s_region = 'ASIA'
+GROUP BY d_year, p_brand
+ORDER BY d_year, p_brand;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q2.3.sql b/scripts/ssb/sql/q2.3.sql
new file mode 100644
index 00000000000..8ec135cef0a
--- /dev/null
+++ b/scripts/ssb/sql/q2.3.sql
@@ -0,0 +1,10 @@
+SELECT SUM(lo_revenue), d_year, p_brand
+FROM lineorder, dates, part, supplier
+WHERE
+    lo_orderdate = d_datekey
+    AND lo_partkey = p_partkey
+    AND lo_suppkey = s_suppkey
+    AND p_brand = 'MFGR#2239'
+    AND s_region = 'EUROPE'
+GROUP BY d_year, p_brand
+ORDER BY d_year, p_brand;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q3.1.sql b/scripts/ssb/sql/q3.1.sql
new file mode 100644
index 00000000000..badd93f973a
--- /dev/null
+++ b/scripts/ssb/sql/q3.1.sql
@@ -0,0 +1,16 @@
+SELECT
+    c_nation,
+    s_nation,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND c_region = 'ASIA'
+    AND s_region = 'ASIA'
+    AND d_year >= 1992
+    AND d_year <= 1997
+GROUP BY c_nation, s_nation, d_year
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q3.2.sql b/scripts/ssb/sql/q3.2.sql
new file mode 100644
index 00000000000..fc5564d3b6e
--- /dev/null
+++ b/scripts/ssb/sql/q3.2.sql
@@ -0,0 +1,16 @@
+SELECT
+    c_city,
+    s_city,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND c_nation = 'UNITED STATES'
+    AND s_nation = 'UNITED STATES'
+    AND d_year >= 1992
+    AND d_year <= 1997
+GROUP BY c_city, s_city, d_year
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q3.3.sql b/scripts/ssb/sql/q3.3.sql
new file mode 100644
index 00000000000..5fdfdf39eae
--- /dev/null
+++ b/scripts/ssb/sql/q3.3.sql
@@ -0,0 +1,22 @@
+SELECT
+    c_city,
+    s_city,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND (
+        c_city = 'UNITED KI1'
+        OR c_city = 'UNITED KI5'
+    )
+    AND (
+        s_city = 'UNITED KI1'
+        OR s_city = 'UNITED KI5'
+    )
+    AND d_year >= 1992
+    AND d_year <= 1997
+GROUP BY c_city, s_city, d_year
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q3.4.sql b/scripts/ssb/sql/q3.4.sql
new file mode 100644
index 00000000000..a94a81795f5
--- /dev/null
+++ b/scripts/ssb/sql/q3.4.sql
@@ -0,0 +1,21 @@
+SELECT
+    c_city,
+    s_city,
+    d_year,
+    SUM(lo_revenue) AS REVENUE
+FROM customer, lineorder, supplier, dates
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_orderdate = d_datekey
+    AND (
+        c_city = 'UNITED KI1'
+        OR c_city = 'UNITED KI5'
+    )
+    AND (
+        s_city = 'UNITED KI1'
+        OR s_city = 'UNITED KI5'
+    )
+    AND d_yearmonth = 'Dec1997'
+GROUP BY c_city, s_city, d_year
+ORDER BY d_year ASC, REVENUE DESC;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q4.1.sql b/scripts/ssb/sql/q4.1.sql
new file mode 100644
index 00000000000..a7d48bfe436
--- /dev/null
+++ b/scripts/ssb/sql/q4.1.sql
@@ -0,0 +1,18 @@
+SELECT
+    d_year,
+    c_nation,
+    SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_partkey = p_partkey
+    AND lo_orderdate = d_datekey
+    AND c_region = 'AMERICA'
+    AND s_region = 'AMERICA'
+    AND (
+        p_mfgr = 'MFGR#1'
+        OR p_mfgr = 'MFGR#2'
+    )
+GROUP BY d_year, c_nation
+ORDER BY d_year, c_nation;
diff --git a/scripts/ssb/sql/q4.2.sql b/scripts/ssb/sql/q4.2.sql
new file mode 100644
index 00000000000..1c68951d58d
--- /dev/null
+++ b/scripts/ssb/sql/q4.2.sql
@@ -0,0 +1,23 @@
+SELECT
+    d_year,
+    s_nation,
+    p_category,
+    SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_partkey = p_partkey
+    AND lo_orderdate = d_datekey
+    AND c_region = 'AMERICA'
+    AND s_region = 'AMERICA'
+    AND (
+        d_year = 1997
+        OR d_year = 1998
+    )
+    AND (
+        p_mfgr = 'MFGR#1'
+        OR p_mfgr = 'MFGR#2'
+    )
+GROUP BY d_year, s_nation, p_category
+ORDER BY d_year, s_nation, p_category;
\ No newline at end of file
diff --git a/scripts/ssb/sql/q4.3.sql b/scripts/ssb/sql/q4.3.sql
new file mode 100644
index 00000000000..815ab2d8a56
--- /dev/null
+++ b/scripts/ssb/sql/q4.3.sql
@@ -0,0 +1,19 @@
+SELECT
+    d_year,
+    s_city,
+    p_brand,
+    SUM(lo_revenue - lo_supplycost) AS PROFIT
+FROM dates, customer, supplier, part, lineorder
+WHERE
+    lo_custkey = c_custkey
+    AND lo_suppkey = s_suppkey
+    AND lo_partkey = p_partkey
+    AND lo_orderdate = d_datekey
+    AND s_nation = 'UNITED STATES'
+    AND (
+        d_year = 1997
+        OR d_year = 1998
+    )
+    AND p_category = 'MFGR#14'
+GROUP BY d_year, s_city, p_brand
+ORDER BY d_year, s_city, p_brand;
\ No newline at end of file
diff --git a/spark_config.xml b/spark_config.xml
new file mode 100644
index 00000000000..8db991ba42d
--- /dev/null
+++ b/spark_config.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<config>
+    <sysds>
+        <spark>
+            <spark.app.name>SSB_Q1_1_Test</spark.app.name>
+            <spark.master>local[*]</spark.master>
+            <spark.driver.memory>4g</spark.driver.memory>
+            <spark.executor.memory>4g</spark.executor.memory>
+            <spark.driver.maxResultSize>2g</spark.driver.maxResultSize>
+        </spark>
+    </sysds>
+</config>
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/hops/BinaryOp.java b/src/main/java/org/apache/sysds/hops/BinaryOp.java
index 2b803a053c1..4dd5e1f243d 100644
--- a/src/main/java/org/apache/sysds/hops/BinaryOp.java
+++ b/src/main/java/org/apache/sysds/hops/BinaryOp.java
@@ -854,6 +854,9 @@ else if( (op == OpOp2.CBIND && getDataType().isList())
 			_etype = ExecType.CP;
 		}
 		
+		if( _etype == ExecType.OOC ) //TODO
+			setExecType(ExecType.CP);
+		
 		//mark for recompile (forever)
 		setRequiresRecompileIfNecessary();
 		
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
index 34a8aa18631..67f9f698a97 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/CacheableData.java
@@ -73,8 +73,8 @@
 
 /**
  * Each object of this class is a cache envelope for some large piece of data
- * called "cache block". For example, the body of a matrix can be the cache block.  
- * The term cache block refers strictly to the cacheable portion of the data object, 
+ * called "cache block". For example, the body of a matrix can be the cache block.
+ * The term cache block refers strictly to the cacheable portion of the data object,
  * often excluding metadata and auxiliary parameters, as defined in the subclasses.
  * Under the protection of the envelope, the data blob may be evicted to
  * the file system; then the subclass must set its reference to <code>null</code>
@@ -96,43 +96,43 @@ public abstract class CacheableData<T extends CacheBlock<?>> extends Data
 	public static final String  CACHING_EVICTION_FILEEXTENSION = ".dat";
 	public static final boolean CACHING_ASYNC_FILECLEANUP = true;
 	public static boolean CACHING_ASYNC_SERIALIZE = false;
-	
+
 	//NOTE CACHING_ASYNC_SERIALIZE:
-	// The serialization of matrices and frames (ultra-sparse matrices or 
-	// frames with strings) into buffer pool byte arrays happens outside the 
+	// The serialization of matrices and frames (ultra-sparse matrices or
+	// frames with strings) into buffer pool byte arrays happens outside the
 	// critical region of the global lock in LazyWriteBuffer. However, it still
-	// requires thread-local serialization (before returning from release) in 
-	// order to guarantee that not too many objects are pinned at the same time 
-	// which would violate the memory budget. Therefore, the new asynchronous 
+	// requires thread-local serialization (before returning from release) in
+	// order to guarantee that not too many objects are pinned at the same time
+	// which would violate the memory budget. Therefore, the new asynchronous
 	// serialization (see CACHING_ASYNC_SERIALIZE) should be understood as
 	// optimistic with weaker guarantees.
-	
+
 	/**
 	 * Defines all possible cache status types for a data blob.
 	 * An object of class {@link CacheableData} can be in one of the following
 	 * five status types:
 	 *
-	 * <code>EMPTY</code>: Either there is no data blob at all, or the data blob  
+	 * <code>EMPTY</code>: Either there is no data blob at all, or the data blob
 	 * resides in a specified import file and has never been downloaded yet.
 	 * <code>READ</code>:   The data blob is in main memory; one or more threads are
 	 * referencing and reading it (shared "read-only" lock).  This status uses a
 	 * counter.  Eviction is NOT allowed.
 	 * <code>MODIFY</code>:   The data blob is in main memory; exactly one thread is
 	 * referencing and modifying it (exclusive "write" lock).  Eviction is NOT allowed.
-	 * <code>CACHED</code>:   The data blob is in main memory, and nobody is using nor referencing it. 
+	 * <code>CACHED</code>:   The data blob is in main memory, and nobody is using nor referencing it.
 	 * There is always an persistent recovery object for it
 	 **/
 	public enum CacheStatus {
-		EMPTY, 
-		READ, 
-		MODIFY, 
+		EMPTY,
+		READ,
+		MODIFY,
 		CACHED,
 		CACHED_NOWRITE,
 	}
-	
+
 	/** Global flag indicating if caching is enabled (controls eviction) */
 	private static volatile boolean _activeFlag = false;
-	
+
 	/** Global sequence for generating unique ids. */
 	private static IDSequence _seq = null;
 
@@ -147,9 +147,9 @@ public enum CacheStatus {
 		@Override protected Long initialValue() { return 0L; }
 	};
 
-	//current size of live broadcast objects (because Spark's ContextCleaner maintains 
-	//a buffer with references to prevent eager cleanup by GC); note that this is an 
-	//overestimate, because we maintain partitioned broadcasts as soft references, which 
+	//current size of live broadcast objects (because Spark's ContextCleaner maintains
+	//a buffer with references to prevent eager cleanup by GC); note that this is an
+	//overestimate, because we maintain partitioned broadcasts as soft references, which
 	//might be collected by the GC and subsequently cleaned up by Spark's ContextCleaner.
 	private static final AtomicLong _refBCs = new AtomicLong(0);
 
@@ -159,16 +159,16 @@ public enum CacheStatus {
 
 	/**
 	 * The unique (JVM-wide) ID of a cacheable data object; to ensure unique IDs across JVMs, we
-	 * concatenate filenames with a unique prefix (map task ID). 
+	 * concatenate filenames with a unique prefix (map task ID).
 	 */
 	private final long _uniqueID;
-	
+
 	/** The cache status of the data blob (whether it can be or is evicted, etc. */
 	private CacheStatus _cacheStatus = null;
-	
+
 	/** Cache for actual data, evicted by garbage collector. */
 	protected SoftReference<T> _cache = null;
-	
+
 	/** Container object that holds the actual data. */
 	protected T _data = null;
 
@@ -177,47 +177,47 @@ public enum CacheStatus {
 	 * includes: 1) Matrix dimensions, if available 2) Number of non-zeros, if
 	 * available 3) Block dimensions, if applicable 4) InputInfo -- subsequent
 	 * operations that use this Matrix expect it to be in this format.
-	 * 
+	 *
 	 * When the matrix is written to HDFS (local file system, as well?), one
 	 * must get the OutputInfo that matches with InputInfo stored inside _mtd.
 	 */
 	protected MetaData _metaData = null;
-	
+
 	protected FederationMap _fedMapping = null;
 
 	protected boolean _compressed = false;
 
 	protected long _compressedSize = -1;
-	
+
 	/** The name of HDFS file in which the data is backed up. */
 	protected String _hdfsFileName = null; // file name and path
 	protected boolean _isPRead = false; //persistent read, must not be deleted
-	
-	/** 
-	 * Flag that indicates whether or not hdfs file exists.It is used 
-	 * for improving the performance of "rmvar" instruction. When it has 
-	 * value <code>false</code>, one can skip file system existence 
+
+	/**
+	 * Flag that indicates whether or not hdfs file exists.It is used
+	 * for improving the performance of "rmvar" instruction. When it has
+	 * value <code>false</code>, one can skip file system existence
 	 * checks which can be expensive.
 	 */
-	private boolean _hdfsFileExists = false; 
+	private boolean _hdfsFileExists = false;
 
 	/** Information relevant to specific external file formats. */
 	private FileFormatProperties _formatProps = null;
-	
+
 	/**
 	 * <code>true</code> if the in-memory or evicted matrix may be different from
 	 * the matrix located at {@link #_hdfsFileName}; <code>false</code> if the two
 	 * matrices should be the same.
 	 */
 	private boolean _dirtyFlag = false;
-	
+
 	// additional private flags and meta data
 	private int     _numReadThreads = 0;   //number of threads for read from HDFS
-	private boolean _cleanupFlag = true;   //flag if obj unpinned (cleanup enabled)	
+	private boolean _cleanupFlag = true;   //flag if obj unpinned (cleanup enabled)
 	private String  _cacheFileName = null; //local eviction file name
 	private boolean _requiresLocalWrite = false; //flag if local write for read obj
-	private boolean _isAcquireFromEmpty = false; //flag if read from status empty 
-	
+	private boolean _isAcquireFromEmpty = false; //flag if read from status empty
+
 	//backend-specific handles
 	//note: we use the abstraction of LineageObjects for two reasons: (1) to keep track of cleanup
 	//for lazily evaluated RDDs, and (2) as abstraction for environments that do not necessarily have spark libraries available
@@ -225,13 +225,13 @@ public enum CacheStatus {
 	private BroadcastObject<T> _bcHandle = null; //Broadcast handle
 	protected HashMap<GPUContext, GPUObject> _gpuObjects = null; //Per GPUContext object allocated on GPU
 	//TODO generalize for frames
-	private OOCStreamable<IndexedMatrixValue> _streamHandle = null;
-	
+	private LocalTaskQueue<IndexedMatrixValue> _streamHandle = null;
+
 	private LineageItem _lineage = null;
-	
+
 	/**
 	 * Basic constructor for any cacheable data.
-	 * 
+	 *
 	 * @param dt data type
 	 * @param vt value type
 	 */
@@ -242,28 +242,28 @@ protected CacheableData(DataType dt, ValueType vt) {
 		_numReadThreads = 0;
 		_gpuObjects = DMLScript.USE_ACCELERATOR ? new HashMap<>() : null;
 	}
-	
+
 	/**
 	 * Copy constructor for cacheable data (of same type).
-	 * 
+	 *
 	 * @param that cacheable data object
 	 */
 	protected CacheableData(CacheableData<T> that) {
 		this( that.getDataType(), that.getValueType() );
 		_cleanupFlag = that._cleanupFlag;
 		_hdfsFileName = that._hdfsFileName;
-		_hdfsFileExists = that._hdfsFileExists; 
+		_hdfsFileExists = that._hdfsFileExists;
 		_gpuObjects = that._gpuObjects;
 		_dirtyFlag = that._dirtyFlag;
 		_compressed = that._compressed;
 		_compressedSize = that._compressedSize;
 		_fedMapping = that._fedMapping;
 	}
-	
+
 	/**
-	 * Enables or disables the cleanup of the associated 
+	 * Enables or disables the cleanup of the associated
 	 * data object on clearData().
-	 * 
+	 *
 	 * @param flag true if cleanup
 	 */
 	public void enableCleanup(boolean flag) {
@@ -271,15 +271,15 @@ public void enableCleanup(boolean flag) {
 	}
 
 	/**
-	 * Indicates if cleanup of the associated data object 
+	 * Indicates if cleanup of the associated data object
 	 * is enabled on clearData().
-	 * 
+	 *
 	 * @return true if cleanup enabled
 	 */
 	public boolean isCleanupEnabled() {
 		return _cleanupFlag;
 	}
-	
+
 	public CacheStatus getStatus() {
 		return _cacheStatus;
 	}
@@ -295,15 +295,15 @@ public void setHDFSFileExists( boolean flag )  {
 	public String getFileName() {
 		return _hdfsFileName;
 	}
-	
+
 	public boolean isPersistentRead() {
 		return _isPRead;
 	}
-	
+
 	public void setPersistentRead(boolean pread) {
 		_isPRead = pread;
 	}
-	
+
 	public long getUniqueID() {
 		return _uniqueID;
 	}
@@ -314,12 +314,12 @@ public synchronized void setFileName( String file ) {
 				_dirtyFlag = true;
 		_hdfsFileName = file;
 	}
-	
+
 	/**
 	 * <code>true</code> if the in-memory or evicted matrix may be different from
 	 * the matrix located at {@link #_hdfsFileName}; <code>false</code> if the two
 	 * matrices are supposed to be the same.
-	 * 
+	 *
 	 * @return true if dirty
 	 */
 	public boolean isDirty() {
@@ -337,7 +337,7 @@ public FileFormatProperties getFileFormatProperties() {
 	public void setFileFormatProperties(FileFormatProperties props) {
 		_formatProps = props;
 	}
-	
+
 	@Override
 	public void setMetaData(MetaData md) {
 		_metaData = md;
@@ -351,7 +351,7 @@ public void setCompressedSize(long size){
 	public boolean isCompressed(){
 		return _compressed;
 	}
-	
+
 	public long getCompressedSize(){
 		return _compressedSize;
 	}
@@ -365,7 +365,7 @@ public MetaData getMetaData() {
 	public void removeMetaData() {
 		_metaData = null;
 	}
-	
+
 	public DataCharacteristics getDataCharacteristics() {
 		return _metaData.getDataCharacteristics();
 	}
@@ -381,11 +381,11 @@ public long getNumRows() {
 	public long getNumColumns() {
 		return getDataCharacteristics().getCols();
 	}
-	
+
 	public int getBlocksize() {
 		return getDataCharacteristics().getBlocksize();
 	}
-	
+
 	public abstract void refreshMetaData();
 
 	public LineageItem getCacheLineage() {
@@ -419,15 +419,15 @@ public boolean isFederated() {
 		}
 		return _fedMapping != null;
 	}
-	
+
 	public boolean isFederated(FType type) {
 		return isFederated() && (type == null || _fedMapping.getType().isType(type));
 	}
-	
+
 	public boolean isFederatedExcept(FType type) {
 		return isFederated() && !isFederated(type);
 	}
-	
+
 	/**
 	 * Gets the mapping of indices ranges to federated objects.
 	 * @return fedMapping mapping
@@ -435,7 +435,7 @@ public boolean isFederatedExcept(FType type) {
 	public FederationMap getFedMapping() {
 		return _fedMapping;
 	}
-	
+
 	/**
 	 * Sets the mapping of indices ranges to federated objects.
 	 * @param fedMapping mapping
@@ -443,7 +443,7 @@ public FederationMap getFedMapping() {
 	public void setFedMapping(FederationMap fedMapping) {
 		_fedMapping = fedMapping;
 	}
-	
+
 	public RDDObject getRDDHandle() {
 		return _rddHandle;
 	}
@@ -452,7 +452,7 @@ public void setRDDHandle( RDDObject rdd ) {
 		//cleanup potential old back reference
 		if( _rddHandle != null )
 			_rddHandle.setBackReference(null);
-		
+
 		//add new rdd handle
 		_rddHandle = rdd;
 		if( _rddHandle != null )
@@ -462,7 +462,7 @@ public void setRDDHandle( RDDObject rdd ) {
 	public boolean hasRDDHandle() {
 		return _rddHandle != null && _rddHandle.hasBackReference();
 	}
-	
+
 	public BroadcastObject<T> getBroadcastHandle() {
 		return _bcHandle;
 	}
@@ -470,44 +470,17 @@ public BroadcastObject<T> getBroadcastHandle() {
 	public boolean hasBroadcastHandle() {
 		return  _bcHandle != null && _bcHandle.hasBackReference();
 	}
-	
-	public OOCStream<IndexedMatrixValue> getStreamHandle() {
-		if( !hasStreamHandle() ) {
-			final SubscribableTaskQueue<IndexedMatrixValue> _mStream = new SubscribableTaskQueue<>();
-			_streamHandle = _mStream;
-			DataCharacteristics dc = getDataCharacteristics();
-			MatrixBlock src = (MatrixBlock)acquireReadAndRelease();
-			LongStream.range(0, dc.getNumBlocks())
-				.mapToObj(i -> UtilFunctions.createIndexedMatrixBlock(src, dc, i))
-				.forEach( blk -> {
-					try{ 
-						_mStream.enqueue(blk);
-					}
-					catch(Exception ex) {
-						throw ex instanceof DMLRuntimeException ? (DMLRuntimeException) ex : new DMLRuntimeException(ex);
-				}});
-			_mStream.closeInput();
-		}
-		
-		return _streamHandle.getReadStream();
+
+	public LocalTaskQueue<IndexedMatrixValue> getStreamHandle() {
+		return _streamHandle;
 	}
-	
-	/**
-	 * Probes if stream handle is existing, because <code>getStreamHandle</code>
-	 * creates a new stream if not existing.
-	 * 
-	 * @return true if existing, false otherwise
-	 */
-	public boolean hasStreamHandle() {
-		return _streamHandle != null && !_streamHandle.isProcessed();
-	} 
 
 	@SuppressWarnings({ "rawtypes", "unchecked" })
 	public void setBroadcastHandle( BroadcastObject bc ) {
 		//cleanup potential old back reference
 		if( _bcHandle != null )
 			_bcHandle.setBackReference(null);
-			
+
 		//add new broadcast handle
 		_bcHandle = bc;
 		if( _bcHandle != null )
@@ -527,15 +500,15 @@ public synchronized void setGPUObject(GPUContext gCtx, GPUObject gObj) {
 		if (old != null)
 				throw new DMLRuntimeException("GPU : Inconsistent internal state - this CacheableData already has a GPUObject assigned to the current GPUContext (" + gCtx + ")");
 	}
-	
+
 	public synchronized void removeGPUObject(GPUContext gCtx) {
 		_gpuObjects.remove(gCtx);
 	}
 
-	public synchronized void setStreamHandle(OOCStreamable<IndexedMatrixValue> q) {
+	public synchronized void setStreamHandle(LocalTaskQueue<IndexedMatrixValue> q) {
 		_streamHandle = q;
 	}
-	
+
 	// *********************************************
 	// ***                                       ***
 	// ***    HIGH-LEVEL METHODS THAT SPECIFY    ***
@@ -548,38 +521,38 @@ public T acquireReadAndRelease() {
 		release();
 		return tmp;
 	}
-	
+
 	/**
 	 * Acquires a shared "read-only" lock, produces the reference to the cache block,
 	 * restores the cache block to main memory, reads from HDFS if needed.
-	 * 
+	 *
 	 * Synchronized because there might be parallel threads (parfor local) that
 	 * access the same object (in case it was created before the loop).
-	 * 
+	 *
 	 * In-Status:  EMPTY, EVICTABLE, EVICTED, READ;
 	 * Out-Status: READ(+1).
-	 * 
+	 *
 	 * @return cacheable data
 	 */
 	public T acquireRead() {
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-		
+
 		//core internal acquire (synchronized per object)
 		T ret = acquireReadIntern();
-		
+
 		//update thread-local status (after pin but outside the
 		//critical section of accessing a shared object)
 		if( !isBelowCachingThreshold() )
 			updateStatusPinned(true);
-		
+
 		if( DMLScript.STATISTICS ){
 			long t1 = System.nanoTime();
 			CacheStatistics.incrementAcquireRTime(t1-t0);
 		}
-		
+
 		return ret;
 	}
-	
+
 	private synchronized T acquireReadIntern() {
 		if ( !isAvailableToRead() )
 			throw new DMLRuntimeException("MatrixObject not available to read.");
@@ -591,7 +564,7 @@ private synchronized T acquireReadIntern() {
 		if (OptimizerUtils.isUMMEnabled())
 			//track and make space in the UMM
 			UnifiedMemoryManager.pin(this);
-		
+
 		//call acquireHostRead if gpuHandle is set as well as is allocated
 		if( DMLScript.USE_ACCELERATOR && _gpuObjects != null ) {
 			boolean copiedFromGPU = false;
@@ -606,7 +579,7 @@ else if (gObj != null) {
 				}
 			}
 		}
-		
+
 		//read data from HDFS/RDD if required
 		//(probe data for cache_nowrite / jvm_reuse)
 		if( _data==null && ( isEmpty(true) || hasValidLineage() )) {
@@ -625,20 +598,20 @@ && getRDDHandle() == null) ) {
 					//mark for initial local write despite read operation
 					_requiresLocalWrite = false;
 				}
-				else if( hasStreamHandle() ) {
-					_data = readBlobFromStream( getStreamHandle().toLocalTaskQueue() );
+				else if( getStreamHandle() != null ) {
+					_data = readBlobFromStream( getStreamHandle() );
 				}
 				else if( getRDDHandle()==null || getRDDHandle().allowsShortCircuitRead() ) {
 					if( DMLScript.STATISTICS )
 						CacheStatistics.incrementHDFSHits();
-					
+
 					//check filename
 					if( _hdfsFileName == null )
 						throw new DMLRuntimeException("Cannot read matrix for empty filename.");
-					
+
 					//read cacheable data from hdfs
 					_data = readBlobFromHDFS( _hdfsFileName );
-					
+
 					//mark for initial local write despite read operation
 					_requiresLocalWrite = false;
 				}
@@ -646,11 +619,11 @@ else if( getRDDHandle()==null || getRDDHandle().allowsShortCircuitRead() ) {
 					//read matrix from rdd (incl execute pending rdd operations)
 					MutableBoolean writeStatus = new MutableBoolean();
 					_data = readBlobFromRDD( getRDDHandle(), writeStatus );
-					
+
 					//mark for initial local write (prevent repeated execution of rdd operations)
 					_requiresLocalWrite = !writeStatus.booleanValue();
 				}
-				
+
 				setDirty(false);
 			}
 			catch (IOException e) {
@@ -667,7 +640,7 @@ else if( _data!=null && DMLScript.STATISTICS ) {
 
 		return _data;
 	}
-	
+
 	/**
 	 * Acquires the exclusive "write" lock for a thread that wants to throw away the
 	 * old cache block data and link up with new cache block data. Abandons the old data
@@ -675,93 +648,93 @@ else if( _data!=null && DMLScript.STATISTICS ) {
 
 	 * In-Status:  EMPTY, EVICTABLE, EVICTED;
 	 * Out-Status: MODIFY.
-	 * 
+	 *
 	 * @param newData new data
 	 * @return cacheable data
 	 */
 	public T acquireModify(T newData) {
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-		
+
 		//core internal acquire (synchronized per object)
 		T ret = acquireModifyIntern(newData);
-		
+
 		//update thread-local status (after pin but outside the
 		//critical section of accessing a shared object)
 		if( !isBelowCachingThreshold() )
 			updateStatusPinned(true);
-		
+
 		if( DMLScript.STATISTICS ){
 			long t1 = System.nanoTime();
 			CacheStatistics.incrementAcquireMTime(t1-t0);
 			if (DMLScript.JMLC_MEM_STATISTICS)
 				Statistics.addCPMemObject(System.identityHashCode(this), getDataSize());
 		}
-		
+
 		if(newData instanceof CompressedMatrixBlock) {
 			setCompressedSize(newData.getInMemorySize());
 		}
 
 		return ret;
 	}
-	
+
 	private synchronized T acquireModifyIntern(T newData) {
 		if (! isAvailableToModify ())
 			throw new DMLRuntimeException("CacheableData not available to modify.");
-		
+
 		//clear old data
 		clearData();
-		
+
 		//cache status maintenance
 		acquire (true, false); //no need to load evicted matrix
-		
+
 		setDirty(true);
 		_isAcquireFromEmpty = false;
-		
+
 		//set references to new data
 		if (newData == null)
 			throw new DMLRuntimeException("acquireModify with empty cache block.");
 		return _data = newData;
 	}
-	
+
 	/**
 	 * Releases the shared ("read-only") or exclusive ("write") lock.  Updates
 	 * size information, last-access time, metadata, etc.
-	 * 
+	 *
 	 * Synchronized because there might be parallel threads (parfor local) that
 	 * access the same object (in case it was created before the loop).
-	 * 
+	 *
 	 * In-Status:  READ, MODIFY;
 	 * Out-Status: READ(-1), EVICTABLE, EMPTY.
-	 * 
+	 *
 	 */
 	public void release() {
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-		
+
 		//update thread-local status (before unpin but outside
 		//the critical section of accessing a shared object)
 		if( !isBelowCachingThreshold() )
 			updateStatusPinned(false);
-		
+
 		//core internal release (synchronized per object)
 		releaseIntern();
-		
+
 		if( DMLScript.STATISTICS ){
 			long t1 = System.nanoTime();
 			CacheStatistics.incrementReleaseTime(t1-t0);
 		}
 	}
-	
+
 	private synchronized void releaseIntern() {
 		boolean write = false;
 		if ( isModify() ) {
 			//set flags for write
 			write = true;
 			setDirty(true);
-			
+
 			//update meta data
 			refreshMetaData();
-			
-			//compact empty in-memory block 
+
+			//compact empty in-memory block
 			_data.compactEmptyBlock();
 		}
 
@@ -771,7 +744,7 @@ private synchronized void releaseIntern() {
 
 		//cache status maintenance (pass cacheNoWrite flag)
 		release(_isAcquireFromEmpty && !_requiresLocalWrite);
-		
+
 		if( isCachingActive() //only if caching is enabled (otherwise keep everything in mem)
 			&& isCached(true) //not empty and not read/modify
 			&& !isBelowCachingThreshold() ) //min size for caching
@@ -793,39 +766,39 @@ && isCached(true) //not empty and not read/modify
 
 			if( DMLScript.STATISTICS && write && hasValidLineage() )
 				CacheStatistics.incrementLinWrites();
-			
+
 			//create cache
 			createCache();
 			_data = null;
 		}
 	}
-	
+
 	public void clearData() {
 		clearData(-1);
 	}
-	
+
 	/**
 	 * Sets the cache block reference to <code>null</code>, abandons the old block.
 	 * Makes the "envelope" empty.  Run it to finalize the object (otherwise the
 	 * evicted cache block file may remain undeleted).
-	 * 
+	 *
 	 * In-Status:  EMPTY, EVICTABLE, EVICTED;
 	 * Out-Status: EMPTY.
-	 * 
+	 *
 	 * @param tid thread ID
-	 * 
+	 *
 	 */
-	public synchronized void clearData(long tid) 
+	public synchronized void clearData(long tid)
 	{
-		// check if cleanup enabled and possible 
-		if( !isCleanupEnabled() ) 
+		// check if cleanup enabled and possible
+		if( !isCleanupEnabled() )
 			return; // do nothing
 		if( !isAvailableToModify() )
 			throw new DMLRuntimeException("CacheableData (" + getDebugName() + ") not available to "
 					+ "modify. Status = " + _cacheStatus.name() + ".");
-		
+
 		// clear existing WB / FS representation (but prevent unnecessary probes)
-		if( !(isEmpty(true)||(_data!=null && isBelowCachingThreshold()) 
+		if( !(isEmpty(true)||(_data!=null && isBelowCachingThreshold())
 			  ||(_data!=null && !isCachingActive()) )) //additional condition for JMLC
 			freeEvictedBlob();
 
@@ -833,7 +806,7 @@ public synchronized void clearData(long tid)
 		_data = null;
 		clearCache();
 		setCacheLineage(null);
-		
+
 		// clear rdd/broadcast back refs
 		if( _rddHandle != null )
 			_rddHandle.setBackReference(null);
@@ -845,11 +818,11 @@ public synchronized void clearData(long tid)
 					gObj.clearData(null, DMLScript.EAGER_CUDA_FREE);
 				}
 		}
-		
+
 		//clear federated matrix
 		if( _fedMapping != null )
 			_fedMapping.execCleanup(tid, _fedMapping.getID());
-		
+
 		// change object state EMPTY
 		setDirty(false);
 		setEmpty();
@@ -858,13 +831,13 @@ public synchronized void clearData(long tid)
 	public synchronized void exportData() {
 		exportData( -1 );
 	}
-	
+
 	/**
 	 * Writes, or flushes, the cache block data to HDFS.
-	 * 
+	 *
 	 * In-Status:  EMPTY, EVICTABLE, EVICTED, READ;
 	 * Out-Status: EMPTY, EVICTABLE, EVICTED, READ.
-	 * 
+	 *
 	 * @param replication ?
 	 */
 	public synchronized void exportData( int replication ) {
@@ -878,18 +851,18 @@ public synchronized void exportData(String fName, String outputFormat) {
 	public synchronized void exportData(String fName, String outputFormat, FileFormatProperties formatProperties) {
 		exportData(fName, outputFormat, -1, formatProperties);
 	}
-	
+
 	/**
 	 * Synchronized because there might be parallel threads (parfor local) that
 	 * access the same object (in case it was created before the loop).
 	 * If all threads export the same data object concurrently it results in errors
 	 * because they all write to the same file. Efficiency for loops and parallel threads
 	 * is achieved by checking if the in-memory block is dirty.
-	 * 
+	 *
 	 * NOTE: MB: we do not use dfs copy from local (evicted) to HDFS because this would ignore
 	 * the output format and most importantly would bypass reblocking during write (which effects the
-	 * potential degree of parallelism). However, we copy files on HDFS if certain criteria are given.  
-	 * 
+	 * potential degree of parallelism). However, we copy files on HDFS if certain criteria are given.
+	 *
 	 * @param fName file name
 	 * @param outputFormat format
 	 * @param replication ?
@@ -905,7 +878,7 @@ public synchronized void exportData (String fName, String outputFormat, int repl
 
 		if( LOG.isTraceEnabled() )
 			LOG.trace("Exporting " + this.getDebugName() + " to " + fName + " in format " + outputFormat);
-		
+
 		if( DMLScript.USE_ACCELERATOR && _gpuObjects != null ) {
 			boolean copiedFromGPU = false;
 			for (Map.Entry<GPUContext, GPUObject> kv : _gpuObjects.entrySet()) {
@@ -919,12 +892,12 @@ public synchronized void exportData (String fName, String outputFormat, int repl
 				}
 			}
 		}
-		
+
 		//check for persistent or transient writes
 		boolean pWrite = !fName.equals(_hdfsFileName);
 		if( !pWrite )
 			setHDFSFileExists(true);
-		
+
 		//check for common file scheme (otherwise no copy/rename)
 		int blen = (formatProperties == null) ?
 			ConfigurationManager.getBlocksize() : formatProperties.getBlocksize();
@@ -933,7 +906,7 @@ public synchronized void exportData (String fName, String outputFormat, int repl
 		boolean eqFormat = isEqualOutputFormat(outputFormat);
 		boolean eqBlksize = (getBlocksize() != blen)
 			&& (outputFormat == null || outputFormat.equals("binary"));
-		
+
 		//actual export (note: no direct transfer of local copy in order to ensure blocking (and hence, parallelism))
 		if( isDirty() || !eqScheme || isFederated() ||
 			(pWrite && (!eqFormat | !eqBlksize)) )
@@ -957,7 +930,7 @@ public synchronized void exportData (String fName, String outputFormat, int repl
 				if( isEmpty(true) && !federatedWrite)
 				{
 					//read data from HDFS if required (never read before), this applies only to pWrite w/ different output formats
-					//note: for large rdd outputs, we compile dedicated writespinstructions (no need to handle this here) 
+					//note: for large rdd outputs, we compile dedicated writespinstructions (no need to handle this here)
 					try {
 						if( getRDDHandle()==null || getRDDHandle().allowsShortCircuitRead() )
 							_data = readBlobFromHDFS( _hdfsFileName );
@@ -972,15 +945,15 @@ else if(!federatedWrite)
 						throw new DMLRuntimeException("Reading of " + _hdfsFileName + " ("+hashCode()+") failed.", e);
 					}
 				}
-				
+
 				//get object from cache
 				if(!federatedWrite) {
 					if( _data == null )
 						getCache();
 					acquire( false, _data==null ); //incl. read matrix if evicted
 				}
-	
-				// b) write the matrix 
+
+				// b) write the matrix
 				try {
 					writeMetaData( fName, outputFormat, formatProperties );
 					writeBlobToHDFS( fName, outputFormat, replication, formatProperties );
@@ -1014,7 +987,7 @@ else if( pWrite ) // pwrite with same output format
 			}
 		}
 		else if( getRDDHandle()!=null && getRDDHandle().isPending()
-			&& !getRDDHandle().isHDFSFile() 
+			&& !getRDDHandle().isHDFSFile()
 			&& !getRDDHandle().allowsShortCircuitRead() )
 		{
 			//CASE 3: pending rdd operation (other than checkpoints)
@@ -1031,25 +1004,25 @@ else if( getRDDHandle()!=null && getRDDHandle().isPending()
 				throw new DMLRuntimeException("Export to " + fName + " failed.", e);
 			}
 		}
-		else 
+		else
 		{
 			//CASE 4: data already in hdfs (do nothing, no need for export)
 			if( LOG.isTraceEnabled() )
 				LOG.trace(this.getDebugName() + ": Skip export to hdfs since data already exists.");
 		}
-		
+
 		_hdfsFileExists = true;
 		if( DMLScript.STATISTICS ){
 			long t1 = System.nanoTime();
 			CacheStatistics.incrementExportTime(t1-t0);
 		}
 	}
-	
+
 	// --------- ABSTRACT LOW-LEVEL CACHE I/O OPERATIONS ----------
 
 	/**
 	 * Checks if the data blob reference points to some in-memory object.
-	 * This method is called when releasing the (last) lock. Do not call 
+	 * This method is called when releasing the (last) lock. Do not call
 	 * this method for a blob that has been evicted.
 	 *
 	 * @return <code>true</code> if the blob is in main memory and the
@@ -1068,11 +1041,11 @@ protected boolean isBlobPresent() {
 	protected void restoreBlobIntoMemory() {
 		String cacheFilePathAndName = getCacheFilePathAndName();
 		long begin = LOG.isTraceEnabled() ? System.currentTimeMillis() : 0;
-		
+
 		if( LOG.isTraceEnabled() )
-			LOG.trace ("CACHE: Restoring matrix...  " + hashCode() + "  HDFS path: " + 
+			LOG.trace ("CACHE: Restoring matrix...  " + hashCode() + "  HDFS path: " +
 						(_hdfsFileName == null ? "null" : _hdfsFileName) + ", Restore from path: " + cacheFilePathAndName);
-				
+
 		if (_data != null)
 			throw new DMLRuntimeException(cacheFilePathAndName + " : Cannot restore on top of existing in-memory data.");
 
@@ -1080,20 +1053,20 @@ protected void restoreBlobIntoMemory() {
 			_data = readBlobFromCache(cacheFilePathAndName);
 		}
 		catch (IOException e) {
-			throw new DMLRuntimeException(cacheFilePathAndName + " : Restore failed.", e);	
+			throw new DMLRuntimeException(cacheFilePathAndName + " : Restore failed.", e);
 		}
-		
+
 		//check for success
 		if (_data == null)
 			throw new DMLRuntimeException (cacheFilePathAndName + " : Restore failed.");
-		
+
 		if( LOG.isTraceEnabled() )
 			LOG.trace("Restoring matrix - COMPLETED ... " + (System.currentTimeMillis()-begin) + " msec.");
 	}
 
 	protected abstract T readBlobFromCache(String fname)
 		throws IOException;
-	
+
 	/**
 	 * Low-level cache I/O method that deletes the file containing the
 	 * evicted data blob, without reading it.
@@ -1103,16 +1076,16 @@ public final void freeEvictedBlob() {
 		String cacheFilePathAndName = getCacheFilePathAndName();
 		long begin = LOG.isTraceEnabled() ? System.currentTimeMillis() : 0;
 		if( LOG.isTraceEnabled() )
-			LOG.trace("CACHE: Freeing evicted matrix...  " + hashCode() + "  HDFS path: " + 
+			LOG.trace("CACHE: Freeing evicted matrix...  " + hashCode() + "  HDFS path: " +
 				(_hdfsFileName == null ? "null" : _hdfsFileName) + " Eviction path: " + cacheFilePathAndName);
-		
+
 		if(isCachingActive()) {
 			if (OptimizerUtils.isUMMEnabled())
 				UnifiedMemoryManager.deleteBlock(cacheFilePathAndName);
 			else
 				LazyWriteBuffer.deleteBlock(cacheFilePathAndName);
 		}
-		
+
 		if( LOG.isTraceEnabled() )
 			LOG.trace("Freeing evicted matrix - COMPLETED ... " + (System.currentTimeMillis()-begin) + " msec.");
 	}
@@ -1120,7 +1093,7 @@ public final void freeEvictedBlob() {
 	protected boolean isBelowCachingThreshold() {
 		return (_data.getInMemorySize() <= CACHING_THRESHOLD);
 	}
-	
+
 	public static boolean isBelowCachingThreshold(CacheBlock<?> data) {
 		boolean ret;
 		if (OptimizerUtils.isUMMEnabled())
@@ -1129,11 +1102,11 @@ public static boolean isBelowCachingThreshold(CacheBlock<?> data) {
 			ret = LazyWriteBuffer.getCacheBlockSize(data) <= CACHING_THRESHOLD;
 		return ret;
 	}
-	
+
 	public long getDataSize() {
 		return (_data != null) ?_data.getInMemorySize() : 0;
 	}
-	
+
 	protected ValueType[] getSchema() {
 		return null;
 	}
@@ -1141,8 +1114,8 @@ protected ValueType[] getSchema() {
 	@Override //Data
 	public synchronized String getDebugName() {
 		int maxLength = 23;
-		String debugNameEnding = (_hdfsFileName == null ? "null" : 
-			(_hdfsFileName.length() < maxLength ? _hdfsFileName : "..." + 
+		String debugNameEnding = (_hdfsFileName == null ? "null" :
+			(_hdfsFileName.length() < maxLength ? _hdfsFileName : "..." +
 				_hdfsFileName.substring (_hdfsFileName.length() - maxLength + 3)));
 		return hashCode() + " " + debugNameEnding;
 	}
@@ -1172,7 +1145,7 @@ protected T readBlobFromFederated(FederationMap fedMap) throws IOException {
 		DataCharacteristics dc = iimd.getDataCharacteristics();
 		return readBlobFromFederated(fedMap, dc.getDims());
 	}
-	
+
 	protected abstract T readBlobFromFederated(FederationMap fedMap, long[] dims)
 		throws IOException;
 
@@ -1181,22 +1154,22 @@ protected abstract void writeBlobToHDFS(String fname, String ofmt, int rep, File
 
 	protected abstract long writeStreamToHDFS(String fname, String ofmt, int rep, FileFormatProperties fprop)
 		throws IOException;
-	
+
 	protected abstract void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String ofmt)
 		throws IOException;
 
 	protected abstract T reconstructByLineage(LineageItem li)
 		throws IOException;
 
-	
+
 	protected void writeMetaData (String filePathAndName, String outputFormat, FileFormatProperties formatProperties)
 		throws IOException
-	{	
+	{
 		MetaDataFormat iimd = (MetaDataFormat) _metaData;
-	
+
 		if (iimd == null)
 			throw new DMLRuntimeException("Unexpected error while writing mtd file (" + filePathAndName + ") -- metadata is null.");
-		
+
 		// Write the matrix to HDFS in requested format
 		FileFormat fmt = (outputFormat != null) ? FileFormat.safeValueOf(outputFormat) : iimd.getFileFormat();
 		if ( fmt != FileFormat.MM ) {
@@ -1204,15 +1177,15 @@ protected void writeMetaData (String filePathAndName, String outputFormat, FileF
 			DataCharacteristics dc = iimd.getDataCharacteristics();
 			if( formatProperties != null && formatProperties.knownBlocksize() )
 				dc.setBlocksize(formatProperties.getBlocksize());
-			
+
 			// when outputFormat is binaryblock, make sure that matrixCharacteristics has correct blocking dimensions
-			// note: this is only required if singlenode (due to binarycell default) 
+			// note: this is only required if singlenode (due to binarycell default)
 			if ( fmt == FileFormat.BINARY && DMLScript.getGlobalExecMode() == ExecMode.SINGLE_NODE
 				&& dc.getBlocksize() != ConfigurationManager.getBlocksize() )
 			{
 				dc = new MatrixCharacteristics(dc.getRows(), dc.getCols(), dc.getBlocksize(), dc.getNonZeros());
 			}
-			
+
 			//write the actual meta data file
 			HDFSTool.writeMetaDataFile (filePathAndName + ".mtd",
 				valueType, getSchema(), dataType, dc, fmt, formatProperties);
@@ -1226,9 +1199,9 @@ protected boolean isEqualOutputFormat(String outputFormat) {
 		}
 		return true;
 	}
-	
+
 	// ------------- IMPLEMENTED CACHE LOGIC METHODS --------------
-	
+
 	protected String getCacheFilePathAndName () {
 		if( _cacheFileName==null ) {
 			StringBuilder sb = new StringBuilder();
@@ -1238,15 +1211,15 @@ protected String getCacheFilePathAndName () {
 			sb.append(CacheableData.CACHING_EVICTION_FILEEXTENSION);
 			_cacheFileName = sb.toString();
 		}
-		
+
 		return _cacheFileName;
 	}
-	
+
 	/**
 	 * This method "acquires the lock" to ensure that the data blob is in main memory
 	 * (not evicted) while it is being accessed.  When called, the method will try to
 	 * restore the blob if it has been evicted.  There are two kinds of locks it may
-	 * acquire: a shared "read" lock (if the argument is <code>false</code>) or the 
+	 * acquire: a shared "read" lock (if the argument is <code>false</code>) or the
 	 * exclusive "modify" lock (if the argument is <code>true</code>).
 	 * The method can fail in three ways:
 	 * (1) if there is lock status conflict;
@@ -1256,9 +1229,9 @@ protected String getCacheFilePathAndName () {
 	 * its last-access timestamp.  For the shared "read" lock, acquiring a new lock
 	 * increments the associated count.  The "read" count has to be decremented once
 	 * the blob is no longer used, which may re-enable eviction.  This method has to
-	 * be called only once per matrix operation and coupled with {@link #release()}, 
+	 * be called only once per matrix operation and coupled with {@link #release()},
 	 * because it increments the lock count and the other method decrements this count.
-	 * 
+	 *
 	 * @param isModify : <code>true</code> for the exclusive "modify" lock,
 	 *     <code>false</code> for a shared "read" lock.
 	 * @param restore true if restore
@@ -1290,7 +1263,7 @@ protected void acquire (boolean isModify, boolean restore) {
 			LOG.trace("Acquired lock on " + getDebugName() + ", status: " + _cacheStatus.name() );
 	}
 
-	
+
 	/**
 	 * Call this method to permit eviction for the stored data blob, or to
 	 * decrement its "read" count if it is "read"-locked by other threads.
@@ -1300,7 +1273,7 @@ protected void acquire (boolean isModify, boolean restore) {
 	 * called only once per process and coupled with {@link #acquire(boolean, boolean)},
 	 * because it decrements the lock count and the other method increments
 	 * the lock count.
-	 * 
+	 *
 	 * @param cacheNoWrite ?
 	 */
 	protected void release(boolean cacheNoWrite)
@@ -1321,37 +1294,37 @@ protected void release(boolean cacheNoWrite)
 					setEmpty();
 				break;
 		}
-		
+
 		if( LOG.isTraceEnabled() )
 			LOG.trace("Released lock on " + getDebugName() + ", status: " + _cacheStatus.name());
-		
+
 	}
 
-	
+
 	//  **************************************************
 	//  ***                                            ***
 	//  ***  CACHE STATUS FIELD - CLASSES AND METHODS  ***
 	//  ***                                            ***
 	//  **************************************************
-	
+
 	public boolean isCached(boolean inclCachedNoWrite) {
 		return _cacheStatus == CacheStatus.CACHED
 			|| (inclCachedNoWrite && _cacheStatus == CacheStatus.CACHED_NOWRITE);
 	}
-	
+
 	public void setEmptyStatus() {
 		setEmpty();
 	}
-	
+
 	protected boolean isEmpty(boolean inclCachedNoWrite) {
 		return _cacheStatus == CacheStatus.EMPTY
 			|| (inclCachedNoWrite && _cacheStatus == CacheStatus.CACHED_NOWRITE);
 	}
-	
+
 	protected boolean isModify() {
 		return (_cacheStatus == CacheStatus.MODIFY);
 	}
-	
+
 	public boolean isPendingRDDOps() {
 		return isEmpty(true) && _data == null && (_rddHandle != null && _rddHandle.hasBackReference());
 	}
@@ -1364,11 +1337,11 @@ public boolean isDeviceToHostCopy() {
 	protected void setEmpty() {
 		_cacheStatus = CacheStatus.EMPTY;
 	}
-	
+
 	protected void setModify() {
 		_cacheStatus = CacheStatus.MODIFY;
 	}
-	
+
 	protected void setCached() {
 		_cacheStatus = CacheStatus.CACHED;
 	}
@@ -1377,25 +1350,25 @@ protected void addOneRead() {
 		_numReadThreads ++;
 		_cacheStatus = CacheStatus.READ;
 	}
-	
+
 	protected void removeOneRead(boolean doesBlobExist, boolean cacheNoWrite) {
 		_numReadThreads --;
 		if (_numReadThreads == 0) {
 			if( cacheNoWrite )
-				_cacheStatus = (doesBlobExist ? 
+				_cacheStatus = (doesBlobExist ?
 					CacheStatus.CACHED_NOWRITE : CacheStatus.EMPTY);
 			else
-				_cacheStatus = (doesBlobExist ? 
+				_cacheStatus = (doesBlobExist ?
 					CacheStatus.CACHED : CacheStatus.EMPTY);
 		}
 	}
-	
+
 	protected boolean isAvailableToRead() {
 		return (_cacheStatus != CacheStatus.MODIFY);
 	}
-	
+
 	protected boolean isAvailableToModify() {
-		return (   _cacheStatus == CacheStatus.EMPTY 
+		return (   _cacheStatus == CacheStatus.EMPTY
 				|| _cacheStatus == CacheStatus.CACHED
 				|| _cacheStatus == CacheStatus.CACHED_NOWRITE);
 	}
@@ -1406,10 +1379,10 @@ protected boolean isAvailableToModify() {
 	// ***       FOR SOFTREFERENCE CACHE       ***
 	// ***                                     ***
 	// *******************************************
-	
+
 	/**
 	 * Creates a new cache soft reference to the currently
-	 * referenced cache block.  
+	 * referenced cache block.
 	 */
 	protected void createCache( ) {
 		if( _cache == null || _cache.get() == null )
@@ -1425,7 +1398,7 @@ protected void getCache() {
 			_data = _cache.get();
 		}
 	}
-	
+
 	/** Clears the cache soft reference if existing. */
 	protected void clearCache() {
 		if( _cache != null ) {
@@ -1445,39 +1418,39 @@ protected void updateStatusPinned(boolean add) {
 	protected static long getPinnedSize() {
 		return sizePinned.get();
 	}
-	
+
 	public static void addBroadcastSize(long size) {
 		_refBCs.addAndGet(size);
 	}
-	
+
 	public static long getBroadcastSize() {
 		//scale the total sum of all broadcasts by the current fraction
 		//of local memory to equally distribute it across parfor workers
 		return (long) (_refBCs.longValue() *
 			InfrastructureAnalyzer.getLocalMaxMemoryFraction());
 	}
-	
+
 	// --------- STATIC CACHE INIT/CLEANUP OPERATIONS ----------
 
 	public synchronized static void cleanupCacheDir() {
 		//cleanup remaining cached writes
 		LazyWriteBuffer.cleanup();
 		UnifiedMemoryManager.cleanup();
-		
+
 		//delete cache dir and files
 		cleanupCacheDir(true);
 	}
-	
+
 	/**
 	 * Deletes the DML-script-specific caching working dir.
-	 * 
+	 *
 	 * @param withDir if true, delete directory
 	 */
 	public synchronized static void cleanupCacheDir(boolean withDir)
 	{
 		//get directory name
 		String dir = cacheEvictionLocalFilePath;
-		
+
 		//clean files with cache prefix
 		if( dir != null ) //if previous init cache
 		{
@@ -1491,30 +1464,30 @@ public synchronized static void cleanupCacheDir(boolean withDir)
 					fdir.delete(); //deletes dir only if empty
 			}
 		}
-		
+
 		_activeFlag = false;
 	}
-	
+
 	/**
 	 * Inits caching with the default uuid of DMLScript
-	 * 
+	 *
 	 * @throws IOException if IOException occurs
 	 */
-	public synchronized static void initCaching() 
+	public synchronized static void initCaching()
 		throws IOException
 	{
 		initCaching(DMLScript.getUUID());
 	}
-	
+
 	/**
 	 * Creates the DML-script-specific caching working dir.
-	 * 
+	 *
 	 * Takes the UUID in order to allow for custom uuid, e.g., for remote parfor caching
-	 * 
+	 *
 	 * @param uuid ID
 	 * @throws IOException if IOException occurs
 	 */
-	public synchronized static void initCaching( String uuid ) 
+	public synchronized static void initCaching( String uuid )
 		throws IOException
 	{
 		try
@@ -1527,7 +1500,7 @@ public synchronized static void initCaching( String uuid )
 		{
 			throw new IOException(e);
 		}
-	
+
 		if (OptimizerUtils.isUMMEnabled())
 			//init unified memory manager
 			UnifiedMemoryManager.init();
@@ -1542,26 +1515,26 @@ public synchronized static void initCaching( String uuid )
 	public static boolean isCachingActive() {
 		return _activeFlag;
 	}
-	
+
 	public static void disableCaching() {
 		_activeFlag = false;
 	}
-	
+
 	public static void enableCaching() {
 		_activeFlag = true;
 	}
 
 	public synchronized boolean moveData(String fName, String outputFormat) {
 		boolean ret = false;
-		
+
 		try
 		{
 			//check for common file scheme (otherwise no copy/rename)
 			boolean eqScheme = IOUtilFunctions.isSameFileScheme(
 				new Path(_hdfsFileName), new Path(fName));
-			
+
 			//export or rename to target file on hdfs
-			if( isDirty() || !eqScheme || (!isEqualOutputFormat(outputFormat) && isEmpty(true)) 
+			if( isDirty() || !eqScheme || (!isEqualOutputFormat(outputFormat) && isEmpty(true))
 				|| (getRDDHandle()!=null && !HDFSTool.existsFileOnHDFS(_hdfsFileName)) )
 			{
 				exportData(fName, outputFormat);
@@ -1579,7 +1552,7 @@ else if( isEqualOutputFormat(outputFormat) )
 		catch (Exception e) {
 			throw new DMLRuntimeException("Move to " + fName + " failed.", e);
 		}
-		
+
 		return ret;
 	}
 
@@ -1587,7 +1560,7 @@ else if( isEqualOutputFormat(outputFormat) )
 	public String toString() {
 		return toString(false);
 	}
-	
+
 	@Override
 	public String toString(boolean metaOnly) {
 		StringBuilder str = new StringBuilder();
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java
index 92e11b425dd..b44e06ad2d0 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/CPInstructionParser.java
@@ -82,92 +82,92 @@ public static CPInstruction parseSingleInstruction (String str ) {
 			throw new DMLRuntimeException("Unable to parse instruction: " + str);
 		return cpinst;
 	}
-	
+
 	public static CPInstruction parseSingleInstruction ( InstructionType cptype, String str ) {
 		ExecType execType;
-		if ( str == null || str.isEmpty() ) 
+		if ( str == null || str.isEmpty() )
 			return null;
 		switch(cptype) {
 			case AggregateUnary:
 				return AggregateUnaryCPInstruction.parseInstruction(str);
-			
+
 			case AggregateBinary:
 				return AggregateBinaryCPInstruction.parseInstruction(str);
-	
+
 			case AggregateTernary:
 				return AggregateTernaryCPInstruction.parseInstruction(str);
-			
+
 			case Unary:
 				return UnaryCPInstruction.parseInstruction(str);
 
 			case Binary:
 				return BinaryCPInstruction.parseInstruction(str);
-			
+
 			case Ternary:
 				return TernaryCPInstruction.parseInstruction(str);
-			
+
 			case Quaternary:
 				return QuaternaryCPInstruction.parseInstruction(str);
-			
+
 			case BuiltinNary:
 				return BuiltinNaryCPInstruction.parseInstruction(str);
-			
+
 			case Ctable:
 				return CtableCPInstruction.parseInstruction(str);
-			
+
 			case Reorg:
 				return ReorgCPInstruction.parseInstruction(str);
-				
+
 			case Dnn:
 				 return DnnCPInstruction.parseInstruction(str);
-				
+
 			case UaggOuterChain:
 				return UaggOuterChainCPInstruction.parseInstruction(str);
-				
+
 			case Reshape:
 				return ReshapeCPInstruction.parseInstruction(str);
-	
+
 			case Append:
 				return AppendCPInstruction.parseInstruction(str);
-			
+
 			case Variable:
 				return VariableCPInstruction.parseInstruction(str);
-				
+
 			case Rand:
 				return DataGenCPInstruction.parseInstruction(str);
 
 			case StringInit:
 				return StringInitCPInstruction.parseInstruction(str);
-				
+
 			case FCall:
 				return FunctionCallCPInstruction.parseInstruction(str);
 
 			case ParameterizedBuiltin:
 				return ParameterizedBuiltinCPInstruction.parseInstruction(str);
-			
+
 			case MultiReturnParameterizedBuiltin:
 				return MultiReturnParameterizedBuiltinCPInstruction.parseInstruction(str);
-		
+
 			case MultiReturnComplexMatrixBuiltin:
 				return MultiReturnComplexMatrixBuiltinCPInstruction.parseInstruction(str);
-				
+
 			case MultiReturnBuiltin:
 				return MultiReturnBuiltinCPInstruction.parseInstruction(str);
-				
+
 			case QSort:
 				return QuantileSortCPInstruction.parseInstruction(str);
-			
+
 			case QPick:
 				return QuantilePickCPInstruction.parseInstruction(str);
-			
+
 			case MatrixIndexing:
-				execType = ExecType.valueOf( str.split(Instruction.OPERAND_DELIM)[0] ); 
+				execType = ExecType.valueOf( str.split(Instruction.OPERAND_DELIM)[0] );
 				if( execType == ExecType.CP )
 					return IndexingCPInstruction.parseInstruction(str);
 				else //exectype CP_FILE
 					return MatrixIndexingCPFileInstruction.parseInstruction(str);
-			
-			case Builtin: 
+
+			case Builtin:
 				String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
 				if(parts[0].equals(Opcodes.LOG.toString()) || parts[0].equals(Opcodes.LOGNZ.toString())) {
 					if(InstructionUtils.isInteger(parts[3])) // B=log(A), y=log(x)
@@ -177,44 +177,44 @@ public static CPInstruction parseSingleInstruction ( InstructionType cptype, Str
 						return BinaryCPInstruction.parseInstruction(str);
 				}
 				throw new DMLRuntimeException("Invalid Builtin Instruction: " + str );
-			
+
 			case MMTSJ:
 				return MMTSJCPInstruction.parseInstruction(str);
-			
+
 			case PMMJ:
 				return PMMJCPInstruction.parseInstruction(str);
-			
+
 			case MMChain:
 				return MMChainCPInstruction.parseInstruction(str);
-			
+
 			case CentralMoment:
 				return CentralMomentCPInstruction.parseInstruction(str);
-	
+
 			case Covariance:
 				return CovarianceCPInstruction.parseInstruction(str);
 
 			case Compression:
 				return CompressionCPInstruction.parseInstruction(str);
-			
+
 			case DeCompression:
 				return DeCompressionCPInstruction.parseInstruction(str);
-			
+
 			case QuantizeCompression:
 				LOG.debug("Parsing Quantize Compress instruction");
-				return CompressionCPInstruction.parseQuantizationFusedInstruction(str);				
+				return CompressionCPInstruction.parseQuantizationFusedInstruction(str);
 
 			case Local:
 				return LocalCPInstruction.parseInstruction(str);
 
 			case SpoofFused:
 				return SpoofCPInstruction.parseInstruction(str);
-				
+
 			case Sql:
 				return SqlCPInstruction.parseInstruction(str);
-				
+
 			case Prefetch:
 				return PrefetchCPInstruction.parseInstruction(str);
-				
+
 			case Broadcast:
 				return BroadcastCPInstruction.parseInstruction(str);
 
@@ -223,10 +223,10 @@ public static CPInstruction parseSingleInstruction ( InstructionType cptype, Str
 
 			case Union:
 				return UnionCPInstruction.parseInstruction(str);
-			
+
 			case EINSUM:
 				return EinsumCPInstruction.parseInstruction(str);
-				
+
 			default:
 				throw new DMLRuntimeException("Invalid CP Instruction Type: " + cptype );
 		}
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/OOCInstructionParser.java b/src/main/java/org/apache/sysds/runtime/instructions/OOCInstructionParser.java
index f23ad6d67a6..8b64073111c 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/OOCInstructionParser.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/OOCInstructionParser.java
@@ -38,6 +38,8 @@
 import org.apache.sysds.runtime.instructions.ooc.MatrixVectorBinaryOOCInstruction;
 import org.apache.sysds.runtime.instructions.ooc.TransposeOOCInstruction;
 import org.apache.sysds.runtime.instructions.ooc.TeeOOCInstruction;
+import org.apache.sysds.runtime.instructions.ooc.OOCInstruction;
+import org.apache.sysds.runtime.instructions.ooc.ReblockOOCInstruction;
 
 public class OOCInstructionParser extends InstructionParser {
 	protected static final Log LOG = LogFactory.getLog(OOCInstructionParser.class.getName());
@@ -78,7 +80,7 @@ public static OOCInstruction parseSingleInstruction(InstructionType ooctype, Str
 			case Tee:
 				return TeeOOCInstruction.parseInstruction(str);
 			case CentralMoment:
-				return  CentralMomentOOCInstruction.parseInstruction(str);
+				return CentralMomentOOCInstruction.parseInstruction(str);
 			case Ctable:
 				return CtableOOCInstruction.parseInstruction(str);
 			case ParameterizedBuiltin:
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/ooc/AggregateUnaryOOCInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/ooc/AggregateUnaryOOCInstruction.java
index 2a53c5400ae..34df0f4d249 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/ooc/AggregateUnaryOOCInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/ooc/AggregateUnaryOOCInstruction.java
@@ -30,37 +30,27 @@
 import org.apache.sysds.runtime.instructions.cp.DoubleObject;
 import org.apache.sysds.runtime.instructions.spark.data.IndexedMatrixValue;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
-import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
 import org.apache.sysds.runtime.matrix.data.OperationsOnMatrixValues;
 import org.apache.sysds.runtime.matrix.operators.AggregateOperator;
 import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
-import org.apache.sysds.runtime.matrix.operators.Operator;
-import org.apache.sysds.runtime.meta.DataCharacteristics;
 
-import java.util.HashMap;
 
 public class AggregateUnaryOOCInstruction extends ComputationOOCInstruction {
 	private AggregateOperator _aop = null;
 
-	protected AggregateUnaryOOCInstruction(OOCType type, AggregateUnaryOperator auop, AggregateOperator aop, 
+	protected AggregateUnaryOOCInstruction(OOCType type, AggregateUnaryOperator auop, AggregateOperator aop,
 			CPOperand in, CPOperand out, String opcode, String istr) {
 		super(type, auop, in, out, opcode, istr);
 		_aop = aop;
 	}
 
-	protected AggregateUnaryOOCInstruction(OOCType type, Operator op, CPOperand in1, CPOperand in2, CPOperand in3,
-		CPOperand out, String opcode, String istr) {
-		super(type, op, in1, in2, in3, out, opcode, istr);
-		_aop = null;
-	}
-
 	public static AggregateUnaryOOCInstruction parseInstruction(String str) {
 		String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
 		InstructionUtils.checkNumFields(parts, 2);
 		String opcode = parts[0];
 		CPOperand in1 = new CPOperand(parts[1]);
 		CPOperand out = new CPOperand(parts[2]);
-		
+
 		String aopcode = InstructionUtils.deriveAggregateOperatorOpcode(opcode);
 		CorrectionLocationType corrLoc = InstructionUtils.deriveAggregateOperatorCorrectionLocation(opcode);
 		AggregateUnaryOperator aggun = InstructionUtils.parseBasicAggregateUnaryOperator(opcode);
@@ -68,112 +58,37 @@ public static AggregateUnaryOOCInstruction parseInstruction(String str) {
 		return new AggregateUnaryOOCInstruction(
 			OOCType.AggregateUnary, aggun, aop, in1, out, opcode, str);
 	}
-	
+
 	@Override
 	public void processInstruction( ExecutionContext ec ) {
-		//TODO support all types of aggregations, currently only full aggregation, row aggregation and column aggregation
-		
+		//TODO support all types of aggregations, currently only full aggregation
+
 		//setup operators and input queue
-		AggregateUnaryOperator aggun = (AggregateUnaryOperator) getOperator(); 
+		AggregateUnaryOperator aggun = (AggregateUnaryOperator) getOperator();
 		MatrixObject min = ec.getMatrixObject(input1);
-		OOCStream<IndexedMatrixValue> q = min.getStreamHandle();
+		LocalTaskQueue<IndexedMatrixValue> q = min.getStreamHandle();
+		IndexedMatrixValue tmp = null;
 		int blen = ConfigurationManager.getBlocksize();
 
-		if (aggun.isRowAggregate() || aggun.isColAggregate()) {
-			DataCharacteristics chars = ec.getDataCharacteristics(input1.getName());
-			// number of blocks to process per aggregation idx (row or column dim)
-			long emitThreshold = aggun.isRowAggregate()? chars.getNumColBlocks() : chars.getNumRowBlocks();
-			OOCMatrixBlockTracker aggTracker = new OOCMatrixBlockTracker(emitThreshold);
-			HashMap<Long, MatrixBlock> corrs = new HashMap<>(); // correction blocks
-
-			OOCStream<IndexedMatrixValue> qOut = createWritableStream();
-			ec.getMatrixObject(output).setStreamHandle(qOut);
-
-			submitOOCTask(() -> {
-					IndexedMatrixValue tmp = null;
-					try {
-						while((tmp = q.dequeue()) != LocalTaskQueue.NO_MORE_TASKS) {
-							long idx  = aggun.isRowAggregate() ?
-								tmp.getIndexes().getRowIndex() : tmp.getIndexes().getColumnIndex();
-							MatrixBlock ret = aggTracker.get(idx);
-							if(ret != null) {
-								MatrixBlock corr = corrs.get(idx);
-
-								// aggregation
-								MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue())
-									.aggregateUnaryOperations(aggun, new MatrixBlock(), blen, tmp.getIndexes());
-								OperationsOnMatrixValues.incrementalAggregation(ret,
-									_aop.existsCorrection() ? corr : null, ltmp, _aop, true);
-
-								if (!aggTracker.putAndIncrementCount(idx, ret)){
-									corrs.replace(idx, corr);
-									continue;
-								}
-							}
-							else {
-								// first block for this idx - init aggregate and correction
-								// TODO avoid corr block for inplace incremental aggregation
-								int rows = tmp.getValue().getNumRows();
-								int cols = tmp.getValue().getNumColumns();
-								int extra = _aop.correction.getNumRemovedRowsColumns();
-								ret = aggun.isRowAggregate()? new MatrixBlock(rows, 1 + extra, false) : new MatrixBlock(1 + extra, cols, false);
-								MatrixBlock corr = aggun.isRowAggregate()? new MatrixBlock(rows, 1 + extra, false) : new MatrixBlock(1 + extra, cols, false);
-
-								// aggregation
-								MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue()).aggregateUnaryOperations(
-									aggun, new MatrixBlock(), blen, tmp.getIndexes());
-								OperationsOnMatrixValues.incrementalAggregation(ret,
-									_aop.existsCorrection() ? corr : null, ltmp, _aop, true);
-
-								if(emitThreshold > 1){
-									aggTracker.putAndIncrementCount(idx, ret);
-									corrs.put(idx, corr);
-									continue;
-								}
-							}
-
-							// all input blocks for this idx processed - emit aggregated block
-							ret.dropLastRowsOrColumns(_aop.correction);
-							MatrixIndexes midx = aggun.isRowAggregate() ?
-								new MatrixIndexes(tmp.getIndexes().getRowIndex(), 1) :
-								new MatrixIndexes(1, tmp.getIndexes().getColumnIndex());
-							IndexedMatrixValue tmpOut = new IndexedMatrixValue(midx, ret);
-
-							qOut.enqueue(tmpOut);
-							// drop intermediate states
-							aggTracker.remove(idx);
-							corrs.remove(idx);
-						}
-						qOut.closeInput();
-					}
-					catch(Exception ex) {
-						throw new DMLRuntimeException(ex);
-					}
-			}, q, qOut);
-		}
-		// full aggregation
-		else {
-			IndexedMatrixValue tmp = null;
-			//read blocks and aggregate immediately into result
-			int extra = _aop.correction.getNumRemovedRowsColumns();
-			MatrixBlock ret = new MatrixBlock(1,1+extra,false);
-			MatrixBlock corr = new MatrixBlock(1,1+extra,false);
-			try {
-				while((tmp = q.dequeue()) != LocalTaskQueue.NO_MORE_TASKS) {
-					//block aggregation
-					MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue())
-						.aggregateUnaryOperations(aggun, new MatrixBlock(), blen, tmp.getIndexes());
-					//accumulation into final result
-					OperationsOnMatrixValues.incrementalAggregation(
-						ret, _aop.existsCorrection() ? corr : null, ltmp, _aop, true);
-				}
+		//read blocks and aggregate immediately into result
+		int extra = _aop.correction.getNumRemovedRowsColumns();
+		MatrixBlock ret = new MatrixBlock(1,1+extra,false);
+		MatrixBlock corr = new MatrixBlock(1,1+extra,false);
+		try {
+			while((tmp = q.dequeueTask()) != LocalTaskQueue.NO_MORE_TASKS) {
+				//block aggregation
+				MatrixBlock ltmp = (MatrixBlock) ((MatrixBlock) tmp.getValue())
+					.aggregateUnaryOperations(aggun, new MatrixBlock(), blen, tmp.getIndexes());
+				//accumulation into final result
+				OperationsOnMatrixValues.incrementalAggregation(
+					ret, _aop.existsCorrection() ? corr : null, ltmp, _aop, true);
 			}
-			catch(Exception ex) {
-				throw new DMLRuntimeException(ex);
-			}
-
-			//create scalar output
-			ec.setScalarOutput(output.getName(), new DoubleObject(ret.get(0, 0)));
 		}
+		catch(Exception ex) {
+			throw new DMLRuntimeException(ex);
+		}
+
+		//create scalar output
+		ec.setScalarOutput(output.getName(), new DoubleObject(ret.get(0, 0)));
 	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/ooc/ComputationOOCInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/ooc/ComputationOOCInstruction.java
index 4dcdffcb0dc..bc5a4d841b4 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/ooc/ComputationOOCInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/ooc/ComputationOOCInstruction.java
@@ -33,7 +33,7 @@ protected ComputationOOCInstruction(OOCType type, Operator op, CPOperand in1, CP
 		input3 = null;
 		output = out;
 	}
-	
+
 	protected ComputationOOCInstruction(OOCType type, Operator op, CPOperand in1, CPOperand in2, CPOperand out, String opcode, String istr) {
 		super(type, op, opcode, istr);
 		input1 = in1;
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/ooc/ReblockOOCInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/ooc/ReblockOOCInstruction.java
index 74b15c9fb0e..1f7fce3b146 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/ooc/ReblockOOCInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/ooc/ReblockOOCInstruction.java
@@ -41,7 +41,7 @@
 public class ReblockOOCInstruction extends ComputationOOCInstruction {
 	private int blen;
 
-	private ReblockOOCInstruction(Operator op, CPOperand in, CPOperand out, 
+	private ReblockOOCInstruction(Operator op, CPOperand in, CPOperand out,
 		int br, int bc, String opcode, String instr)
 	{
 		super(OOCType.Reblock, op, in, out, opcode, instr);
@@ -71,29 +71,29 @@ public void processInstruction(ExecutionContext ec) {
 
 		//get the source format from the meta data
 		//MetaDataFormat iimd = (MetaDataFormat) min.getMetaData();
-		//TODO support other formats than binary 
-		
+		//TODO support other formats than binary
+
 		//create queue, spawn thread for asynchronous reading, and return
 		OOCStream<IndexedMatrixValue> q = createWritableStream();
 		submitOOCTask(() -> readBinaryBlock(q, min.getFileName()), q);
-		
+
 		MatrixObject mout = ec.getMatrixObject(output);
 		mout.setStreamHandle(q);
 	}
-	
+
 	@SuppressWarnings("resource")
 	private void readBinaryBlock(OOCStream<IndexedMatrixValue> q, String fname) {
 		try {
 			//prepare file access
-			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());	
-			Path path = new Path( fname ); 
+			JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
+			Path path = new Path( fname );
 			FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
-			
+
 			//check existence and non-empty file
-			MatrixReader.checkValidInputFile(fs, path); 
-			
+			MatrixReader.checkValidInputFile(fs, path);
+
 			//core reading
-			for( Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path) ) { //1..N files 
+			for( Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path) ) { //1..N files
 				//directly read from sequence files (individual partfiles)
 				try( SequenceFile.Reader reader = new SequenceFile
 					.Reader(job, SequenceFile.Reader.file(lpath)) )
diff --git a/src/test/java/org/apache/sysds/test/functions/ooc/SumScalarMultiplicationTest.java b/src/test/java/org/apache/sysds/test/functions/ooc/SumScalarMultiplicationTest.java
index f0d9228a533..2272588bab4 100644
--- a/src/test/java/org/apache/sysds/test/functions/ooc/SumScalarMultiplicationTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/ooc/SumScalarMultiplicationTest.java
@@ -23,7 +23,6 @@
 import org.apache.sysds.common.Types;
 import org.apache.sysds.common.Types.FileFormat;
 import org.apache.sysds.common.Types.ValueType;
-import org.apache.sysds.hops.OptimizerUtils;
 import org.apache.sysds.runtime.instructions.Instruction;
 import org.apache.sysds.runtime.io.MatrixWriter;
 import org.apache.sysds.runtime.io.MatrixWriterFactory;
@@ -58,26 +57,11 @@ public void setUp() {
 	 * Test the sum of scalar multiplication, "sum(X*7)", with OOC backend.
 	 */
 	@Test
-	public void testSumScalarMultNoRewrite() {
-		testSumScalarMult(false);
-	}
-	
-	/**
-	 * Test the sum of scalar multiplication, "sum(X)*7", with OOC backend.
-	 */
-	@Test
-	public void testSumScalarMultRewrite() {
-		testSumScalarMult(true);
-	}
-	
-	
-	public void testSumScalarMult(boolean rewrite)
-	{
+	public void testSumScalarMult() {
+
 		Types.ExecMode platformOld = rtplatform;
 		rtplatform = Types.ExecMode.SINGLE_NODE;
-		boolean oldRewrite = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
-		OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrite;
-		
+
 		try {
 			getAndLoadTestConfiguration(TEST_NAME);
 			String HOME = SCRIPT_DIR + TEST_DIR;
@@ -108,17 +92,16 @@ public void testSumScalarMult(boolean rewrite)
 			String prefix = Instruction.OOC_INST_PREFIX;
 			Assert.assertTrue("OOC wasn't used for RBLK",
 				heavyHittersContainsString(prefix + Opcodes.RBLK));
-			if(!rewrite)
-				Assert.assertTrue("OOC wasn't used for SUM",
-					heavyHittersContainsString(prefix + Opcodes.MULT));
 			Assert.assertTrue("OOC wasn't used for SUM",
 				heavyHittersContainsString(prefix + Opcodes.UAKP));
+			
+//			boolean usedOOCMult = Statistics.getCPHeavyHitterOpCodes().contains(prefix + Opcodes.MULT);
+//			Assert.assertTrue("OOC wasn't used for MULT", usedOOCMult);
 		}
 		catch(Exception ex) {
 			Assert.fail(ex.getMessage());
 		}
 		finally {
-			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewrite;
 			resetExecMode(platformOld);
 		}
 	}