evaleval · JoelNiklaus · Jun 8, 2026 · Jun 8, 2026
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@
 
 - 📋 **A metadata schema** ([`eval.schema.json`](eval.schema.json)) that defines the information needed for meaningful comparison of evaluation results, including [instance-level data](instance_level_eval.schema.json)
 - 🔧 **Validation** that checks data against the schema before it enters the repository
-- 🔌 **Converters** for [Inspect AI](every_eval_ever/converters/inspect/), [HELM](every_eval_ever/converters/helm/), and [lm-eval-harness](every_eval_ever/converters/lm_eval/), so you can transform your existing evaluation logs into the standard format
+- 🔌 **Converters** for [Inspect AI](every_eval_ever/converters/inspect/), [HELM](every_eval_ever/converters/helm/), [lm-eval-harness](every_eval_ever/converters/lm_eval/), [AlpacaEval](every_eval_ever/converters/alpaca_eval/), and [LEXam](every_eval_ever/converters/lexam/), so you can transform your existing evaluation logs into the standard format
 
 Install the package:
 
@@ -327,13 +327,15 @@ uv run python post_codegen.py
 
 ## 🔌 Eval Converters
 
-We have prepared converters to make adapting to our schema as easy as possible. At the moment, we support converting local evaluation harness logs from `Inspect AI`, `HELM` and `lm-evaluation-harness` into our unified schema. Each converter produces aggregate JSON and optionally instance-level JSONL output.
+We have prepared converters to make adapting to our schema as easy as possible. At the moment, we support converting local evaluation harness logs from `Inspect AI`, `HELM` and `lm-evaluation-harness`, plus public leaderboard scrapes from `AlpacaEval` and `LEXam`, into our unified schema. Every converter produces aggregate JSON; the harness-log converters can additionally produce instance-level JSONL output, while the leaderboard converters cannot (see the table below).
 
 | Framework | Command | Instance-Level JSONL |
 |---|---|---|
 | [Inspect AI](every_eval_ever/converters/inspect/) | `every_eval_ever convert inspect --log_path <path>` | Yes, if samples in log |
 | [HELM](every_eval_ever/converters/helm/) | `every_eval_ever convert helm --log_path <path>` | Always |
 | [lm-evaluation-harness](every_eval_ever/converters/lm_eval/) | `every_eval_ever convert lm_eval --log_path <path> --include_samples` | With `--include_samples` |
+| [AlpacaEval](every_eval_ever/converters/alpaca_eval/) | `every_eval_ever convert alpaca_eval --output_dir data` | No |
+| [LEXam](every_eval_ever/converters/lexam/) | `every_eval_ever convert lexam --output_dir data` | No |
 
 For full CLI usage and required input files, see the [Eval Converters README](every_eval_ever/converters/README.md).
 

diff --git a/every_eval_ever/cli.py b/every_eval_ever/cli.py
@@ -188,6 +188,40 @@ def _cmd_convert_helm(args: argparse.Namespace) -> int:
     return 0
 
 
+def _cmd_convert_lexam(args: argparse.Namespace) -> int:  # handler for `convert lexam`; returns 0 after writing every log
+    from every_eval_ever.converters.lexam.adapter import LEXamAdapter
+    # The adapter import above is function-local rather than at module top.
+    adapter = LEXamAdapter()
+    output_dir = Path(args.output_dir)
+    logs = adapter.fetch_leaderboard()  # no try/except here: a failed fetch propagates to the caller
+    # Override metadata on each log only where the CLI value differs from the literal it is compared against.
+    for log in logs:
+        if args.source_organization_name != 'unknown':  # 'unknown' is presumably the parser default — confirm
+            log.source_metadata.source_organization_name = (
+                args.source_organization_name
+            )
+        if args.source_organization_url is not None:
+            log.source_metadata.source_organization_url = (
+                args.source_organization_url
+            )
+        if args.evaluator_relationship != 'collaborative':  # 'collaborative' is the lexam default set in build_parser
+            from every_eval_ever.eval_types import EvaluatorRelationship
+            # Wrap the CLI string in EvaluatorRelationship before storing it on the log.
+            log.source_metadata.evaluator_relationship = EvaluatorRelationship(
+                args.evaluator_relationship
+            )
+        if args.eval_library_name != 'lexam':  # presumably the parser default for this source — confirm
+            log.eval_library.name = args.eval_library_name
+        if args.eval_library_version != 'unknown':  # presumably the parser default — confirm
+            log.eval_library.version = args.eval_library_version
+        # Persist the log via _write_log and echo the path it returns.
+        out_file = _write_log(log, output_dir)
+        print(f'  {out_file}')
+    # Summary line once all logs are written.
+    print(f'\nConverted {len(logs)} model evaluation(s).')
+    return 0
+
+
 def _cmd_convert_alpaca_eval(args: argparse.Namespace) -> int:
     from every_eval_ever.converters.alpaca_eval.adapter import (
         LEADERBOARDS,
@@ -215,6 +249,7 @@ def _cmd_convert_alpaca_eval(args: argparse.Namespace) -> int:
                 )
             if args.evaluator_relationship != 'third_party':
                 from every_eval_ever.eval_types import EvaluatorRelationship
+
                 log.source_metadata.evaluator_relationship = (
                     EvaluatorRelationship(args.evaluator_relationship)
                 )
@@ -298,7 +333,7 @@ def build_parser() -> argparse.ArgumentParser:
         dest='source', required=True
     )
 
-    for source in ['lm_eval', 'inspect', 'helm', 'alpaca_eval']:
+    for source in ['lm_eval', 'inspect', 'helm', 'alpaca_eval', 'lexam']:
         source_parser = convert_subparsers.add_parser(
             source,
             help=f'Convert {source} logs',
@@ -307,7 +342,7 @@ def build_parser() -> argparse.ArgumentParser:
         source_parser.add_argument(
             '--log_path',
             '--log-path',
-            required=(source != 'alpaca_eval'),
+            required=(source not in {'alpaca_eval', 'lexam'}),
             help='Path to source log file or directory to convert.',
         )
         source_parser.add_argument(
@@ -325,7 +360,7 @@ def build_parser() -> argparse.ArgumentParser:
         source_parser.add_argument(
             '--evaluator_relationship',
             '--evaluator-relationship',
-            default='third_party',
+            default=('collaborative' if source == 'lexam' else 'third_party'),
             choices=EVALUATOR_RELATIONSHIP_CHOICES,
             help='Relationship between evaluator and model developer.',
         )
@@ -421,6 +456,8 @@ def main(argv: list[str] | None = None) -> int:
             return _cmd_convert_helm(args)
         if args.source == 'alpaca_eval':
             return _cmd_convert_alpaca_eval(args)
+        if args.source == 'lexam':
+            return _cmd_convert_lexam(args)
 
     parser.print_help()
     return 1

diff --git a/every_eval_ever/converters/README.md b/every_eval_ever/converters/README.md
@@ -1,5 +1,5 @@
 ## Automatic Evaluation Log Converters
-A collection of scripts to convert evaluation logs from local evaluation frameworks (e.g., `Inspect AI` and `lm-eval-harness`) and public leaderboards (e.g., AlpacaEval) into the unified Every Eval Ever schema.
+A collection of scripts to convert evaluation logs from local evaluation frameworks (e.g., `Inspect AI` and `lm-eval-harness`) and public leaderboards (e.g., AlpacaEval and LEXam) into the unified Every Eval Ever schema.
 
 ### Installation
 
@@ -271,3 +271,36 @@ options:
   --version {v1,v2}            Which leaderboard to convert. Omit to convert both (default).
   --output_dir OUTPUT_DIR      Base output directory (default: data).
 ```
+
+## LEXam
+
+The LEXam converter fetches the public leaderboard HTML from the LEXam project
+website repository and converts all model entries into the unified schema.
+No local log files are required.
+
+Metrics converted per model:
+
+| Metric | Description |
+|---|---|
+| Open Question Judge Score | Mean expert-validated LLM-judge score on open-ended law exam questions (0-100) |
+| Multiple-Choice Accuracy | Accuracy across all LEXam MCQ configs (0-100) |
+
+### Usage
+
+```bash
+uv run every_eval_ever convert lexam --output_dir data
+```
+
+Usage summary (run with `-h` for the description of every option):
+
+```
+usage: every_eval_ever convert lexam [-h] [--output_dir OUTPUT_DIR]
+                                     [--source_organization_name ...]
+                                     [--evaluator_relationship ...]
+                                     [--source_organization_url ...]
+                                     [--eval_library_name ...]
+                                     [--eval_library_version ...]
+
+options:
+  --output_dir OUTPUT_DIR      Base output directory (default: data).
+```
diff --git a/every_eval_ever/converters/lexam/__init__.py b/every_eval_ever/converters/lexam/__init__.py
diff --git a/every_eval_ever/converters/lexam/__main__.py b/every_eval_ever/converters/lexam/__main__.py
@@ -0,0 +1,56 @@
+"""CLI for converting LEXam leaderboard data to every_eval_ever format."""
+
+import argparse
+import json
+import sys
+import uuid
+from pathlib import Path
+
+from .adapter import LEXamAdapter
+
+
+def main() -> None:  # script entry point: fetch the leaderboard and write one JSON file per returned log
+    parser = argparse.ArgumentParser(
+        description=(
+            'Fetch LEXam leaderboard data from GitHub and convert it '
+            'to Every Eval Ever schema JSON files.'
+        )
+    )
+    parser.add_argument(  # --output_dir is the only option this entry point defines
+        '--output_dir',
+        default='data',
+        help='Base output directory (default: data).',
+    )
+    args = parser.parse_args()
+    # Build the adapter and resolve the output root before fetching.
+    adapter = LEXamAdapter()
+    output_dir = Path(args.output_dir)
+    # Any exception raised by the fetch is reported on stderr and converted to exit status 1.
+    try:
+        logs = adapter.fetch_leaderboard()
+    except Exception as exc:
+        print(f'ERROR: {exc}', file=sys.stderr)
+        raise SystemExit(1) from exc  # `from exc` keeps the original exception chained as the cause
+    # Each log is written to <output_dir>/lexam/<developer>/<model_name>/<uuid4>.json
+    for log in logs:
+        parts = log.model_info.id.split('/', 1)  # split on the first '/' only
+        developer = parts[0] if len(parts) == 2 else 'unknown'  # ids without '/' get developer 'unknown'
+        model_name = parts[1] if len(parts) == 2 else log.model_info.id  # ...and the whole id as model name
+        # NOTE(review): both parts go into the path unsanitised; a model_name that still contains '/' nests extra directories.
+        out_dir = output_dir / 'lexam' / developer / model_name
+        out_dir.mkdir(parents=True, exist_ok=True)
+        out_file = out_dir / f'{uuid.uuid4()}.json'  # random UUID file name, so reruns add new files instead of overwriting
+        # Serialise through log.model_dump(...) with exclude_none=True (presumably a pydantic model — confirm).
+        with out_file.open('w', encoding='utf-8') as file:
+            json.dump(
+                log.model_dump(mode='json', exclude_none=True),
+                file,
+                indent=2,
+            )
+        print(out_file)
+    # Final summary of how many logs were converted.
+    print(f'\nConverted {len(logs)} model evaluation(s).')
+
+
+if __name__ == '__main__':  # call main() only when this module is executed, not when it is imported
+    main()