diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ffb7583e..28d63c83 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,7 +5,7 @@ # the repo. Unless a later match takes precedence, # @global-owner1 and @global-owner2 will be requested for # review when someone opens a pull request. -* @washingtonpost/news-engineering +* @washingtonpost/news-engineering @WashPost/data-reporting # Order is important; the last matching pattern takes the most # precedence. When someone opens a pull request that only diff --git a/.github/workflows/_python-wheels.yml b/.github/workflows/_python-wheels.yml index c55a17e0..f1bde58d 100644 --- a/.github/workflows/_python-wheels.yml +++ b/.github/workflows/_python-wheels.yml @@ -27,7 +27,7 @@ jobs: python-version: "3.11" - name: Install Zig - run: python -m pip install ziglang==0.11.0 wheel + run: python -m pip install ziglang==0.15.2 wheel - name: Build wheels run: python python/make_wheels.py ${{ matrix.architecture }}-${{ matrix.os }} diff --git a/.github/workflows/_release.yml b/.github/workflows/_release.yml index efff2e4a..dbeb8ee2 100644 --- a/.github/workflows/_release.yml +++ b/.github/workflows/_release.yml @@ -25,7 +25,7 @@ jobs: - uses: actions/checkout@v4 - uses: goto-bus-stop/setup-zig@v1 with: - version: 0.11.0 + version: 0.15.2 - name: Get output paths uses: kanga333/variable-mapper@master id: map @@ -36,21 +36,25 @@ jobs: "linux-gnu": { "libExt": "so", "libName": "libfastfec", + "libDir": "lib", "exeExt": "" }, "macos": { "libExt": "dylib", "libName": "libfastfec", + "libDir": "lib", "exeExt": "" }, "windows": { "libExt": "dll", "libName": "fastfec", + "libDir": "bin", "exeExt": ".exe" }, "wasm": { "libExt": "wasm", "libName": "fastfec", + "libDir": "bin", "exeExt": "" } } @@ -66,10 +70,10 @@ jobs: run: zip -j fastfec-${{ matrix.os }}-${{ matrix.architecture }}-${{ inputs.version }}.zip zig-out/bin/fastfec${{ steps.map.outputs.exeExt }} - name: Move output library if: matrix.os != 'wasm' - run: mv zig-out/lib/${{ steps.map.outputs.libName }}.${{ steps.map.outputs.libExt }} libfastfec-${{ matrix.os }}-${{ matrix.architecture }}-${{ inputs.version }}.${{ steps.map.outputs.libExt }} + run: mv zig-out/${{ steps.map.outputs.libDir }}/${{ steps.map.outputs.libName }}.${{ steps.map.outputs.libExt }} libfastfec-${{ matrix.os }}-${{ matrix.architecture }}-${{ inputs.version }}.${{ steps.map.outputs.libExt }} - name: Move output library (wasm) if: matrix.os == 'wasm' - run: mv zig-out/lib/${{ steps.map.outputs.libName }}.${{ steps.map.outputs.libExt }} libfastfec-${{ inputs.version }}.${{ steps.map.outputs.libExt }} + run: mv zig-out/${{ steps.map.outputs.libDir }}/${{ steps.map.outputs.libName }}.${{ steps.map.outputs.libExt }} libfastfec-${{ inputs.version }}.${{ steps.map.outputs.libExt }} - name: Upload artifacts uses: actions/upload-artifact@v4 if: matrix.os != 'wasm' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4e8806c4..483160f2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,7 +8,7 @@ jobs: - uses: actions/checkout@v4 - uses: goto-bus-stop/setup-zig@v1 with: - version: 0.11.0 + version: 0.15.2 - name: Run zig test run: zig build test @@ -17,9 +17,12 @@ jobs: timeout-minutes: 5 strategy: matrix: - python-version: [3.8] + python-version: [3.11] steps: - uses: actions/checkout@v4 + - uses: goto-bus-stop/setup-zig@v1 + with: + version: 0.15.2 - name: Setup Python uses: actions/setup-python@v4 with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d1ff3eb..dee4074e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,30 +1,29 @@ files: \.py$ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v5.0.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.13.2 hooks: - id: isort name: isort (python) - args: ["--profile", "black", --line-length=120] + args: ["--profile", "black", "--line-length=120"] # black - - repo: https://github.com/ambv/black - rev: 21.9b0 + - repo: https://github.com/psf/black + rev: 24.10.0 hooks: - id: black args: # arguments to configure black - --line-length=120 - language_version: python3.9 # flake8 - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 + - repo: https://github.com/PyCQA/flake8 + rev: 7.1.1 hooks: - id: flake8 args: # arguments to configure flake8 diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..d43adae6 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,105 @@ +# AGENTS.md + +## What this repo is + +FastFEC is a C parser for raw `.fec` filings. It has: + +- a CLI binary (`fastfec`) that writes parsed CSV files +- a shared C library (`libfastfec`) +- a Python wrapper package in `python/` that loads the shared library + +## Repo structure + +- `src/`: C source code for parser, CLI, writer, and bundled PCRE +- `build.zig`: Zig build script for CLI, shared lib, and C tests +- `scripts/`: mapping generation scripts (`generate_mappings.py`) +- `python/src/fastfec/`: Python wrapper (`ctypes` client + utils) +- `python/tests/`: Python tests + fixture `.fec` files + +## macOS setup + +1. Install Zig: + +```sh +brew install zig +``` + +2. Verify: + +```sh +zig version +``` + +## Build (C CLI + shared lib) + +From repo root: + +```sh +zig build +``` + +Outputs: + +- `zig-out/bin/fastfec` +- `zig-out/lib/libfastfec.dylib` + +## Run CLI + +Parse a local filing: + +```sh +./zig-out/bin/fastfec -x -s python/tests/fixtures/13360.fec /private/tmp/fastfec_out 13360 +``` + +Notes: + +- `-x` disables stdin and forces file input mode +- output files are written under `{output_dir}/{filing_id}/` + +## Run C tests + +```sh +zig build test +``` + +## Makefile shortcuts + +From repo root: + +```sh +make help +``` + +Main targets: + +- `make build`: build CLI + shared library +- `make c-test`: run C tests +- `make cli-smoke`: parse fixture filing and write CSV output to `/private/tmp/fastfec_make_smoke` +- `make py-setup`: create `python/.venv` +- `make py-install`: install Python dev dependencies +- `make py-test`: run Python tests (`tox -e py`) +- `make all`: run full verification (`build`, `c-test`, `cli-smoke`, `py-test`) + +## Python interface setup + +From repo root: + +```sh +python3 -m venv python/.venv +source python/.venv/bin/activate +cd python +pip install -r requirements-dev.txt +``` + +## Run Python tests + +```sh +source python/.venv/bin/activate +cd python +tox -e py +``` + +## Dependency notes + +- The Python package depends on `ziglang==0.15.2` for wheel/build workflows. +- Requires Zig >= 0.15.0 (build.zig uses the 0.15 `root_module` API). diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..d23b6468 --- /dev/null +++ b/Makefile @@ -0,0 +1,47 @@ +SHELL := /bin/zsh + +ZIG ?= zig + +VENV := python/.venv +FIXTURE := python/tests/fixtures/13360.fec +SMOKE_OUT := /private/tmp/fastfec_make_smoke + +.PHONY: help all check-zig build c-test cli-smoke py-setup py-install py-test + +help: + @echo "Targets:" + @echo " make build - Build CLI + shared lib" + @echo " make c-test - Run C tests" + @echo " make cli-smoke - Parse fixture with CLI" + @echo " make py-setup - Create python/.venv" + @echo " make py-install - Install python dev deps" + @echo " make py-test - Run Python tox tests" + @echo " make all - Run full verification" + +check-zig: + @command -v $(ZIG) >/dev/null 2>&1 || (echo "Zig not found. Install with: brew install zig" && exit 1) + @$(ZIG) version + +build: check-zig + $(ZIG) build + +c-test: check-zig + $(ZIG) build test + +cli-smoke: build + mkdir -p $(SMOKE_OUT) + ./zig-out/bin/fastfec -x -s $(FIXTURE) $(SMOKE_OUT) 13360 + @test -d $(SMOKE_OUT)/13360 + @echo "Smoke output files:" + @find $(SMOKE_OUT)/13360 -maxdepth 1 -type f | head -n 10 + +py-setup: + @test -d $(VENV) || python3 -m venv $(VENV) + +py-install: py-setup + source $(VENV)/bin/activate && cd python && pip install -r requirements-dev.txt + +py-test: py-install + source $(VENV)/bin/activate && cd python && tox -e py + +all: build c-test cli-smoke py-test diff --git a/build.zig b/build.zig index 20188daf..d1c8f30e 100644 --- a/build.zig +++ b/build.zig @@ -1,10 +1,11 @@ const std = @import("std"); const builtin = @import("builtin"); -const CrossTarget = std.zig.CrossTarget; -pub fn linkPcre(vendored_pcre: bool, libExe: *std.build.LibExeObjStep) void { +pub fn linkPcre(vendored_pcre: bool, libExe: *std.Build.Step.Compile) void { if (vendored_pcre) { - libExe.addCSourceFiles(&pcreSources, &buildOptions); + libExe.addCSourceFiles(.{ .files = &pcreSources, .flags = &buildOptions }); + // When vendoring PCRE, define PCRE_STATIC to avoid dllimport on Windows + libExe.root_module.addCMacro("PCRE_STATIC", "1"); } else { if (builtin.os.tag == .windows) { libExe.linkSystemLibrary("pcre"); @@ -12,13 +13,20 @@ pub fn linkPcre(vendored_pcre: bool, libExe: *std.build.LibExeObjStep) void { libExe.linkSystemLibrary("libpcre"); } } - if (libExe.target.isDarwin()) { + if (libExe.rootModuleTarget().os.tag.isDarwin()) { // useful for package maintainers // see https://github.com/ziglang/zig/issues/13388 libExe.headerpad_max_install_names = true; } } +fn createModule(b: *std.Build, target: std.Build.ResolvedTarget, optimize: std.builtin.OptimizeMode) *std.Build.Module { + return b.createModule(.{ + .target = target, + .optimize = optimize, + }); +} + pub fn build(b: *std.Build) !void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{ @@ -34,68 +42,75 @@ pub fn build(b: *std.Build) !void { if (!lib_only and !wasm) { const fastfec_cli = b.addExecutable(.{ .name = "fastfec", - .target = target, - .optimize = optimize, + .root_module = createModule(b, target, optimize), }); fastfec_cli.linkLibC(); - fastfec_cli.addCSourceFiles(&libSources, &buildOptions); + fastfec_cli.addCSourceFiles(.{ .files = &libSources, .flags = &buildOptions }); linkPcre(vendored_pcre, fastfec_cli); - fastfec_cli.addCSourceFiles(&.{ - "src/cli.c", - "src/main.c", - }, &buildOptions); + fastfec_cli.addCSourceFiles(.{ + .files = &.{ + "src/cli.c", + "src/main.c", + }, + .flags = &buildOptions, + }); b.installArtifact(fastfec_cli); } if (!wasm and !skip_lib) { // Library build step - const fastfec_lib = b.addSharedLibrary(.{ + const fastfec_lib = b.addLibrary(.{ .name = "fastfec", - .target = target, - .optimize = optimize, - .version = null, + .linkage = .dynamic, + .root_module = createModule(b, target, optimize), }); - if (fastfec_lib.target.isDarwin()) { + if (fastfec_lib.rootModuleTarget().os.tag.isDarwin()) { // useful for package maintainers // see https://github.com/ziglang/zig/issues/13388 fastfec_lib.headerpad_max_install_names = true; } fastfec_lib.linkLibC(); - fastfec_lib.addCSourceFiles(&libSources, &buildOptions); + fastfec_lib.addCSourceFiles(.{ .files = &libSources, .flags = &buildOptions }); linkPcre(vendored_pcre, fastfec_lib); b.installArtifact(fastfec_lib); } else if (wasm) { // Wasm library build step - const wasm_target = CrossTarget{ .cpu_arch = .wasm32, .os_tag = .freestanding }; - const fastfec_wasm = b.addSharedLibrary(.{ - .name = "fastfec", - .target = wasm_target, + const wasm_target: std.Target.Query = .{ .cpu_arch = .wasm32, .os_tag = .wasi }; + const wasm_module = b.createModule(.{ + .target = b.resolveTargetQuery(wasm_target), .optimize = optimize, - .version = null, + .link_libc = true, + }); + const fastfec_wasm = b.addExecutable(.{ + .name = "fastfec", + .root_module = wasm_module, }); - fastfec_wasm.linkLibC(); - fastfec_wasm.addCSourceFiles(&libSources, &buildOptions); + fastfec_wasm.rdynamic = true; + fastfec_wasm.entry = .disabled; + fastfec_wasm.import_symbols = true; + fastfec_wasm.addCSourceFiles(.{ .files = &libSources, .flags = &buildOptions }); linkPcre(vendored_pcre, fastfec_wasm); fastfec_wasm.addCSourceFile(.{ .file = .{ - .path = "src/wasm.c", + .cwd_relative = "src/wasm.c", }, .flags = &buildOptions }); b.installArtifact(fastfec_wasm); } // Test step - var prev_test_step: ?*std.build.Step = null; + var prev_test_step: ?*std.Build.Step = null; for (tests) |test_file| { const base_file = std.fs.path.basename(test_file); const subtest_exe = b.addExecutable(.{ .name = base_file, + .root_module = createModule(b, target, optimize), }); subtest_exe.linkLibC(); - subtest_exe.addCSourceFiles(&testIncludes, &buildOptions); + subtest_exe.addCSourceFiles(.{ .files = &testIncludes, .flags = &buildOptions }); linkPcre(vendored_pcre, subtest_exe); subtest_exe.addCSourceFile(.{ - .file = .{ .path = test_file }, + .file = .{ .cwd_relative = test_file }, .flags = &buildOptions, }); const subtest_cmd = b.addRunArtifact(subtest_exe); diff --git a/python/make_wheels.py b/python/make_wheels.py index 8e87b45a..2d4072cd 100644 --- a/python/make_wheels.py +++ b/python/make_wheels.py @@ -14,7 +14,6 @@ import shutil import subprocess import sys -import time from email.message import EmailMessage from glob import glob from zipfile import ZIP_DEFLATED, ZipInfo @@ -39,6 +38,7 @@ PARENT_DIR = os.path.dirname(CURRENT_DIR) SRC_DIR = os.path.join(CURRENT_DIR, "src", "fastfec") LIBRARY_DIR = os.path.join(PARENT_DIR, "zig-out", "lib") +BIN_DIR = os.path.join(PARENT_DIR, "zig-out", "bin") OUTPUT_DIR = "wheelhouse" PACKAGE_NAME = "fastfec" with open(os.path.join(PARENT_DIR, "README.md"), "r") as f: @@ -107,9 +107,7 @@ def write_wheel(out_dir, *, name, version, tag, metadata, description, contents) for path in glob(os.path.join(SRC_DIR, "*.py"), recursive=True): with open(path, "rb") as f: file_contents = f.read() - base_contents[os.path.join(PACKAGE_NAME, os.path.relpath(path, SRC_DIR))] = ( - file_contents - ) + base_contents[os.path.join(PACKAGE_NAME, os.path.relpath(path, SRC_DIR))] = file_contents current_platform = platform.system() for target_platform, zig_target, wheel_platform in matrix: @@ -118,12 +116,11 @@ def write_wheel(out_dir, *, name, version, tag, metadata, description, contents) # First clear the target directory of any stray files if os.path.exists(LIBRARY_DIR): shutil.rmtree(LIBRARY_DIR) - # Compile! Requires ziglang==0.11.0 to be installed - subprocess.call( - [ - sys.executable, - "-m", - "ziglang", + # Compile! Requires zig >= 0.15.2 (system or via ziglang pip package) + zig_cmd = ["zig"] if shutil.which("zig") else [sys.executable, "-m", "ziglang"] + subprocess.check_call( + zig_cmd + + [ "build", "-Dlib-only=true", f"-Dtarget={zig_target}", @@ -131,17 +128,17 @@ def write_wheel(out_dir, *, name, version, tag, metadata, description, contents) cwd=PARENT_DIR, ) # Collect compiled library files (extension .dylib|.so|.dll) + # Zig 0.15 places .dylib/.so in zig-out/lib/ but .dll in zig-out/bin/ library_files = ( glob(os.path.join(LIBRARY_DIR, "*.dylib")) + glob(os.path.join(LIBRARY_DIR, "*.so")) + glob(os.path.join(LIBRARY_DIR, "*.dll")) + + glob(os.path.join(BIN_DIR, "*.dll")) ) # Write the library file to the archive contents for library_file in library_files: with open(library_file, "rb") as f: - contents[ - os.path.join(PACKAGE_NAME, os.path.relpath(library_file, LIBRARY_DIR)) - ] = f.read() + contents[os.path.join(PACKAGE_NAME, os.path.basename(library_file))] = f.read() # Create output directory if needed if not os.path.exists(OUTPUT_DIR): diff --git a/python/pyproject.toml b/python/pyproject.toml index bc31a70b..505529c2 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,6 +1,2 @@ [build-system] -requires = [ - "setuptools", - "wheel", - "ziglang==0.11.0" -] +requires = ["setuptools", "wheel", "ziglang==0.15.2"] diff --git a/python/requirements-dev.txt b/python/requirements-dev.txt index c5c546a3..b9d6f4dc 100644 --- a/python/requirements-dev.txt +++ b/python/requirements-dev.txt @@ -7,5 +7,5 @@ pytest-xdist pytest-mock black isort -ziglang==0.11.0 +ziglang==0.15.2 -e . \ No newline at end of file diff --git a/python/setup.py b/python/setup.py index 36371fe4..f8896673 100644 --- a/python/setup.py +++ b/python/setup.py @@ -20,9 +20,7 @@ def compile_library(): - subprocess.call( - [sys.executable, "-m", "ziglang", "build", "-Dlib-only=true"], cwd=PARENT_DIR - ) + subprocess.check_call([sys.executable, "-m", "ziglang", "build", "-Dlib-only=true"], cwd=PARENT_DIR) compile_library() @@ -35,8 +33,7 @@ def compile_library(): ) # Copy them into the Python project src directory and get their relative paths library_files = [ - os.path.basename(shutil.copy(library_file, os.path.join(CURRENT_DIR, "src"))) - for library_file in raw_library_files + os.path.basename(shutil.copy(library_file, os.path.join(CURRENT_DIR, "src"))) for library_file in raw_library_files ] # Force building a non-pure lib wheel diff --git a/python/src/fastfec/utils.py b/python/src/fastfec/utils.py index 5f32242d..fca03706 100644 --- a/python/src/fastfec/utils.py +++ b/python/src/fastfec/utils.py @@ -11,16 +11,7 @@ import logging import os import pathlib -from ctypes import ( - CFUNCTYPE, - POINTER, - c_char, - c_char_p, - c_int, - c_size_t, - c_void_p, - memmove, -) +from ctypes import CFUNCTYPE, POINTER, c_char, c_char_p, c_int, c_size_t, c_void_p, memmove from glob import glob logger = logging.getLogger("fastfec") @@ -225,9 +216,7 @@ def write_callback(filename, extension, contents, num_bytes): file_descriptor = write_cache.file_descriptors.get(path) if not file_descriptor: # Open the file - write_cache.file_descriptors[path] = open_function( - path.decode("utf8"), mode="wb" - ) + write_cache.file_descriptors[path] = open_function(path.decode("utf8"), mode="wb") file_descriptor = write_cache.file_descriptors[path] write_cache.last_filename = filename write_cache.last_fd = file_descriptor diff --git a/python/tests/conftest.py b/python/tests/conftest.py index 82db08f3..f014bc8b 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -38,6 +38,7 @@ def filing_1550548(get_fixture): """ return get_fixture("1550548.fec") + @pytest.fixture def filing_1606847(get_fixture): """ @@ -45,9 +46,10 @@ def filing_1606847(get_fixture): """ return get_fixture("1606847.fec") + @pytest.fixture def filing_invalid_version(get_fixture): """ Returns the file path for filing_invalid_version.fec """ - return get_fixture("filing_invalid_version.fec") \ No newline at end of file + return get_fixture("filing_invalid_version.fec") diff --git a/python/tests/test_client.py b/python/tests/test_client.py index ef3b4331..d08f4d00 100644 --- a/python/tests/test_client.py +++ b/python/tests/test_client.py @@ -1,6 +1,5 @@ import datetime import os -import pytest from fastfec import FastFEC diff --git a/scripts/generate_mappings.py b/scripts/generate_mappings.py index 1ae60032..71e90c3e 100644 --- a/scripts/generate_mappings.py +++ b/scripts/generate_mappings.py @@ -78,9 +78,7 @@ def with_comment(comment, text): headers = [] for form_type in mappings_json: for version in mappings_json[form_type]: - headers.append( - [version, form_type, list_to_csv(mappings_json[form_type][version])] - ) + headers.append([version, form_type, list_to_csv(mappings_json[form_type][version])]) header_table = generate_c_array("headers", 3, headers) types = [] @@ -93,9 +91,7 @@ def with_comment(comment, text): raise ValueError("Expect to get a type") if type_value == "date": if type_mapping.get("format") != "%Y%m%d": - raise ValueError( - f'Unexpected date format: {type_mapping.get("format")}' - ) + raise ValueError(f'Unexpected date format: {type_mapping.get("format")}') elif type_value != "float": raise ValueError(f"Unexpected type: {type_value}") @@ -130,9 +126,7 @@ def with_comment(comment, text): "r", encoding="utf8", ) as f: - assert ( - result == f.read() - ), "Mappings outdated: Update by running `python scripts/generate_mappings.py`" + assert result == f.read(), "Mappings outdated: Update by running `python scripts/generate_mappings.py`" print("Mappings are up-to-date") else: with open(