Skip to content

Commit e7b44f1

Browse files
committed
Created nemotron-parse reader, with tests
1 parent 3e2631b commit e7b44f1

9 files changed

+1615
-1
lines changed

packages/paper-qa-nemotron/src/paperqa_nemotron/reader.py

Lines changed: 496 additions & 0 deletions
Large diffs are not rendered by default.

packages/paper-qa-nemotron/tests/cassettes/TestNemotronAPI.test_detection_only[0].yaml

Lines changed: 99 additions & 0 deletions
Large diffs are not rendered by default.

packages/paper-qa-nemotron/tests/cassettes/TestNemotronAPI.test_detection_only[1].yaml

Lines changed: 93 additions & 0 deletions
Large diffs are not rendered by default.

packages/paper-qa-nemotron/tests/cassettes/TestNemotronAPI.test_markdown_bbox[0].yaml

Lines changed: 141 additions & 0 deletions
Large diffs are not rendered by default.

packages/paper-qa-nemotron/tests/cassettes/TestNemotronAPI.test_markdown_bbox[1].yaml

Lines changed: 126 additions & 0 deletions
Large diffs are not rendered by default.

packages/paper-qa-nemotron/tests/cassettes/TestNemotronAPI.test_markdown_no_bbox[0].yaml

Lines changed: 111 additions & 0 deletions
Large diffs are not rendered by default.

packages/paper-qa-nemotron/tests/cassettes/TestNemotronAPI.test_markdown_no_bbox[1].yaml

Lines changed: 111 additions & 0 deletions
Large diffs are not rendered by default.

packages/paper-qa-nemotron/tests/test_paperqa_nemotron.py

Lines changed: 431 additions & 0 deletions
Large diffs are not rendered by default.

tests/test_paperqa.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
40 | 40 |   from lmi.llms import rate_limited
41 | 41 |   from lmi.utils import VCR_DEFAULT_MATCH_ON, validate_image
42 | 42 |   from paperqa_docling import parse_pdf_to_pages as docling_parse_pdf_to_pages
   | 43 | + from paperqa_nemotron import parse_pdf_to_pages as nemotron_parse_pdf_to_pages
43 | 44 |   from paperqa_pymupdf import parse_pdf_to_pages as pymupdf_parse_pdf_to_pages
44 | 45 |   from paperqa_pypdf import parse_pdf_to_pages as pypdf_parse_pdf_to_pages
45 | 46 |   from pydantic import ValidationError
@@ -2181,7 +2182,12 @@ async def test_images_corrupt(stub_data_dir: Path, caplog) -> None:
2181 | 2182 |
2182 | 2183 |   @pytest.mark.vcr(before_record_request=record_non_llm_requests)
2183 | 2184 |   @pytest.mark.parametrize(
2184 |      | -     "parser", [pymupdf_parse_pdf_to_pages, docling_parse_pdf_to_pages]
     | 2185 | +     "parser",
     | 2186 | +     [
     | 2187 | +         pymupdf_parse_pdf_to_pages,
     | 2188 | +         docling_parse_pdf_to_pages,
     | 2189 | +         nemotron_parse_pdf_to_pages,
     | 2190 | +     ],
2185 | 2191 |   )
2186 | 2192 |   @pytest.mark.asyncio
2187 | 2193 |   async def test_equations(stub_data_dir: Path, parser: PDFParserFn) -> None:

0 commit comments

Comments
 (0)