Skip to content

Commit b62c46d

Browse files
committed
Use PyMuPDF instead of PyPDF (WIP)
Currently it’s impossible to implement horizontal and vertical mirroring with MuPDF; see pymupdf/PyMuPDF#4822. One other test fails (pdfnup-recursive-links), but this appears to be a trivial (ignorable) difference in colour quantization. It’s also noteworthy that PyMuPDF is not yet packaged in Debian.
1 parent d15d4dc commit b62c46d

File tree

6 files changed

+65
-78
lines changed

6 files changed

+65
-78
lines changed

psutils/command/psjoin.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import warnings
1212
from typing import IO
1313

14-
from pypdf import PdfReader, PdfWriter
14+
from pymupdf import Document
1515

1616
from psutils.argparse import HelpFormatter, add_version_argument
1717
from psutils.io import setup_inputs_and_output
@@ -64,15 +64,15 @@ def join_pdf(
6464
args: argparse.Namespace, infiles: list[IO[bytes]], outfile: IO[bytes]
6565
) -> None:
6666
# Merge input files
67-
out_pdf = PdfWriter()
67+
out_pdf = Document()
6868
for file in infiles:
69-
in_pdf = PdfReader(file)
70-
out_pdf.append(in_pdf)
71-
if args.even and len(in_pdf.pages) % 2 == 1:
72-
out_pdf.add_blank_page()
69+
in_pdf = Document(stream=file)
70+
out_pdf.insert_pdf(in_pdf)
71+
if args.even and in_pdf.page_count % 2 == 1:
72+
out_pdf.new_page()
7373

7474
# Write output
75-
out_pdf.write(outfile)
75+
outfile.write(out_pdf.convert_to_pdf())
7676
sys.stdout.buffer.flush()
7777

7878

psutils/command/psnup.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def psnup(argv: list[str] = sys.argv[1:]) -> None:
208208
in_size = paper_size
209209
if size is None:
210210
die("output page size not set, and could not get default paper size")
211-
assert(in_size)
211+
assert in_size
212212

213213
# Take account of flip
214214
if args.flip:
@@ -281,9 +281,10 @@ def reduce_waste(
281281
)
282282

283283
# Page centring shifts
284-
hshift, vshift = (ppwid / horiz - in_size.width * scale) / 2, (
285-
pphgt / vert - in_size.height * scale
286-
) / 2
284+
hshift, vshift = (
285+
(ppwid / horiz - in_size.width * scale) / 2,
286+
(pphgt / vert - in_size.height * scale) / 2,
287+
)
287288

288289
# Construct specification list
289290
spec_list = []
@@ -303,12 +304,12 @@ def reduce_waste(
303304
xoff = args.margin + across * ppwid / horiz + hshift
304305
yoff = args.margin + up * pphgt / vert + vshift
305306
spec_list.append(
306-
f'{page}{"L" if rotate else ""}@{scale:f}({xoff:f},{yoff:f})'
307+
f"{page}{'L' if rotate else ''}@{scale:f}({xoff:f},{yoff:f})"
307308
)
308309

309310
# Rearrange pages
310311
specs, modulo, flipped = parsespecs(
311-
f'{args.nup}:{"+".join(spec_list)}', paper_context
312+
f"{args.nup}:{'+'.join(spec_list)}", paper_context
312313
)
313314
transform = document_transform(
314315
doc, outfile, size, orig_in_size, specs, args.draw, in_size_guessed

psutils/readers.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,20 @@
77
import re
88
from typing import IO
99

10-
from pypdf import PdfReader as PdfReaderBase
11-
from pypdf._utils import StrByteType
10+
from pymupdf import Document
1211

1312
from .types import Rectangle
1413
from .warnings import die
1514

1615

17-
class PdfReader(PdfReaderBase):
16+
class PdfReader(Document):
1817
def __init__(
1918
self,
20-
stream: StrByteType,
21-
strict: bool = False,
22-
password: str | bytes | None = None,
19+
stream: IO[bytes],
2320
) -> None:
24-
super().__init__(stream, strict, password)
25-
assert len(self.pages) > 0
26-
mediabox = self.pages[0].mediabox
21+
super().__init__(stream=stream)
22+
assert self.page_count > 0
23+
mediabox = self[0].mediabox
2724
self.size = Rectangle(mediabox.width, mediabox.height)
2825
self.size_guessed = False
2926

psutils/transformers.py

Lines changed: 41 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
Released under the GPL version 3, or (at your option) any later version.
55
"""
66

7-
import io
87
import shutil
98
import sys
109
from abc import ABC, abstractmethod
@@ -13,8 +12,7 @@
1312
from typing import IO
1413
from warnings import warn
1514

16-
from pypdf import PdfWriter, Transformation
17-
from pypdf.annotations import PolyLine
15+
from pymupdf import Document, Identity, Matrix
1816

1917
from .argparse import parserange
2018
from .io import setup_input_and_output
@@ -384,20 +382,20 @@ def __init__(
384382
super().__init__()
385383
self.outfile = outfile
386384
self.reader = reader
387-
self.writer = PdfWriter()
385+
self.writer = Document()
388386
self.draw = draw
389387
self.specs = specs
390388

391389
if in_size is None:
392390
in_size = reader.size
393391
if size is None:
394392
size = in_size
395-
393+
assert size is not None
396394
self.size = size
397395
self.in_size = in_size
398396

399397
def pages(self) -> int:
400-
return len(self.reader.pages)
398+
return self.reader.page_count
401399

402400
def write_header(self, maxpage: int, modulo: int) -> None:
403401
pass
@@ -423,78 +421,67 @@ def write_page(
423421
len(page_specs) == 1
424422
and not page_specs[0].has_transform()
425423
and page_number < page_list.num_pages()
426-
and 0 <= real_page < len(self.reader.pages)
424+
and 0 <= real_page < self.reader.page_count
427425
and self.draw == 0
428426
and self.size == self.in_size
429427
and (
430428
self.in_size.width is None
431429
or (
432-
self.in_size.width == self.reader.pages[real_page].mediabox.width
433-
and self.in_size.height
434-
== self.reader.pages[real_page].mediabox.height
430+
self.in_size.width == self.reader[real_page].mediabox.width
431+
and self.in_size.height == self.reader[real_page].mediabox.height
435432
)
436433
)
437434
):
438-
self.writer.add_page(self.reader.pages[real_page])
435+
self.writer.insert_pdf(self.reader, from_page=real_page, to_page=real_page)
439436
else:
440437
# Add a blank page of the correct size to the end of the document
441-
outpdf_page = self.writer.add_blank_page(self.size.width, self.size.height)
438+
outpdf_page = self.writer.new_page(-1, self.size.width, self.size.height)
442439
for spec in page_specs:
443440
page_number = page_index_to_page_number(spec, maxpage, modulo, pagebase)
444441
real_page = page_list.real_page(page_number)
445-
if page_number < page_list.num_pages() and 0 <= real_page < len(
446-
self.reader.pages
442+
if (
443+
page_number < page_list.num_pages()
444+
and 0 <= real_page < self.reader.page_count
447445
):
448446
# Calculate input page transformation
449-
t = Transformation()
447+
t = Matrix(Identity)
448+
mbox = self.reader[real_page].mediabox
450449
if spec.hflip:
451-
t = t.transform(
452-
Transformation((-1, 0, 0, 1, self.in_size.width, 0))
453-
)
450+
t.concat(t, Matrix(-1, 0, 0, 1, self.in_size.width, 0))
454451
elif spec.vflip:
455-
t = t.transform(
456-
Transformation((1, 0, 0, -1, 0, self.in_size.height))
457-
)
452+
t.concat(t, Matrix(1, 0, 0, -1, 0, self.in_size.height))
458453
if spec.rotate != 0:
459-
t = t.rotate(spec.rotate % 360)
454+
t.prerotate(spec.rotate % 360)
460455
if spec.scale != 1.0:
461-
t = t.scale(spec.scale, spec.scale)
456+
t.prescale(spec.scale, spec.scale)
462457
if spec.off != Offset(0.0, 0.0):
463-
t = t.translate(spec.off.x, spec.off.y)
458+
# (t.prerotate applies translation according to original axes)
459+
t.e += spec.off.x
460+
t.f += spec.off.y
461+
# Transform input page mediabox
462+
t.concat(t, outpdf_page.transformation_matrix)
463+
mbox.transform(t)
464464
# Merge input page into the output document
465-
outpdf_page.merge_transformed_page(self.reader.pages[real_page], t)
466-
if self.draw > 0: # FIXME: draw the line at the requested width
467-
mediabox = self.reader.pages[real_page].mediabox
468-
line = PolyLine(
469-
vertices=[
470-
(
471-
mediabox.left + spec.off.x,
472-
mediabox.bottom + spec.off.y,
473-
),
474-
(mediabox.left + spec.off.x, mediabox.top + spec.off.y),
475-
(
476-
mediabox.right + spec.off.x,
477-
mediabox.top + spec.off.y,
478-
),
479-
(
480-
mediabox.right + spec.off.x,
481-
mediabox.bottom + spec.off.y,
482-
),
483-
(
484-
mediabox.left + spec.off.x,
485-
mediabox.bottom + spec.off.y,
486-
),
487-
],
488-
)
489-
self.writer.add_annotation(outpdf_page, line)
465+
# FIXME: use Document.insert_pdf when outputting only
466+
# one page per page with only rotation. Otherwise, use
467+
# Document.bake() to get annotations & fields.
468+
outpdf_page.show_pdf_page(
469+
mbox, self.reader, real_page, rotate=spec.rotate % 360
470+
)
471+
if self.draw > 0:
472+
line = [
473+
(int(mbox.x0), int(mbox.y1)),
474+
(int(mbox.x0), int(mbox.y0)),
475+
(int(mbox.x1), int(mbox.y0)),
476+
(int(mbox.x1), int(mbox.y1)),
477+
(int(mbox.x0), int(mbox.y1)),
478+
]
479+
outpdf_page.draw_polyline(line, width=self.draw)
490480

491481
def finalize(self) -> None:
492-
# PyPDF seeks, so write to a buffer first in case outfile is stdout.
493-
buf = io.BytesIO()
494-
self.writer.write(buf)
495-
buf.seek(0)
496-
self.outfile.write(buf.read())
482+
self.outfile.write(self.writer.convert_to_pdf())
497483
self.outfile.flush()
484+
self.writer.close()
498485

499486

500487
def document_transform(

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ urls = {Homepage = "https://github.com/rrthomas/psutils"}
1111
requires-python = ">= 3.12"
1212
dependencies = [
1313
"puremagic >= 1.26",
14-
"pypdf >= 4.3.0",
14+
"pymupdf",
1515
]
1616
classifiers = [
1717
"Environment :: Console",
@@ -39,7 +39,7 @@ test = [
3939
requires = [
4040
"argparse-manpage[setuptools] >= 4.2",
4141
"puremagic >= 1.26",
42-
"pypdf >= 4.3.0",
42+
"pymupdf",
4343
]
4444

4545
[tool.build_manpages]

tox.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ envlist = py312,py313
33

44
[testenv]
55
deps =
6+
Wand
67
argparse-manpage >= 4.2
78
pyright
9+
pytest
810
ruff
911
setuptools
1012
types-colorama

0 commit comments

Comments
 (0)