Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ config.ini
*.txt
.*
!.gitignore
__pycache__/
3 changes: 3 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## 2024-05-24 - Speed up yEnc decoding in Python
**Learning:** Iterating over a `bytes`/`bytearray` object in pure Python (`for byte in data:`) and indexing it one byte at a time are both extremely slow compared with the C-implemented methods on the same objects. By using `bytes.translate()` and `bytes.find()`, one can push the entire loop into C, even when the logic requires conditional parsing (such as yEnc's `=` escape).
**Action:** When working with large byte payloads in pure Python (such as NNTP downloads or parsing), always prefer bulk, C-implemented operations like `.translate()`, `.split()`, and `.find()` over per-byte iteration in Python.
35 changes: 25 additions & 10 deletions verify_nzb.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,34 @@ def _parse_yenc_attrs(line: bytes) -> dict[str, str]:
return attrs


# Lookup table for bytes.translate(): entry ``i`` holds the decoded value of
# the encoded byte ``i``, i.e. ``(i - 42) % 256``.
_YENC_DECODE_TABLE = bytes((i - 42) % 256 for i in range(256))


def _decode_yenc_lines(lines: Iterable[bytes]) -> bytes:
    """Decode yEnc-encoded data lines and return the concatenated payload.

    Every byte is decoded as ``(byte - 42) % 256``.  The byte ``=`` (61) is an
    escape marker: it is dropped, and the byte that follows it is decoded as
    ``(byte - 64 - 42) % 256``.

    Args:
        lines: Encoded data lines.  Each line is decoded as-is; this function
            does not strip line terminators or skip header/trailer lines.

    Returns:
        The decoded bytes of all lines, in order.

    Raises:
        ValueError: If a line ends with an ``=`` that has no following byte.
    """
    decoded = bytearray()
    for line in lines:
        length = len(line)
        start = 0
        while start < length:
            # bytes.find / bytes.translate run in C, which is far faster than
            # inspecting each byte from Python.
            escape_at = line.find(b"=", start)
            if escape_at == -1:
                # No further escapes: decode the remainder in one pass.
                decoded += line[start:].translate(_YENC_DECODE_TABLE)
                break

            if escape_at > start:
                # Decode the plain run that precedes the escape marker.
                decoded += line[start:escape_at].translate(_YENC_DECODE_TABLE)

            if escape_at + 1 >= length:
                raise ValueError("dangling yEnc escape")

            # Escaped byte: undo the extra +64 and the regular +42 together.
            decoded.append((line[escape_at + 1] - 106) % 256)
            start = escape_at + 2
    return bytes(decoded)


Expand Down