Skip to content

Commit 094bdfa

Browse files
committed
fix: suppress pdfminer warnings to prevent upload halting
- Added suppression of pdfminer warnings during Docling PDF processing.
- Suppresses 'Cannot set gray non-stroke color' warnings that cause uploads to halt.
- Temporarily sets the pdfminer logger to ERROR level during document processing.
- Fixes an issue where files of ~34MB would fail due to pdfminer warning spam.

Resolves an issue where PDF uploads would halt with repeated pdfminer warnings.
1 parent 57fd82f commit 094bdfa

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

surfsense_backend/app/tasks/document_processors/file_processors.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -886,13 +886,40 @@ async def process_file_in_background(
886886
)
887887

888888
# Use Docling service for document processing
889+
import warnings
890+
889891
from app.services.docling_service import create_docling_service
890892

891893
# Create Docling service
892894
docling_service = create_docling_service()
893895

894-
# Process the document
895-
result = await docling_service.process_document(file_path, filename)
896+
# Suppress pdfminer warnings that can cause processing to hang
897+
# The warnings themselves are harmless, but in large volumes they spam the logs and can potentially halt processing
898+
# Suppress both Python warnings and pdfminer log records below ERROR level
899+
pdfminer_logger = logging.getLogger("pdfminer")
900+
original_level = pdfminer_logger.level
901+
902+
with warnings.catch_warnings():
903+
warnings.filterwarnings(
904+
"ignore", category=UserWarning, module="pdfminer"
905+
)
906+
warnings.filterwarnings(
907+
"ignore",
908+
message=".*Cannot set gray non-stroke color.*",
909+
)
910+
warnings.filterwarnings(
911+
"ignore", message=".*invalid float value.*"
912+
)
913+
914+
# Temporarily suppress pdfminer logging warnings
915+
pdfminer_logger.setLevel(logging.ERROR)
916+
917+
try:
918+
# Process the document
919+
result = await docling_service.process_document(file_path, filename)
920+
finally:
921+
# Restore original logging level
922+
pdfminer_logger.setLevel(original_level)
896923

897924
# Clean up the temp file
898925
import os

surfsense_backend/pyproject.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
[build-system]
2+
requires = ["setuptools>=61.0", "wheel"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[tool.setuptools]
6+
packages = ["app"]
7+
include-package-data = false
8+
9+
[tool.setuptools.package-data]
10+
app = ["**/*"]
11+
112
[project]
213
name = "surf-new-backend"
314
version = "0.0.8"

0 commit comments

Comments
 (0)