From 094bdfad450431c1b7b09967ce52598c045ed48a Mon Sep 17 00:00:00 2001 From: Chirag Date: Sat, 1 Nov 2025 17:42:23 +0530 Subject: [PATCH 1/3] fix: suppress pdfminer warnings to prevent upload halting - Added warning suppression for pdfminer warnings during Docling PDF processing - Suppresses 'Cannot set gray non-stroke color' warnings that cause uploads to halt - Temporarily sets pdfminer logger to ERROR level during document processing - Fixes issue where files ~34MB would fail due to pdfminer warning spam Resolves issue where PDF uploads would halt with repeated pdfminer warnings --- .../document_processors/file_processors.py | 31 +++++++++++++++++-- surfsense_backend/pyproject.toml | 11 +++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index 1858a5b9..e762607c 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -886,13 +886,40 @@ async def process_file_in_background( ) # Use Docling service for document processing + import warnings + from app.services.docling_service import create_docling_service # Create Docling service docling_service = create_docling_service() - # Process the document - result = await docling_service.process_document(file_path, filename) + # Suppress pdfminer warnings that can cause processing to hang + # These warnings are harmless but can spam logs and potentially halt processing + # Suppress both Python warnings and logging warnings from pdfminer + pdfminer_logger = logging.getLogger("pdfminer") + original_level = pdfminer_logger.level + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", category=UserWarning, module="pdfminer" + ) + warnings.filterwarnings( + "ignore", + message=".*Cannot set gray non-stroke color.*", + ) + warnings.filterwarnings( + "ignore", message=".*invalid float value.*" + ) + + # Temporarily suppress pdfminer logging warnings + pdfminer_logger.setLevel(logging.ERROR) + + try: + # Process the document + result = await docling_service.process_document(file_path, filename) + finally: + # Restore original logging level + pdfminer_logger.setLevel(original_level) # Clean up the temp file import os diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 58511a10..1b3c1866 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -1,3 +1,14 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +packages = ["app"] +include-package-data = false + +[tool.setuptools.package-data] +app = ["**/*"] + [project] name = "surf-new-backend" version = "0.0.8" From b3026e44129de4cb8ef1aa7587330c3935e822bf Mon Sep 17 00:00:00 2001 From: Chirag Date: Sun, 2 Nov 2025 12:03:03 +0530 Subject: [PATCH 2/3] fix: resolve ruff F823 error by importing getLogger and ERROR directly --- .../app/tasks/document_processors/file_processors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/surfsense_backend/app/tasks/document_processors/file_processors.py b/surfsense_backend/app/tasks/document_processors/file_processors.py index e762607c..3618969e 100644 --- a/surfsense_backend/app/tasks/document_processors/file_processors.py +++ b/surfsense_backend/app/tasks/document_processors/file_processors.py @@ -4,6 +4,8 @@ import contextlib import logging +import warnings +from logging import ERROR, getLogger from fastapi import HTTPException from langchain_core.documents import Document as LangChainDocument @@ -886,8 +888,6 @@ async def process_file_in_background( ) # Use Docling service for document processing - import warnings - from app.services.docling_service import create_docling_service # Create Docling service @@ -896,7 +896,7 @@ async def process_file_in_background( # Suppress pdfminer warnings that can cause processing to hang # These warnings are harmless but can spam logs and potentially halt processing # Suppress both Python warnings and logging warnings from pdfminer - pdfminer_logger = logging.getLogger("pdfminer") + pdfminer_logger = getLogger("pdfminer") original_level = pdfminer_logger.level with warnings.catch_warnings(): @@ -912,7 +912,7 @@ async def process_file_in_background( ) # Temporarily suppress pdfminer logging warnings - pdfminer_logger.setLevel(logging.ERROR) + pdfminer_logger.setLevel(ERROR) try: # Process the document From 2efce16f5f59407b998e56d4f4459623cbdecdfa Mon Sep 17 00:00:00 2001 From: Chirag Date: Tue, 4 Nov 2025 09:26:52 +0530 Subject: [PATCH 3/3] revert: remove setuptools configuration from pyproject.toml --- surfsense_backend/pyproject.toml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml index 1b3c1866..58511a10 100644 --- a/surfsense_backend/pyproject.toml +++ b/surfsense_backend/pyproject.toml @@ -1,14 +1,3 @@ -[build-system] -requires = ["setuptools>=61.0", "wheel"] -build-backend = "setuptools.build_meta" - -[tool.setuptools] -packages = ["app"] -include-package-data = false - -[tool.setuptools.package-data] -app = ["**/*"] - [project] name = "surf-new-backend" version = "0.0.8"