2 changes: 1 addition & 1 deletion pyproject.toml
@@ -5,7 +5,7 @@ description = "Tool to download and mirror adblock lists"
requires-python = ">=3.13"
dependencies = [
"aiohttp==3.12.15",
"requests==2.32.5",
"aiohttp-retry==2.9.1",
"sentry-sdk[aiohttp]==2.35.2",
]
[tool.uv]
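The dependency change above swaps the synchronous requests library for aiohttp-retry, which wraps an aiohttp session with retry behavior. As a rough sketch of the pattern this PR adopts (the function name and URL below are illustrative, not taken from the PR):

import asyncio

from aiohttp_retry import ExponentialRetry, RetryClient


async def fetch_json(url):
    # ExponentialRetry re-issues failed requests with exponential backoff;
    # attempts=3 mirrors the retry policy used later in this diff.
    retry_options = ExponentialRetry(attempts=3)
    async with RetryClient(retry_options=retry_options) as session:
        async with session.get(url, raise_for_status=True) as response:
            return await response.json()


if __name__ == "__main__":
    # Hypothetical usage; replace with a real catalog URL.
    print(asyncio.run(fetch_json("https://example.com/catalog.json")))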
77 changes: 57 additions & 20 deletions update-lists.py
@@ -12,8 +12,8 @@
from pathlib import Path

import aiohttp
import requests
import sentry_sdk
from aiohttp_retry import ExponentialRetry, RetryClient

logger = logging.getLogger("update_lists")
logging.basicConfig(
@@ -24,6 +24,8 @@
),
)

MAX_FILE_SIZE = 100 * 1024 * 1024 # 100 MB

sentry_sdk.init(enable_tracing=False)


@@ -56,7 +58,7 @@ def validate_checksum(filename):
)
match = checksum_pattern.search(data)
if not match:
logger.warn(f"Couldn't find a checksum in {filename}")
logger.warning(f"Couldn't find a checksum in {filename}")
return

checksum = match.group(1)
@@ -106,39 +108,72 @@ def move_downloaded_file(filename, url, output_dir):
logger.info(f"moving {filename} to {output_file_path}")
shutil.move(filename, output_file_path)
except Exception:
logger.exception(f"An exception happened while processing {filename}")
logger.exception(
f"An exception happened while processing {filename} from {url}"
)
Path(filename).unlink()

return output_file_path


async def fetch_and_save_url(url, output_dir):
async with aiohttp.ClientSession() as session:
try:
async with session.get(url, raise_for_status=True) as response:
# Check if the response is successful
if response.status == 200:
# Create a temporary file
temp_file = tempfile.NamedTemporaryFile(delete=False)

async def fetch_and_save_url(session, url, output_dir):
"""Fetch a URL and save it to a file, with retries for transient errors."""
try:
async with session.get(url, raise_for_status=True, timeout=60) as response:
# Check if the response is successful
if response.status == 200:
# Create a temporary file
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
file_size = 0
# Write the response content to the temporary file
while True:
chunk = await response.content.read(1024)
if not chunk:
break

file_size += len(chunk)
if file_size > MAX_FILE_SIZE:
logger.error(
"Download failed for %s: file size (%d bytes) "
"exceeds the limit of %d bytes.",
url,
file_size,
MAX_FILE_SIZE,
)
Path(temp_file.name).unlink()
return

temp_file.write(chunk)
temp_file.close()
logger.info(f"downloaded {url}")

temp_file.close()
move_downloaded_file(temp_file.name, url, output_dir)
except aiohttp.ClientError:
logging.exception(f"An exception happened while processing {url}")
return # Success
else:
logger.error(
"Request to %s returned status %s, expected 200. Skipping.",
url,
response.status,
)
return
except aiohttp.ClientError:
logger.exception(f"An exception occurred while processing {url}")
return


async def main():
args = parse_arguments()

adblock_catalog = requests.get(args.adblock_catalog, timeout=60).json()
# Fetch the adblock catalog using aiohttp
retry_options = ExponentialRetry(attempts=3)
connector = aiohttp.TCPConnector(limit_per_host=3)

async with RetryClient(
connector=connector, retry_options=retry_options, connector_owner=False
) as session:
async with session.get(
args.adblock_catalog, timeout=60, raise_for_status=True
) as response:
adblock_catalog = await response.json(content_type="text/plain")

adblock_lists = []
metadata = {}
@@ -151,9 +186,11 @@ async def main():
metadata_file = Path(args.output_dir) / "metadata.json"
metadata_file.write_text(json.dumps(metadata, indent=4) + "\n")

return await asyncio.gather(
*[fetch_and_save_url(url, args.output_dir) for url in adblock_lists]
)
async with RetryClient(connector=connector, retry_options=retry_options) as session:
tasks = [
fetch_and_save_url(session, url, args.output_dir) for url in adblock_lists
]
await asyncio.gather(*tasks)


if __name__ == "__main__":
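The rewritten fetch_and_save_url above streams each list into a temporary file and aborts once the 100 MB cap is exceeded. A condensed, standalone sketch of that pattern (simplified from the diff; the helper name is illustrative):

import tempfile
from pathlib import Path

import aiohttp

MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB, matching the constant in the PR


async def download_capped(session: aiohttp.ClientSession, url):
    """Stream url to a temp file; return its path, or None if oversized."""
    async with session.get(url, raise_for_status=True, timeout=60) as response:
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            size = 0
            # Read in 1 KiB chunks so the body never sits fully in memory.
            while chunk := await response.content.read(1024):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    Path(temp_file.name).unlink()  # discard the partial file
                    return None
                temp_file.write(chunk)
    return Path(temp_file.name)

Because the temporary file is created with delete=False, it survives the with block and can then be moved into place, as move_downloaded_file does in the PR.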
72 changes: 14 additions & 58 deletions uv.lock

Some generated files are not rendered by default.
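One subtlety in the new main() is that a single TCPConnector backs two RetryClient sessions in sequence: the first is created with connector_owner=False, so closing it leaves the connector open for the second client, which owns it and closes it on exit. A minimal sketch of that hand-off (URLs are placeholders):

import asyncio

import aiohttp
from aiohttp_retry import ExponentialRetry, RetryClient


async def run():
    retry_options = ExponentialRetry(attempts=3)
    # limit_per_host=3 caps concurrent connections to any single host.
    connector = aiohttp.TCPConnector(limit_per_host=3)

    # First client borrows the connector; connector_owner=False means
    # closing this session does not close the shared connector.
    async with RetryClient(
        connector=connector, retry_options=retry_options, connector_owner=False
    ) as session:
        async with session.get("https://example.com/catalog") as response:
            catalog = await response.text()

    # Second client owns the connector and closes it when the block exits.
    async with RetryClient(connector=connector, retry_options=retry_options) as session:
        async with session.get("https://example.com/list") as response:
            body = await response.read()

    print(len(catalog), len(body))


if __name__ == "__main__":
    asyncio.run(run())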