@@ -495,7 +495,7 @@ def import_ia_urls(urls, *, from_date=None, to_date=None,
495495 version_filter ,
496496 # Use a custom session to make sure CDX calls are extra robust.
497497 client = wayback .WaybackClient (wayback .WaybackSession (user_agent = USER_AGENT ,
498- retries = 10 ,
498+ retries = 4 ,
499499 backoff = 4 )),
500500 stop = stop_event )))
501501 cdx_thread .start ()
@@ -561,6 +561,7 @@ def _list_ia_versions_for_urls(url_patterns, from_date, to_date,
561561
562562 with client or wayback .WaybackClient (wayback .WaybackSession (user_agent = USER_AGENT )) as client :
563563 for url in url_patterns :
564+ should_retry = True
564565 if stop and stop .is_set ():
565566 break
566567
@@ -579,10 +580,24 @@ def _list_ia_versions_for_urls(url_patterns, from_date, to_date,
579580 logger .warn (f'CDX search error: { error !r} ' )
580581 except WaybackException as error :
581582 logger .error (f'Error getting CDX data for { url } : { error !r} ' )
582- except Exception :
583- # Need to handle the exception here to let iteration continue
584- # and allow other threads that might be running to be joined.
585- logger .exception (f'Error processing versions of { url } ' )
583+ except Exception as error :
584+ # On connection failures, reset the session and try again. If
585+ # we don't do this, the connection pool for this thread is
586+ # pretty much dead. It's not clear to me whether there is a
587+ # problem in urllib3 or Wayback's servers that requires this.
588+ # This unfortunately requires string checking because the error
589+ # can get wrapped up into multiple kinds of higher-level
590+ # errors :(
591+ # TODO: unify this with similar code in WaybackRecordsWorker or
592+ # push it down into the `wayback` package.
593+ if should_retry and ('failed to establish a new connection' in str (error ).lower ()):
594+ client .session .reset ()
595+ should_retry = False
596+ else :
597+ # Need to handle the exception here to let iteration
598+ # continue and allow other threads that might be running to
599+ # be joined.
600+ logger .exception (f'Error processing versions of { url } ' )
586601
587602 if skipped > 0 :
588603 logger .info ('Skipped %s URLs that did not match filters' , skipped )
0 commit comments