
Commit f7568a3

Fix bug where all crawls are added to workflow as successful even if failed
1 parent: ec28414

4 files changed: +26 −9 lines
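
The root cause: stats_recompute_last reused its inc_crawls argument as the increment for crawlSuccessfulCount, so every finished or deleted crawl moved the workflow's successful-crawl counter even when the crawl had failed. A minimal sketch of the before/after accounting, with plain dicts standing in for the MongoDB "$inc" document built in crawlconfigs.py (illustrative, not the actual btrixcloud code):

def inc_doc_before(size: int, inc_crawls: int = 1) -> dict:
    # Buggy behavior: the successful count always moves in lockstep
    # with the crawl count, regardless of the crawl's final state.
    return {
        "totalSize": size,
        "crawlCount": inc_crawls,
        "crawlSuccessfulCount": inc_crawls,
    }

def inc_doc_after(size: int, inc_crawls: int = 1, inc_successful: int = 1) -> dict:
    # Fixed behavior: callers pass inc_successful explicitly
    # (1 for successful crawls, 0 for failed or canceled ones).
    return {
        "totalSize": size,
        "crawlCount": inc_crawls,
        "crawlSuccessfulCount": inc_successful,
    }

# A failed crawl no longer inflates the successful counter:
assert inc_doc_before(0, 1)["crawlSuccessfulCount"] == 1   # bug
assert inc_doc_after(0, 1, 0)["crawlSuccessfulCount"] == 0  # fix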

backend/btrixcloud/basecrawls.py

Lines changed: 11 additions & 1 deletion

@@ -372,14 +372,21 @@ async def delete_crawls(
             size += crawl_size
 
             cid = crawl.cid
+            successful = crawl.state in SUCCESSFUL_STATES
             if cid:
                 if cids_to_update.get(cid):
                     cids_to_update[cid]["inc"] += 1
                     cids_to_update[cid]["size"] += crawl_size
+                    if successful:
+                        cids_to_update[cid]["successful"] += 1
                 else:
                     cids_to_update[cid] = {}
                     cids_to_update[cid]["inc"] = 1
                     cids_to_update[cid]["size"] = crawl_size
+                    if successful:
+                        cids_to_update[cid]["successful"] = 1
+                    else:
+                        cids_to_update[cid]["successful"] = 0
 
             if type_ == "crawl":
                 asyncio.create_task(

@@ -866,7 +873,10 @@ async def delete_crawls_all_types(
         for cid, cid_dict in cids_to_update.items():
             cid_size = cid_dict["size"]
             cid_inc = cid_dict["inc"]
-            await self.crawl_configs.stats_recompute_last(cid, -cid_size, -cid_inc)
+            cid_successful = cid_dict["successful"]
+            await self.crawl_configs.stats_recompute_last(
+                cid, -cid_size, -cid_inc, -cid_successful
+            )
 
         if uploads_length:
             upload_delete_list = DeleteCrawlList(crawl_ids=uploads)
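
Each deleted crawl now records whether it was successful in the per-workflow cids_to_update tally, so the later stats_recompute_last call can decrement crawlSuccessfulCount by the right amount. A self-contained sketch of that tallying (the Crawl stand-in and the SUCCESSFUL_STATES values here are illustrative, not the real btrixcloud definitions):

from dataclasses import dataclass

SUCCESSFUL_STATES = ("complete", "stopped_by_user")  # assumed values

@dataclass
class Crawl:
    cid: str    # workflow (crawl config) id
    state: str
    size: int

def tally_deleted(crawls: list[Crawl]) -> dict[str, dict[str, int]]:
    # Mirrors the accumulator built in delete_crawls: one entry per
    # workflow, counting crawls, bytes, and (new) successful crawls.
    cids_to_update: dict[str, dict[str, int]] = {}
    for crawl in crawls:
        successful = crawl.state in SUCCESSFUL_STATES
        entry = cids_to_update.setdefault(
            crawl.cid, {"inc": 0, "size": 0, "successful": 0}
        )
        entry["inc"] += 1
        entry["size"] += crawl.size
        if successful:
            entry["successful"] += 1
    return cids_to_update

tally = tally_deleted([Crawl("cfg1", "complete", 100), Crawl("cfg1", "failed", 0)])
assert tally["cfg1"] == {"inc": 2, "size": 100, "successful": 1}

The sketch collapses the diff's if/else branches into setdefault plus a conditional increment; the net effect is the same.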

backend/btrixcloud/crawlconfigs.py

Lines changed: 4 additions & 2 deletions

@@ -908,7 +908,9 @@ async def get_last_successful_crawl_out(
 
         return None
 
-    async def stats_recompute_last(self, cid: UUID, size: int, inc_crawls: int = 1):
+    async def stats_recompute_last(
+        self, cid: UUID, size: int, inc_crawls: int = 1, inc_successful: int = 1
+    ):
         """recompute stats by incrementing size counter and number of crawls"""
         update_query: dict[str, object] = {}
 

@@ -965,7 +967,7 @@ async def stats_recompute_last(self, cid: UUID, size: int, inc_crawls: int = 1):
                 "$inc": {
                     "totalSize": size,
                     "crawlCount": inc_crawls,
-                    "crawlSuccessfulCount": inc_crawls,
+                    "crawlSuccessfulCount": inc_successful,
                 },
             },
         )
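
Because inc_successful defaults to 1, call sites this commit does not touch keep their old behavior. The sketch below shows the shape of the update document the method now builds for its MongoDB update (build_stats_update and last_crawl_fields are illustrative names, not the real code):

def build_stats_update(size: int, inc_crawls: int, inc_successful: int,
                       last_crawl_fields: dict) -> dict:
    # Shape of the update issued by stats_recompute_last after this
    # commit; last_crawl_fields stands in for the recomputed
    # last-crawl metadata the real method also sets.
    return {
        "$set": last_crawl_fields,
        "$inc": {
            "totalSize": size,
            "crawlCount": inc_crawls,
            "crawlSuccessfulCount": inc_successful,
        },
    }

# Deleting two crawls (one failed) from a workflow now subtracts only
# one from the successful count:
update = build_stats_update(-100, -2, -1, {})
assert update["$inc"] == {"totalSize": -100, "crawlCount": -2, "crawlSuccessfulCount": -1}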

backend/btrixcloud/crawls.py

Lines changed: 7 additions & 2 deletions

@@ -412,7 +412,10 @@ async def delete_crawls(
         for cid, cid_dict in cids_to_update.items():
             cid_size = cid_dict["size"]
             cid_inc = cid_dict["inc"]
-            await self.crawl_configs.stats_recompute_last(cid, -cid_size, -cid_inc)
+            cid_successful = cid_dict["successful"]
+            await self.crawl_configs.stats_recompute_last(
+                cid, -cid_size, -cid_inc, -cid_successful
+            )
 
         return count, cids_to_update, quota_reached
 

@@ -903,7 +906,9 @@ async def shutdown_crawl(
         if not graceful:
             await self.update_crawl_state(crawl_id, "canceled")
             crawl = await self.get_crawl(crawl_id, org)
-            if not await self.crawl_configs.stats_recompute_last(crawl.cid, 0, -1):
+            if not await self.crawl_configs.stats_recompute_last(
+                crawl.cid, 0, -1, 0
+            ):
                 raise HTTPException(
                     status_code=404,
                     detail=f"crawl_config_not_found: {crawl.cid}",
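
The shutdown_crawl change covers cancellation: a canceled crawl is removed from the workflow's crawl count (inc_crawls=-1) without touching the size or the successful tally. Net effect on the stats fields, as a quick worked example (field values illustrative):

# Effect of stats_recompute_last(crawl.cid, 0, -1, 0) on a workflow:
stats = {"totalSize": 1000, "crawlCount": 3, "crawlSuccessfulCount": 2}
delta = {"totalSize": 0, "crawlCount": -1, "crawlSuccessfulCount": 0}
stats = {key: stats[key] + delta[key] for key in stats}
assert stats == {"totalSize": 1000, "crawlCount": 2, "crawlSuccessfulCount": 2}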

backend/btrixcloud/operator/crawls.py

Lines changed: 4 additions & 4 deletions

@@ -1746,11 +1746,10 @@ async def do_crawl_finished_tasks(
         stats: Optional[CrawlStats],
     ) -> None:
         """Run tasks after crawl completes in asyncio.task coroutine."""
-        await self.crawl_config_ops.stats_recompute_last(
-            crawl.cid, status.filesAddedSize, 1
-        )
-
         if state in SUCCESSFUL_STATES and crawl.oid:
+            await self.crawl_config_ops.stats_recompute_last(
+                crawl.cid, status.filesAddedSize, 1, 1
+            )
             await self.page_ops.set_archived_item_page_counts(crawl.id)
             await self.org_ops.set_last_crawl_finished(crawl.oid)
             await self.coll_ops.add_successful_crawl_to_collections(

@@ -1767,6 +1766,7 @@ async def do_crawl_finished_tasks(
         )
 
         if state in FAILED_STATES:
+            await self.crawl_config_ops.stats_recompute_last(crawl.cid, 0, 1, 0)
             await self.crawl_ops.delete_failed_crawl_files(crawl.id, crawl.oid)
             await self.page_ops.delete_crawl_pages(crawl.id, crawl.oid)
 
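
With the operator change, the stats update runs once per terminal state instead of unconditionally at the top of do_crawl_finished_tasks: a successful crawl increments both counters, a failed crawl increments only crawlCount. A condensed sketch of that dispatch (the state constants are assumed subsets, and the real successful branch also checks crawl.oid):

SUCCESSFUL_STATES = {"complete", "stopped_by_user"}  # assumed subset
FAILED_STATES = {"failed", "canceled"}               # assumed subset

def finished_stats_args(state: str, files_added_size: int):
    # Returns (size, inc_crawls, inc_successful) for the
    # stats_recompute_last call, or None if no update should run.
    if state in SUCCESSFUL_STATES:
        return (files_added_size, 1, 1)
    if state in FAILED_STATES:
        return (0, 1, 0)  # counts the crawl, but not as successful
    return None

assert finished_stats_args("complete", 500) == (500, 1, 1)
assert finished_stats_args("failed", 500) == (0, 1, 0)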
