Skip to content

Commit 9ecd7c2

Browse files
committed
feat(analytics): enhance bot event filtering in create_pr_metrics and add corresponding tests
1 parent c75522c commit 9ecd7c2

File tree

2 files changed

+49
-15
lines changed

2 files changed

+49
-15
lines changed

backend/analytics_server/mhq/service/code/sync/etl_code_analytics.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
PullRequestState,
1111
)
1212
from mhq.utils.time import Interval
13+
import re
1314

1415

1516
class CodeETLAnalyticsService:
@@ -21,19 +22,9 @@ def create_pr_metrics(
2122
) -> PullRequest:
2223
if pr.state == PullRequestState.OPEN:
2324
return pr
24-
# Filter bots from PR events
25-
pr_events = [
26-
event
27-
for event in pr_events
28-
if (
29-
not event.actor_username.endswith("[bot]")
30-
and not (
31-
event.data.get("user")
32-
and event.data.get("user", {}).get("type") == "Bot"
33-
)
34-
)
35-
]
36-
pr_performance = self.get_pr_performance(pr, pr_events)
25+
26+
non_bot_pr_events = self.filter_non_bot_events(pr_events)
27+
pr_performance = self.get_pr_performance(pr, non_bot_pr_events)
3728

3829
pr.first_response_time = (
3930
pr_performance.first_review_time
@@ -50,11 +41,11 @@ def create_pr_metrics(
5041
pr_performance.cycle_time if pr_performance.cycle_time != -1 else None
5142
)
5243
pr.reviewers = list(
53-
{e.actor_username for e in pr_events if e.actor_username != pr.author}
44+
{e.actor_username for e in non_bot_pr_events if e.actor_username != pr.author}
5445
)
5546

5647
if pr_commits:
57-
pr.rework_cycles = self.get_rework_cycles(pr, pr_events, pr_commits)
48+
pr.rework_cycles = self.get_rework_cycles(pr, non_bot_pr_events, pr_commits)
5849
pr_commits.sort(key=lambda x: x.created_at)
5950
first_commit_to_open = pr.created_at - pr_commits[0].created_at
6051
if isinstance(first_commit_to_open, timedelta):
@@ -184,3 +175,20 @@ def get_rework_cycles(
184175
rework_cycles += 1
185176

186177
return rework_cycles
178+
179+
def filter_non_bot_events(
    self, pr_events: List["PullRequestEvent"]
) -> List["PullRequestEvent"]:
    """Return the events from ``pr_events`` that were not produced by a bot.

    An event is considered bot-generated when either of these holds:

    * its ``actor_username`` looks like an automation account, or
    * its payload marks the actor as a bot, i.e.
      ``event.data["user"]["type"] == "Bot"``.

    The username heuristic is deliberately token-aware. A plain substring
    search for "bot" also drops human accounts such as "abbott" or
    "bothwell", so "bot" only counts when it ends an alphanumeric run
    ("dependabot[bot]", "renovate-bot", "dependabot"). Likewise "ci-" only
    counts at the start of a token, so "luci-dev" is kept while "ci-runner"
    is filtered.

    Args:
        pr_events: Events to filter. The input list is not modified.

    Returns:
        A new list with the non-bot events, in their original order.
        Events with a missing username or missing ``data`` are kept unless
        the other signal identifies them as a bot.
    """
    # The ``re`` module caches compiled patterns, so compiling here is cheap
    # and keeps the method self-contained.
    bot_pattern = re.compile(
        # "bot" ending a token: "x[bot]", "x-bot", "xbot", "bot-x".
        r"bot(?![a-z0-9])"
        # Well-known automation account markers.
        r"|automated|jenkins|github-actions"
        # "ci-" only as the start of a token, never inside a word.
        r"|(?<![a-z0-9])ci-",
        re.IGNORECASE,
    )

    def is_bot(event) -> bool:
        # A missing username must not crash the filter; treat it as "no match".
        username = event.actor_username or ""
        if bot_pattern.search(username):
            return True
        # ``data`` (or its "user" entry) may be absent or not a mapping.
        user = (event.data or {}).get("user")
        return isinstance(user, dict) and user.get("type") == "Bot"

    return [event for event in pr_events if not is_bot(event)]

backend/analytics_server/tests/service/code/sync/test_etl_code_analytics.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,3 +601,29 @@ def test_create_pr_metrics_no_human_rework_time():
601601
pr, [changes_requested_event, approval_event], []
602602
)
603603
assert pr_metrics.rework_time is None
604+
605+
def test_create_pr_metrics_filters_bot_type_events():
    """Events whose payload marks the actor as a Bot must not become reviewers.

    The bot account here has an ordinary-looking username ("github_app"), so
    it can only be recognised through ``data["user"]["type"] == "Bot"``.
    """
    service = CodeETLAnalyticsService()

    # Three timestamps one hour apart: PR creation, bot comment, human approval.
    opened_at = time_now()
    bot_commented_at = opened_at + timedelta(hours=1)
    human_approved_at = bot_commented_at + timedelta(hours=1)

    pr = get_pull_request(
        state=PullRequestState.MERGED,
        created_at=opened_at,
        updated_at=opened_at,
    )

    events = [
        # Bot identified purely by the payload's user type.
        get_pull_request_event(
            pull_request_id=pr.id,
            reviewer="github_app",
            state=PullRequestEventState.COMMENTED.value,
            created_at=bot_commented_at,
            data={"user": {"type": "Bot"}},
        ),
        # Regular human reviewer with default event data.
        get_pull_request_event(
            pull_request_id=pr.id,
            reviewer="human_user",
            state=PullRequestEventState.APPROVED.value,
            created_at=human_approved_at,
        ),
    ]

    reviewers = service.create_pr_metrics(pr, events, []).reviewers

    assert "human_user" in reviewers
    assert "github_app" not in reviewers

0 commit comments

Comments
 (0)