diff --git a/.env.example b/.env.example index a6f2cbc..94113e0 100644 --- a/.env.example +++ b/.env.example @@ -36,6 +36,23 @@ CAPTCHA_ENABLED=false # Default: 120 seconds (2 minutes) CAPTCHA_TIMEOUT_SECONDS=120 +# Enable duplicate message spam detection (true/false) +# Detects and restricts users who paste the same message repeatedly +DUPLICATE_SPAM_ENABLED=true + +# Time window in seconds for detecting duplicate messages +DUPLICATE_SPAM_WINDOW_SECONDS=120 + +# Number of similar messages within the window, counting the newest one, at which the sender is restricted (2 = restrict on the first repeat) +DUPLICATE_SPAM_THRESHOLD=2 + +# Minimum normalized text length to consider (avoids false positives on short messages) +DUPLICATE_SPAM_MIN_LENGTH=20 + +# Similarity threshold (0.0-1.0) for matching duplicate messages +# 0.95 catches minor edits, 0.97 only near-exact copies, 0.90 is more aggressive +DUPLICATE_SPAM_SIMILARITY=0.95 + # Path to groups.json for multi-group support (optional) # If this file exists, per-group settings are loaded from it instead of the # GROUP_ID/WARNING_TOPIC_ID/etc. fields above. See groups.json.example. 
diff --git a/groups.json.example b/groups.json.example
index fca1559..7cbd0f5 100644
--- a/groups.json.example
+++ b/groups.json.example
@@ -9,7 +9,12 @@
     "captcha_timeout_seconds": 120,
     "new_user_probation_hours": 72,
     "new_user_violation_threshold": 3,
-    "rules_link": "https://t.me/pythonID/290029/321799"
+    "rules_link": "https://t.me/pythonID/290029/321799",
+    "duplicate_spam_enabled": true,
+    "duplicate_spam_window_seconds": 120,
+    "duplicate_spam_threshold": 2,
+    "duplicate_spam_min_length": 20,
+    "duplicate_spam_similarity": 0.95
   },
   {
     "group_id": -1009876543210,
@@ -21,6 +26,11 @@
     "captcha_timeout_seconds": 180,
     "new_user_probation_hours": 168,
     "new_user_violation_threshold": 2,
-    "rules_link": "https://t.me/mygroup/rules"
+    "rules_link": "https://t.me/mygroup/rules",
+    "duplicate_spam_enabled": true,
+    "duplicate_spam_window_seconds": 60,
+    "duplicate_spam_threshold": 2,
+    "duplicate_spam_min_length": 20,
+    "duplicate_spam_similarity": 0.90
   }
 ]
diff --git a/src/bot/config.py b/src/bot/config.py
index 36b636e..5ec1e6c 100644
--- a/src/bot/config.py
+++ b/src/bot/config.py
@@ -79,6 +79,11 @@ class Settings(BaseSettings):
     captcha_timeout_seconds: int = 120
     new_user_probation_hours: int = 72  # 3 days default
     new_user_violation_threshold: int = 3  # restrict after this many violations
+    duplicate_spam_enabled: bool = True
+    duplicate_spam_window_seconds: int = 120
+    duplicate_spam_threshold: int = 2
+    duplicate_spam_min_length: int = 20
+    duplicate_spam_similarity: float = 0.95
     groups_config_path: str = "groups.json"
     logfire_token: str | None = None
     logfire_service_name: str = "pythonid-bot"
diff --git a/src/bot/constants.py b/src/bot/constants.py
index 4244ecf..0dd4f2a 100644
--- a/src/bot/constants.py
+++ b/src/bot/constants.py
@@ -211,6 +211,21 @@ def format_hours_display(hours: int) -> str:
     "📌 [Peraturan Grup]({rules_link})"
 )
 
+# Duplicate message spam notification
+DUPLICATE_SPAM_RESTRICTION = (
+    "🚫 *Spam Pesan Duplikat*\n\n"
+    "{user_mention} telah dibatasi karena mengirim pesan yang sama "
+    "sebanyak {count} kali dalam waktu singkat.\n\n"
+    "📌 [Peraturan Grup]({rules_link})"
+)
+
+DUPLICATE_SPAM_RESTRICTION_NO_RESTRICT = (
+    "🚫 *Spam Pesan Duplikat*\n\n"
+    "Pesan duplikat dari {user_mention} telah dihapus "
+    "({count} pesan yang sama dalam waktu singkat).\n\n"
+    "📌 [Peraturan Grup]({rules_link})"
+)
+
 # Whitelisted URL domains for new user probation
 # These domains are allowed even during probation period
 # Matches exact domain or subdomains (e.g., "github.com" matches "www.github.com")
diff --git a/src/bot/group_config.py b/src/bot/group_config.py
index 8390c6e..b97210e 100644
--- a/src/bot/group_config.py
+++ b/src/bot/group_config.py
@@ -35,6 +35,27 @@ class GroupConfig(BaseModel):
     new_user_probation_hours: int = 72
     new_user_violation_threshold: int = 3
     rules_link: str = "https://t.me/pythonID/290029/321799"
+    duplicate_spam_enabled: bool = True
+    duplicate_spam_window_seconds: int = 120
+    duplicate_spam_threshold: int = 2
+    duplicate_spam_min_length: int = 20
+    duplicate_spam_similarity: float = 0.95
+
+    @field_validator("duplicate_spam_threshold")
+    @classmethod
+    def duplicate_spam_threshold_at_least_two(cls, v: int) -> int:
+        """Reject thresholds below 2, which would flag a user's first message."""
+        if v < 2:
+            raise ValueError("duplicate_spam_threshold must be at least 2")
+        return v
+
+    @field_validator("duplicate_spam_similarity")
+    @classmethod
+    def duplicate_spam_similarity_in_range(cls, v: float) -> float:
+        """Reject similarity ratios outside the interval (0.0, 1.0]."""
+        if not 0.0 < v <= 1.0:
+            raise ValueError("duplicate_spam_similarity must be in (0.0, 1.0]")
+        return v
 
     @field_validator("group_id")
     @classmethod
@@ -181,6 +202,11 @@ def build_group_registry(settings: object) -> GroupRegistry:
         new_user_probation_hours=settings.new_user_probation_hours,
         new_user_violation_threshold=settings.new_user_violation_threshold,
         rules_link=settings.rules_link,
+        duplicate_spam_enabled=settings.duplicate_spam_enabled,
+        duplicate_spam_window_seconds=settings.duplicate_spam_window_seconds,
+        duplicate_spam_threshold=settings.duplicate_spam_threshold,
+        duplicate_spam_min_length=settings.duplicate_spam_min_length,
+        duplicate_spam_similarity=settings.duplicate_spam_similarity,
     )
 
     registry.register(config)
diff --git a/src/bot/handlers/duplicate_spam.py b/src/bot/handlers/duplicate_spam.py
new file mode 100644
index 0000000..391aa7f
--- /dev/null
+++ b/src/bot/handlers/duplicate_spam.py
@@ -0,0 +1,289 @@
+"""
+Duplicate message spam detection handler.
+
+This module detects users who spam by repeatedly posting the same or
+very similar messages within a short time window. Once the threshold
+is reached, the offending message is deleted and the user is restricted
+(earlier copies of the message are left in place).
+
+Uses an in-memory rolling window per (group_id, user_id) to track
+recent messages. No database state is needed — restrictions applied
+here are NOT reversible via the DM unrestriction flow (no UserWarning
+record is created).
+"""
+
+import logging
+import re
+import unicodedata
+from collections import deque
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from difflib import SequenceMatcher
+
+from telegram import Update, User
+from telegram.ext import ApplicationHandlerStop, ContextTypes
+
+from bot.constants import (
+    DUPLICATE_SPAM_RESTRICTION,
+    DUPLICATE_SPAM_RESTRICTION_NO_RESTRICT,
+    RESTRICTED_PERMISSIONS,
+)
+from bot.group_config import GroupConfig, get_group_config_for_update
+from bot.services.telegram_utils import get_user_mention
+
+logger = logging.getLogger(__name__)
+
+RECENT_MESSAGES_KEY = "duplicate_spam_recent"
+LAST_SWEEP_KEY = "duplicate_spam_last_sweep"
+
+# Cap on tracked messages per (group, user): bounds memory and the number of
+# SequenceMatcher comparisons a single flooding user can trigger per message.
+MAX_TRACKED_MESSAGES = 50
+
+_NON_WORD_RE = re.compile(r"[^\w\s]")
+_WHITESPACE_RE = re.compile(r"\s+")
+
+
+@dataclass
+class RecentMessage:
+    """A recent message entry for duplicate detection."""
+
+    timestamp: datetime
+    normalized_text: str
+    message_id: int
+
+
+def normalize_text(text: str) -> str:
+    """
+    Normalize text for duplicate comparison.
+
+    Applies NFKC normalization, case-folds, removes every character that
+    is neither a word character nor whitespace (punctuation, emoji,
+    symbols), then collapses whitespace runs and trims both ends.
+    """
+    # Normalize before folding case: NFKC can produce cased letters (for
+    # example from styled mathematical alphabets) that still need folding.
+    text = unicodedata.normalize("NFKC", text).casefold()
+    text = _NON_WORD_RE.sub("", text)
+    # Collapse whitespace last so gaps left by removed symbols do not
+    # survive as double or trailing spaces.
+    return _WHITESPACE_RE.sub(" ", text).strip()
+
+
+def is_similar(a: str, b: str, threshold: float = 0.95) -> bool:
+    """
+    Check if two normalized texts are similar enough to be duplicates.
+
+    Args:
+        a: First normalized text.
+        b: Second normalized text.
+        threshold: Minimum SequenceMatcher ratio needed to count as similar.
+
+    Returns:
+        True when the texts are equal or their ratio reaches the threshold.
+    """
+    if a == b:
+        return True
+    # autojunk=False: the default heuristic treats frequent characters as
+    # junk once a text reaches 200 characters, which can collapse the ratio
+    # of long, near-identical messages.
+    matcher = SequenceMatcher(None, a, b, autojunk=False)
+    # real_quick_ratio() and quick_ratio() are cheap upper bounds on ratio(),
+    # so most non-duplicates are rejected before the expensive comparison.
+    return (
+        matcher.real_quick_ratio() >= threshold
+        and matcher.quick_ratio() >= threshold
+        and matcher.ratio() >= threshold
+    )
+
+
+def _get_recent_messages(
+    context: ContextTypes.DEFAULT_TYPE, group_id: int, user_id: int
+) -> deque[RecentMessage]:
+    """Get or create the recent messages deque for a (group, user) pair."""
+    store: dict[tuple[int, int], deque[RecentMessage]] = context.bot_data.setdefault(
+        RECENT_MESSAGES_KEY, {}
+    )
+    key = (group_id, user_id)
+    if key not in store:
+        store[key] = deque(maxlen=MAX_TRACKED_MESSAGES)
+    return store[key]
+
+
+def _prune_old_messages(
+    dq: deque[RecentMessage], window_seconds: int, now: datetime
+) -> None:
+    """Remove messages older than the window from the deque."""
+    while dq and (now - dq[0].timestamp).total_seconds() > window_seconds:
+        dq.popleft()
+
+
+def _sweep_stale_entries(
+    context: ContextTypes.DEFAULT_TYPE,
+    group_id: int,
+    window_seconds: int,
+    now: datetime,
+) -> None:
+    """
+    Drop a group's tracking entries whose newest message left the window.
+
+    Entries are otherwise pruned only when the same user posts again, so
+    users who post once and go quiet would stay in memory indefinitely.
+    The sweep runs at most once per window per group to keep it cheap.
+    """
+    sweeps: dict[int, datetime] = context.bot_data.setdefault(LAST_SWEEP_KEY, {})
+    last_sweep = sweeps.get(group_id)
+    if last_sweep is not None and (now - last_sweep).total_seconds() <= window_seconds:
+        return
+    sweeps[group_id] = now
+
+    store = context.bot_data.get(RECENT_MESSAGES_KEY, {})
+    stale_keys = [
+        key
+        for key, dq in store.items()
+        if key[0] == group_id
+        and (not dq or (now - dq[-1].timestamp).total_seconds() > window_seconds)
+    ]
+    for key in stale_keys:
+        del store[key]
+
+
+def count_similar_in_window(
+    dq: deque[RecentMessage], normalized: str, threshold: float = 0.95
+) -> int:
+    """Count how many messages in the deque are similar to the given text."""
+    return sum(1 for m in dq if is_similar(normalized, m.normalized_text, threshold))
+
+
+async def handle_duplicate_spam(
+    update: Update, context: ContextTypes.DEFAULT_TYPE
+) -> None:
+    """
+    Detect and handle duplicate message spam.
+
+    Tracks recent messages per (group_id, user_id) in memory. When the
+    count of similar messages within the time window reaches the threshold,
+    deletes the message and restricts the user.
+
+    Raises:
+        ApplicationHandlerStop: After a duplicate has been acted on.
+    """
+    if not update.message or not update.message.from_user:
+        return
+
+    group_config = get_group_config_for_update(update)
+    if group_config is None:
+        return
+
+    if not group_config.duplicate_spam_enabled:
+        return
+
+    user = update.message.from_user
+    if user.is_bot:
+        return
+
+    admin_ids = context.bot_data.get("group_admin_ids", {}).get(group_config.group_id, [])
+    if user.id in admin_ids:
+        return
+
+    text = update.message.text or update.message.caption
+    if not text:
+        return
+
+    normalized = normalize_text(text)
+    if len(normalized) < group_config.duplicate_spam_min_length:
+        return
+
+    now = datetime.now(UTC)
+    # Sweep before fetching the deque so the sender's entry is never
+    # detached from the store while it is still being updated below.
+    _sweep_stale_entries(
+        context, group_config.group_id, group_config.duplicate_spam_window_seconds, now
+    )
+    dq = _get_recent_messages(context, group_config.group_id, user.id)
+    _prune_old_messages(dq, group_config.duplicate_spam_window_seconds, now)
+
+    similar_count = count_similar_in_window(dq, normalized, group_config.duplicate_spam_similarity)
+
+    dq.append(
+        RecentMessage(
+            timestamp=now,
+            normalized_text=normalized,
+            message_id=update.message.message_id,
+        )
+    )
+
+    # similar_count covers earlier messages only; the current one adds 1.
+    if similar_count < group_config.duplicate_spam_threshold - 1:
+        return
+
+    total_count = similar_count + 1
+    user_mention = get_user_mention(user)
+
+    logger.info(
+        "Duplicate spam detected: user_id=%s, group_id=%s, count=%s",
+        user.id,
+        group_config.group_id,
+        total_count,
+    )
+
+    try:
+        await update.message.delete()
+        logger.info("Deleted duplicate spam from user_id=%s", user.id)
+    except Exception:
+        # Best effort: still restrict and notify when the delete fails.
+        logger.exception("Failed to delete duplicate spam: user_id=%s", user.id)
+
+    await _enforce_restriction(context, group_config, user, user_mention, total_count)
+
+    raise ApplicationHandlerStop
+
+
+async def _enforce_restriction(
+    context: ContextTypes.DEFAULT_TYPE,
+    group_config: GroupConfig,
+    user: User,
+    user_mention: str,
+    count: int,
+) -> None:
+    """
+    Restrict the user and send notification to warning topic.
+
+    Both steps are best effort: failures are logged, and when the
+    restriction fails the notification uses the "deleted only" template.
+    """
+    restricted = False
+    try:
+        await context.bot.restrict_chat_member(
+            chat_id=group_config.group_id,
+            user_id=user.id,
+            permissions=RESTRICTED_PERMISSIONS,
+        )
+        restricted = True
+        logger.info("Restricted user_id=%s for duplicate spam", user.id)
+    except Exception:
+        logger.exception(
+            "Failed to restrict user for duplicate spam: user_id=%s", user.id
+        )
+
+    try:
+        template = (
+            DUPLICATE_SPAM_RESTRICTION if restricted
+            else DUPLICATE_SPAM_RESTRICTION_NO_RESTRICT
+        )
+        notification_text = template.format(
+            user_mention=user_mention,
+            count=count,
+            rules_link=group_config.rules_link,
+        )
+        await context.bot.send_message(
+            chat_id=group_config.group_id,
+            message_thread_id=group_config.warning_topic_id,
+            text=notification_text,
+            parse_mode="Markdown",
+        )
+        logger.info("Sent duplicate spam notification for user_id=%s", user.id)
+    except Exception:
+        logger.exception(
+            "Failed to send duplicate spam notification: user_id=%s", user.id
+        )
diff --git a/src/bot/main.py b/src/bot/main.py
index 4e9946b..54ba309 100644
--- a/src/bot/main.py
+++ b/src/bot/main.py
@@ -19,6 +19,7 @@
 from bot.group_config import get_group_registry, init_group_registry
 from bot.handlers import captcha
 from bot.handlers.anti_spam import handle_inline_keyboard_spam, handle_new_user_spam
 from bot.handlers.dm import handle_dm
+from bot.handlers.duplicate_spam import handle_duplicate_spam
 from bot.handlers.message import handle_message
 from bot.handlers.topic_guard import guard_warning_topic
@@ -294,7 +295,19 @@ def main() -> None:
     )
     logger.info("Registered handler: anti_spam_handler (group=0)")
 
-    # Handler 10: Group message handler - monitors messages in monitored
+    # Handler 10: Duplicate message spam handler - detects repeated identical messages
+    # NOTE(review): within one handler group python-telegram-bot runs only the
+    # first handler whose filter matches. Confirm this handler and its group-0
+    # neighbours cannot shadow each other, or register it in a dedicated group.
+    application.add_handler(
+        MessageHandler(
+            filters.ChatType.GROUPS & ~filters.COMMAND,
+            handle_duplicate_spam,
+        )
+    )
+    logger.info("Registered handler: duplicate_spam_handler (group=0)")
+
+    # Handler 11: Group message handler - monitors messages in monitored
     # groups and warns/restricts users with incomplete profiles
application.add_handler( MessageHandler( diff --git a/tests/test_config.py b/tests/test_config.py index b144ee5..32752d0 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -123,6 +123,36 @@ def test_captcha_timeout_timedelta(self, monkeypatch): assert settings.captcha_timeout_timedelta == timedelta(seconds=90) + def test_duplicate_spam_defaults(self, monkeypatch): + """Test that duplicate_spam fields have correct defaults.""" + monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "test_token") + monkeypatch.setenv("GROUP_ID", "-100999") + monkeypatch.setenv("WARNING_TOPIC_ID", "1") + + settings = Settings(_env_file=None) + + assert settings.duplicate_spam_enabled is True + assert settings.duplicate_spam_window_seconds == 120 + assert settings.duplicate_spam_threshold == 2 + assert settings.duplicate_spam_min_length == 20 + + def test_duplicate_spam_from_env(self, monkeypatch): + """Test that duplicate_spam fields are read from environment variables.""" + monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "test_token") + monkeypatch.setenv("GROUP_ID", "-100999") + monkeypatch.setenv("WARNING_TOPIC_ID", "1") + monkeypatch.setenv("DUPLICATE_SPAM_ENABLED", "false") + monkeypatch.setenv("DUPLICATE_SPAM_WINDOW_SECONDS", "300") + monkeypatch.setenv("DUPLICATE_SPAM_THRESHOLD", "5") + monkeypatch.setenv("DUPLICATE_SPAM_MIN_LENGTH", "50") + + settings = Settings(_env_file=None) + + assert settings.duplicate_spam_enabled is False + assert settings.duplicate_spam_window_seconds == 300 + assert settings.duplicate_spam_threshold == 5 + assert settings.duplicate_spam_min_length == 50 + class TestSettingsValidation: def test_group_id_must_be_negative(self, monkeypatch): diff --git a/tests/test_duplicate_spam.py b/tests/test_duplicate_spam.py new file mode 100644 index 0000000..c68135c --- /dev/null +++ b/tests/test_duplicate_spam.py @@ -0,0 +1,357 @@ +"""Tests for the duplicate message spam detection handler.""" + +from collections import deque +from datetime import UTC, datetime, 
timedelta +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from telegram import Chat, Message, User +from telegram.ext import ApplicationHandlerStop + +from bot.group_config import GroupConfig +from bot.handlers.duplicate_spam import ( + RecentMessage, + _get_recent_messages, + _prune_old_messages, + count_similar_in_window, + handle_duplicate_spam, + is_similar, + normalize_text, + RECENT_MESSAGES_KEY, +) + +DEFAULT_SIMILARITY = 0.95 + + +class TestNormalizeText: + """Tests for the normalize_text function.""" + + def test_lowercase(self): + assert normalize_text("Hello World") == "hello world" + + def test_collapse_whitespace(self): + assert normalize_text("hello world") == "hello world" + + def test_strip_punctuation(self): + assert normalize_text("hello, world!") == "hello world" + + def test_strip_emoji(self): + result = normalize_text("hello πŸ™") + assert result.strip() == "hello" + + def test_unicode_normalization(self): + result = normalize_text("find") + assert result == "find" + + def test_multiline(self): + result = normalize_text("line one\nline two\nline three") + assert result == "line one line two line three" + + def test_empty_string(self): + assert normalize_text("") == "" + + +class TestIsSimilar: + """Tests for the is_similar function.""" + + def test_exact_match(self): + assert is_similar("hello world", "hello world") is True + + def test_very_similar(self): + a = "barangkali di sini ada yang sedang mencari kerja" + b = "barangkali di sini ada yang sedang mencari kerja" + assert is_similar(a, b) is True + + def test_different_texts(self): + assert is_similar("hello world", "goodbye universe") is False + + def test_slightly_different(self): + a = "barangkali di sini ada yang sedang mencari kerja bisa menghubungi saya kak" + b = "barangkali di sini ada yang sedang mencari kerja bisa menghubungi saya ya" + assert is_similar(a, b) is True + + def test_completely_different(self): + a = "python is great for data science" + b = 
"javascript is used for web development" + assert is_similar(a, b) is False + + +class TestPruneOldMessages: + """Tests for the _prune_old_messages function.""" + + def test_removes_old_messages(self): + now = datetime.now(UTC) + dq = deque([ + RecentMessage(timestamp=now - timedelta(seconds=200), normalized_text="old", message_id=1), + RecentMessage(timestamp=now - timedelta(seconds=50), normalized_text="recent", message_id=2), + ]) + _prune_old_messages(dq, 120, now) + assert len(dq) == 1 + assert dq[0].normalized_text == "recent" + + def test_keeps_all_within_window(self): + now = datetime.now(UTC) + dq = deque([ + RecentMessage(timestamp=now - timedelta(seconds=60), normalized_text="a", message_id=1), + RecentMessage(timestamp=now - timedelta(seconds=30), normalized_text="b", message_id=2), + ]) + _prune_old_messages(dq, 120, now) + assert len(dq) == 2 + + def test_empty_deque(self): + dq: deque[RecentMessage] = deque() + _prune_old_messages(dq, 120, datetime.now(UTC)) + assert len(dq) == 0 + + +class TestCountSimilarInWindow: + """Tests for the count_similar_in_window function.""" + + def test_counts_similar(self): + dq = deque([ + RecentMessage(timestamp=datetime.now(UTC), normalized_text="spam message here", message_id=1), + RecentMessage(timestamp=datetime.now(UTC), normalized_text="spam message here", message_id=2), + RecentMessage(timestamp=datetime.now(UTC), normalized_text="different message", message_id=3), + ]) + assert count_similar_in_window(dq, "spam message here") == 2 + + def test_no_similar(self): + dq = deque([ + RecentMessage(timestamp=datetime.now(UTC), normalized_text="hello world foo bar", message_id=1), + ]) + assert count_similar_in_window(dq, "completely different text here") == 0 + + +class TestGetRecentMessages: + """Tests for the _get_recent_messages function.""" + + def test_creates_new_deque(self): + context = MagicMock() + context.bot_data = {} + dq = _get_recent_messages(context, -100, 42) + assert isinstance(dq, deque) + assert 
len(dq) == 0 + + def test_returns_existing_deque(self): + context = MagicMock() + existing_dq = deque() + existing_dq.append( + RecentMessage(timestamp=datetime.now(UTC), normalized_text="test", message_id=1) + ) + context.bot_data = {RECENT_MESSAGES_KEY: {(-100, 42): existing_dq}} + dq = _get_recent_messages(context, -100, 42) + assert len(dq) == 1 + + +class TestHandleDuplicateSpam: + """Tests for the handle_duplicate_spam handler.""" + + @pytest.fixture + def group_config(self): + return GroupConfig( + group_id=-100, + warning_topic_id=999, + duplicate_spam_enabled=True, + duplicate_spam_window_seconds=120, + duplicate_spam_threshold=2, + duplicate_spam_min_length=20, + ) + + @pytest.fixture + def mock_update(self): + update = MagicMock() + update.message = MagicMock(spec=Message) + update.message.from_user = MagicMock(spec=User) + update.message.from_user.id = 42 + update.message.from_user.is_bot = False + update.message.from_user.full_name = "Test User" + update.message.from_user.username = "testuser" + update.message.text = "Barangkali di sini ada yang sedang mencari kerja bisa menghubungi saya" + update.message.caption = None + update.message.message_id = 100 + update.message.delete = AsyncMock() + update.effective_chat = MagicMock(spec=Chat) + update.effective_chat.id = -100 + return update + + @pytest.fixture + def mock_context(self): + context = MagicMock() + context.bot_data = {"group_admin_ids": {-100: [1, 2]}} + context.bot = MagicMock() + context.bot.restrict_chat_member = AsyncMock() + context.bot.send_message = AsyncMock() + return context + + async def test_skips_no_message(self, mock_context, group_config): + update = MagicMock() + update.message = None + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(update, mock_context) + + async def test_skips_no_user(self, mock_context, group_config): + update = MagicMock() + update.message = MagicMock(spec=Message) + 
update.message.from_user = None + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(update, mock_context) + + async def test_skips_unmonitored_group(self, mock_update, mock_context): + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=None): + await handle_duplicate_spam(mock_update, mock_context) + mock_update.message.delete.assert_not_called() + + async def test_skips_when_disabled(self, mock_update, mock_context, group_config): + group_config.duplicate_spam_enabled = False + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + mock_update.message.delete.assert_not_called() + + async def test_skips_bots(self, mock_update, mock_context, group_config): + mock_update.message.from_user.is_bot = True + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + mock_update.message.delete.assert_not_called() + + async def test_skips_admins(self, mock_update, mock_context, group_config): + mock_update.message.from_user.id = 1 # admin + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + mock_update.message.delete.assert_not_called() + + async def test_skips_no_text(self, mock_update, mock_context, group_config): + mock_update.message.text = None + mock_update.message.caption = None + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + mock_update.message.delete.assert_not_called() + + async def test_skips_short_text(self, mock_update, mock_context, group_config): + mock_update.message.text = "ok" + with 
patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + mock_update.message.delete.assert_not_called() + + async def test_first_message_no_action(self, mock_update, mock_context, group_config): + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + mock_update.message.delete.assert_not_called() + + async def test_second_message_triggers_restriction(self, mock_update, mock_context, group_config): + now = datetime.now(UTC) + norm = normalize_text(mock_update.message.text) + existing_dq = deque([ + RecentMessage(timestamp=now, normalized_text=norm, message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + with pytest.raises(ApplicationHandlerStop): + await handle_duplicate_spam(mock_update, mock_context) + + mock_update.message.delete.assert_called_once() + mock_context.bot.restrict_chat_member.assert_called_once() + mock_context.bot.send_message.assert_called_once() + + async def test_uses_caption_when_no_text(self, mock_update, mock_context, group_config): + mock_update.message.text = None + mock_update.message.caption = "Barangkali di sini ada yang sedang mencari kerja bisa menghubungi saya" + now = datetime.now(UTC) + norm = normalize_text(mock_update.message.caption) + existing_dq = deque([ + RecentMessage(timestamp=now, normalized_text=norm, message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + with pytest.raises(ApplicationHandlerStop): + await handle_duplicate_spam(mock_update, mock_context) + + mock_update.message.delete.assert_called_once() + + async def 
test_expired_messages_not_counted(self, mock_update, mock_context, group_config): + old = datetime.now(UTC) - timedelta(seconds=200) + norm = normalize_text(mock_update.message.text) + existing_dq = deque([ + RecentMessage(timestamp=old, normalized_text=norm, message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + + mock_update.message.delete.assert_not_called() + + async def test_different_messages_not_counted(self, mock_update, mock_context, group_config): + now = datetime.now(UTC) + existing_dq = deque([ + RecentMessage(timestamp=now, normalized_text="some completely different text here one", message_id=98), + RecentMessage(timestamp=now, normalized_text="another totally different text here two", message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + await handle_duplicate_spam(mock_update, mock_context) + + mock_update.message.delete.assert_not_called() + + async def test_delete_failure_continues(self, mock_update, mock_context, group_config): + mock_update.message.delete = AsyncMock(side_effect=Exception("Delete failed")) + now = datetime.now(UTC) + norm = normalize_text(mock_update.message.text) + existing_dq = deque([ + RecentMessage(timestamp=now, normalized_text=norm, message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + with pytest.raises(ApplicationHandlerStop): + await handle_duplicate_spam(mock_update, mock_context) + + mock_context.bot.restrict_chat_member.assert_called_once() + + async def test_restrict_failure_still_notifies(self, mock_update, mock_context, group_config): 
+ mock_context.bot.restrict_chat_member = AsyncMock(side_effect=Exception("Restrict failed")) + now = datetime.now(UTC) + norm = normalize_text(mock_update.message.text) + existing_dq = deque([ + RecentMessage(timestamp=now, normalized_text=norm, message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + with pytest.raises(ApplicationHandlerStop): + await handle_duplicate_spam(mock_update, mock_context) + + mock_context.bot.send_message.assert_called_once() + call_kwargs = mock_context.bot.send_message.call_args + assert "dihapus" in call_kwargs.kwargs.get("text", call_kwargs[1].get("text", "")) + + async def test_notification_failure_still_raises_stop(self, mock_update, mock_context, group_config): + mock_context.bot.send_message = AsyncMock(side_effect=Exception("Send failed")) + now = datetime.now(UTC) + norm = normalize_text(mock_update.message.text) + existing_dq = deque([ + RecentMessage(timestamp=now, normalized_text=norm, message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + with pytest.raises(ApplicationHandlerStop): + await handle_duplicate_spam(mock_update, mock_context) + + async def test_third_message_also_triggers(self, mock_update, mock_context, group_config): + now = datetime.now(UTC) + norm = normalize_text(mock_update.message.text) + existing_dq = deque([ + RecentMessage(timestamp=now, normalized_text=norm, message_id=98), + RecentMessage(timestamp=now, normalized_text=norm, message_id=99), + ]) + mock_context.bot_data[RECENT_MESSAGES_KEY] = {(-100, 42): existing_dq} + + with patch("bot.handlers.duplicate_spam.get_group_config_for_update", return_value=group_config): + with pytest.raises(ApplicationHandlerStop): + await handle_duplicate_spam(mock_update, mock_context) + 
+ mock_update.message.delete.assert_called_once() + mock_context.bot.restrict_chat_member.assert_called_once() diff --git a/tests/test_group_config.py b/tests/test_group_config.py index c073c2e..07e5757 100644 --- a/tests/test_group_config.py +++ b/tests/test_group_config.py @@ -88,6 +88,27 @@ def test_captcha_timeout_timedelta(self): gc = GroupConfig(group_id=-1, warning_topic_id=42, captcha_timeout_seconds=120) assert gc.captcha_timeout_timedelta == timedelta(seconds=120) + def test_duplicate_spam_defaults(self): + gc = GroupConfig(group_id=-1, warning_topic_id=42) + assert gc.duplicate_spam_enabled is True + assert gc.duplicate_spam_window_seconds == 120 + assert gc.duplicate_spam_threshold == 2 + assert gc.duplicate_spam_min_length == 20 + + def test_duplicate_spam_custom_values(self): + gc = GroupConfig( + group_id=-1, + warning_topic_id=42, + duplicate_spam_enabled=False, + duplicate_spam_window_seconds=300, + duplicate_spam_threshold=5, + duplicate_spam_min_length=50, + ) + assert gc.duplicate_spam_enabled is False + assert gc.duplicate_spam_window_seconds == 300 + assert gc.duplicate_spam_threshold == 5 + assert gc.duplicate_spam_min_length == 50 + class TestGroupRegistry: def test_register_and_get(self): @@ -157,6 +178,10 @@ def test_load_with_all_fields(self): "captcha_enabled": True, "captcha_timeout_seconds": 180, "rules_link": "https://example.com/rules", + "duplicate_spam_enabled": False, + "duplicate_spam_window_seconds": 300, + "duplicate_spam_threshold": 5, + "duplicate_spam_min_length": 50, } ] with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: @@ -166,6 +191,10 @@ def test_load_with_all_fields(self): assert configs[0].restrict_failed_users is True assert configs[0].warning_threshold == 5 + assert configs[0].duplicate_spam_enabled is False + assert configs[0].duplicate_spam_window_seconds == 300 + assert configs[0].duplicate_spam_threshold == 5 + assert configs[0].duplicate_spam_min_length == 50 def 
test_file_not_found(self): with pytest.raises(FileNotFoundError): @@ -244,6 +273,10 @@ def test_falls_back_to_env(self): settings.new_user_probation_hours = 72 settings.new_user_violation_threshold = 3 settings.rules_link = "https://t.me/test/rules" + settings.duplicate_spam_enabled = False + settings.duplicate_spam_window_seconds = 300 + settings.duplicate_spam_threshold = 5 + settings.duplicate_spam_min_length = 50 registry = build_group_registry(settings) @@ -252,6 +285,10 @@ def test_falls_back_to_env(self): assert gc is not None assert gc.warning_topic_id == 42 assert gc.rules_link == "https://t.me/test/rules" + assert gc.duplicate_spam_enabled is False + assert gc.duplicate_spam_window_seconds == 300 + assert gc.duplicate_spam_threshold == 5 + assert gc.duplicate_spam_min_length == 50 class TestGetGroupConfigForUpdate: @@ -313,6 +350,10 @@ def test_init_and_get(self): settings.new_user_probation_hours = 72 settings.new_user_violation_threshold = 3 settings.rules_link = "https://t.me/test/rules" + settings.duplicate_spam_enabled = True + settings.duplicate_spam_window_seconds = 120 + settings.duplicate_spam_threshold = 3 + settings.duplicate_spam_min_length = 20 registry = init_group_registry(settings) assert registry is get_group_registry() @@ -331,6 +372,10 @@ def test_reset_clears_registry(self): settings.new_user_probation_hours = 72 settings.new_user_violation_threshold = 3 settings.rules_link = "https://t.me/test/rules" + settings.duplicate_spam_enabled = True + settings.duplicate_spam_window_seconds = 120 + settings.duplicate_spam_threshold = 3 + settings.duplicate_spam_min_length = 20 init_group_registry(settings) reset_group_registry() diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 672aadd..65a67c6 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -57,6 +57,14 @@ async def test_restricts_expired_warnings(self, mock_registry): mock_bot.restrict_chat_member = AsyncMock() mock_bot.send_message = AsyncMock() + 
mock_user = MagicMock() + mock_user.username = "testuser" + mock_user.full_name = "Test User" + mock_user.id = 123 + mock_member = MagicMock() + mock_member.user = mock_user + mock_bot.get_chat_member = AsyncMock(return_value=mock_member) + # Mock context (JobQueue context) mock_context = MagicMock() mock_context.bot = mock_bot @@ -148,6 +156,14 @@ async def test_restricts_multiple_expired_warnings(self, mock_registry): mock_bot.restrict_chat_member = AsyncMock() mock_bot.send_message = AsyncMock() + mock_user = MagicMock() + mock_user.username = "testuser" + mock_user.full_name = "Test User" + mock_user.id = 123 + mock_member = MagicMock() + mock_member.user = mock_user + mock_bot.get_chat_member = AsyncMock(return_value=mock_member) + mock_context = MagicMock() mock_context.bot = mock_bot