From f8d2ebee4811ab2bb28bac286b44096b7e0678bd Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Wed, 19 Nov 2025 22:47:26 +0200 Subject: [PATCH 1/5] fixed not shared spam_data among user and his nodes/preprints --- osf/external/spam/tasks.py | 88 ++++++-- osf/models/mixins.py | 32 +-- osf/models/spam.py | 2 +- osf/models/user.py | 4 +- osf_tests/test_node.py | 443 ++++++++++++++++++++++++++++++++++++- 5 files changed, 529 insertions(+), 40 deletions(-) diff --git a/osf/external/spam/tasks.py b/osf/external/spam/tasks.py index 9c2a348cdcd..65a30b10604 100644 --- a/osf/external/spam/tasks.py +++ b/osf/external/spam/tasks.py @@ -103,6 +103,29 @@ def check_resource_with_spam_services(resource, content, author, author_email, r """ Return statements used only for debugging and recording keeping """ + from osf.models import OSFUser, AbstractNode, Preprint + + def set_found_spam_info(resource, client, details): + if not resource.spam_data.get('who_flagged'): + resource.spam_data['who_flagged'] = client.NAME + elif resource.spam_data['who_flagged'] != client.NAME: + resource.spam_data['who_flagged'] = 'both' + + if client.NAME == 'akismet': + resource.spam_pro_tip = details + if client.NAME == 'oopspam': + resource.spam_data['oopspam_data'] = details + + def set_collected_info(resource): + resource.spam_data['headers'] = { + 'Remote-Addr': request_kwargs.get('remote_addr'), + 'User-Agent': request_kwargs.get('user_agent'), + 'Referer': request_kwargs.get('referer'), + } + resource.spam_data['content'] = content + resource.spam_data['author'] = author + resource.spam_data['author_email'] = author_email + any_is_spam = False kwargs = dict( @@ -115,6 +138,10 @@ def check_resource_with_spam_services(resource, content, author, author_email, r content=content, ) + creator = OSFUser.objects.get(username=author_email) + nodes_to_flag = creator.nodes.filter(is_public=True, is_deleted=False) + preprints_to_flag = creator.preprints.filter(is_public=True, deleted__isnull=True) + spam_clients = [] if settings.AKISMET_ENABLED: spam_clients.append(AkismetClient()) @@ -123,28 +150,43 @@ def check_resource_with_spam_services(resource, content, author, author_email, r for client in spam_clients: is_spam, details = client.check_content(**kwargs) - if is_spam: - any_is_spam = True - if not resource.spam_data.get('who_flagged'): - resource.spam_data['who_flagged'] = client.NAME - elif resource.spam_data['who_flagged'] != client.NAME: - resource.spam_data['who_flagged'] = 'both' - - if client.NAME == 'akismet': - resource.spam_pro_tip = details - if client.NAME == 'oopspam': - resource.spam_data['oopspam_data'] = details - - if any_is_spam: - resource.spam_data['headers'] = { - 'Remote-Addr': request_kwargs.get('remote_addr'), - 'User-Agent': request_kwargs.get('user_agent'), - 'Referer': request_kwargs.get('referer'), - } - resource.spam_data['content'] = content - resource.spam_data['author'] = author - resource.spam_data['author_email'] = author_email - resource.flag_spam() + if not is_spam: + continue + + any_is_spam = True + + set_found_spam_info(resource, client, details) + + if not isinstance(resource, OSFUser): + set_found_spam_info(creator, client, details) + + for node in nodes_to_flag: + set_found_spam_info(node, client, details) + + for preprint in preprints_to_flag: + set_found_spam_info(preprint, client, details) + + if not any_is_spam: + return any_is_spam + + set_collected_info(resource) + resource.flag_spam(skip_user_suspension=True) + + # set spam_data but don't flag the creator because it'll happen at the end of check_resource_for_spam_postcommit + if not isinstance(resource, OSFUser): + set_collected_info(creator) + creator.save() + + for node in nodes_to_flag: + set_collected_info(node) + node.flag_spam(skip_user_suspension=True) + + for preprint in preprints_to_flag: + set_collected_info(preprint) + preprint.flag_spam(skip_user_suspension=True) + + AbstractNode.objects.bulk_update(nodes_to_flag, ['spam_status', 'spam_data', 'spam_pro_tip'], batch_size=100) + Preprint.objects.bulk_update(preprints_to_flag, ['spam_status', 'spam_data', 'spam_pro_tip'], batch_size=100) return any_is_spam @@ -183,7 +225,7 @@ def check_resource_for_spam_postcommit(guid, content, author, author_email, requ if hasattr(resource, 'check_spam_user'): user = OSFUser.objects.get(username=author_email) - resource.check_spam_user(user) + resource.check_spam_user(user, domains=list(spammy_domains)) @celery_app.task(ignore_results=False, max_retries=5, default_retry_delay=60) diff --git a/osf/models/mixins.py b/osf/models/mixins.py index 0e351edcd1b..2672aee3b8f 100644 --- a/osf/models/mixins.py +++ b/osf/models/mixins.py @@ -2213,28 +2213,29 @@ def check_spam(self, user, saved_fields, request_headers): request_headers, ) - def check_spam_user(self, user): + def check_spam_user(self, user, domains=None): if ( - settings.SPAM_ACCOUNT_SUSPENSION_ENABLED - and (timezone.now() - user.date_confirmed) <= settings.SPAM_ACCOUNT_SUSPENSION_THRESHOLD + settings.SPAM_ACCOUNT_SUSPENSION_ENABLED + and (timezone.now() - user.date_confirmed) <= settings.SPAM_ACCOUNT_SUSPENSION_THRESHOLD ) or ( - settings.SPAM_AUTOBAN_IP_BLOCK and self.spam_data.get('oopspam_data', None) - and self.spam_data['oopspam_data']['Details']['isIPBlocked'] + settings.SPAM_AUTOBAN_IP_BLOCK and self.spam_data.get('oopspam_data', None) + and self.spam_data['oopspam_data']['Details']['isIPBlocked'] ): - self.suspend_spam_user(user) + self.suspend_spam_user(user, domains=domains) - def suspend_spam_user(self, user): + def suspend_spam_user(self, user, domains=None): """ This suspends a users account and makes all there resources private, key word here is SUSPENDS this should not delete the account or any info associated with it. It should not be assumed the account is spam and it should not be used to train spam detecting services. """ + domains = domains or [] if user.is_hammy: return False - self.confirm_spam(save=True, train_spam_services=False) + + self.flag_spam(skip_user_suspension=True) # Suspend the flagged user for spam. - user.flag_spam() if not user.is_disabled: user.deactivate_account() mails.send_mail( @@ -2244,19 +2245,24 @@ def suspend_spam_user(self, user): osf_support_email=settings.OSF_SUPPORT_EMAIL, can_change_preferences=False, ) + + user.flag_spam() + if domains: + user.spam_data['domains'] = list(set(user.spam_data.get('domains', []) + domains)) + user.save() # Make public nodes private from this contributor for node in user.all_nodes: if self._id != node._id and len(node.contributors) == 1 and node.is_public: - node.confirm_spam(save=True, train_spam_services=False) + node.confirm_spam(save=True, domains=domains, train_spam_services=False) # Make preprints private from this contributor for preprint in user.preprints.all(): if self._id != preprint._id and len(preprint.contributors) == 1 and preprint.is_public: - preprint.confirm_spam(save=True, train_spam_services=False) + preprint.confirm_spam(save=True, domains=domains, train_spam_services=False) - def flag_spam(self): + def flag_spam(self, skip_user_suspension=False): """ Overrides SpamMixin#flag_spam. """ super().flag_spam() @@ -2272,7 +2278,7 @@ def flag_spam(self): ) log.save() - if settings.SPAM_THROTTLE_AUTOBAN: + if settings.SPAM_THROTTLE_AUTOBAN and not skip_user_suspension: creator = self.creator yesterday = timezone.now() - timezone.timedelta(days=1) node_spam_count = creator.all_nodes.filter(spam_status__in=[SpamStatus.FLAGGED, SpamStatus.SPAM], diff --git a/osf/models/spam.py b/osf/models/spam.py index d2f5946533c..43e862d97db 100644 --- a/osf/models/spam.py +++ b/osf/models/spam.py @@ -68,7 +68,7 @@ class Meta: default=dict, blank=True, validators=[_validate_reports] ) - def flag_spam(self): + def flag_spam(self, **kwargs): # If ham and unedited then tell user that they should read it again if self.spam_status == SpamStatus.UNKNOWN: self.spam_status = SpamStatus.FLAGGED diff --git a/osf/models/user.py b/osf/models/user.py index a27f7aca3b3..796f765b9df 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -1437,9 +1437,9 @@ def confirm_spam(self, domains=None, save=True, train_spam_services=False): # Don't train on resources merely associated with spam user for node in self.nodes.filter(is_public=True, is_deleted=False): - node.confirm_spam(train_spam_services=train_spam_services) + node.confirm_spam(domains=domains, train_spam_services=train_spam_services) for preprint in self.preprints.filter(is_public=True, deleted__isnull=True): - preprint.confirm_spam(train_spam_services=train_spam_services) + preprint.confirm_spam(domains=domains, train_spam_services=train_spam_services) def confirm_ham(self, save=False, train_spam_services=False): self.reactivate_account() diff --git a/osf_tests/test_node.py b/osf_tests/test_node.py index ee89ebb2d8a..9d208cbfe8d 100644 --- a/osf_tests/test_node.py +++ b/osf_tests/test_node.py @@ -2385,6 +2385,18 @@ def user(self): def project(self, user): return ProjectFactory(creator=user) + @pytest.fixture() + def project2(self, user): + return ProjectFactory(creator=user) + + @pytest.fixture() + def project3(self, user): + return ProjectFactory(creator=user) + + @pytest.fixture() + def preprint(self, user): + return PreprintFactory(creator=user) + @pytest.fixture() def request_headers(self): return { @@ -2393,6 +2405,108 @@ def request_headers(self): 'Referer': 'https://osf.io' } + def run_akismet_and_oops_tests( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed=None): + """ + This method makes the next checks when akismet and oopsystem are used: + 1. A resource and its creator become FLAGGED and other public nodes/preprints of this creator become SPAMMED and contain the same spam data + 2. Private nodes/preprints don't contain spam data + 3. If akismet confirms spam, all objects from #1 have spam_pro_tip + 4. If oopspam confirms spam, all objects from #1 have oopspam_data key in spam_data + 5. who_flagged property in spam_data contains either service name (akismet or oopspam) or 'both' value + 6. spam_data of objects from #1 contains headers, request user info and spammy content + + Params: + spam_object - object that is being checked for spam + akismet_spam_data - spam data returned by akismet + oops_spam_data - spam data returned by oopsystem + objects_to_be_spammed - objects to be spammed instead of flagged. Example: + spam_object = Node + spam objects is flagged, its creator is flagged and the others user's public nodes/preprints must be spammed + """ + project.set_privacy('public') + project2.set_privacy('public') + mock_check_domains.return_value = [] + + author = user.fullname + author_email = user.username + content = 'Check me for spam with akismet and oops' + objects_to_be_spammed = objects_to_be_spammed or [] + + # configurable part + spam_object = spam_object + akismet_spam_data = akismet_spam_data + oops_spam_data = oops_spam_data + + if akismet_spam_data and oops_spam_data: + expected_who_flagged = 'both' + elif akismet_spam_data: + expected_who_flagged = 'akismet' + elif oops_spam_data: + expected_who_flagged = 'oopspam' + else: + expected_who_flagged = None + + if akismet_spam_data or oops_spam_data: + expected_spam_data = { + 'headers': request_headers, + 'author': author, + 'author_email': author_email, + 'content': content, + 'who_flagged': expected_who_flagged + } + if oops_spam_data: + expected_spam_data['oopspam_data'] = oops_spam_data + else: + expected_spam_data = {} + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (bool(akismet_spam_data), akismet_spam_data) + mock_oops_check_content.return_value = (bool(oops_spam_data), oops_spam_data) + spam_tasks.check_resource_for_spam_postcommit( + guid=spam_object._id, + content=content, + author=author, + author_email=author_email, + request_headers=request_headers + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + for obj in [user, project, project2, preprint]: + if akismet_spam_data or oops_spam_data: + if obj in objects_to_be_spammed: + assert obj.spam_status == SpamStatus.SPAM + else: + assert obj.spam_status == SpamStatus.FLAGGED + + assert obj.spam_data == expected_spam_data + else: + assert obj.spam_status is None + assert obj.spam_data == {} + assert obj.spam_pro_tip is None + + if mock_akismet_check_content.return_value[0] and mock_oops_check_content.return_value[0]: + assert obj.spam_pro_tip == akismet_spam_data + assert obj.spam_data.get('oopspam_data', {}) == oops_spam_data + elif mock_akismet_check_content.return_value[0]: + assert obj.spam_pro_tip == akismet_spam_data + assert 'oopspam_data' not in obj.spam_data + elif mock_oops_check_content.return_value[0]: + assert obj.spam_data.get('oopspam_data', {}) == oops_spam_data + assert obj.spam_pro_tip is None + + # private node shouldn't be spammed + assert project3.spam_status is None + assert project3.spam_pro_tip is None + assert project3.spam_data == {} + + assert user.spam_data == project.spam_data == project2.spam_data == preprint.spam_data + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) @mock.patch('osf.external.spam.tasks._check_resource_for_domains') def test_check_resource_for_spam_postcommit_with_spammy_domains(self, mock_check_domains, project, user): @@ -2460,7 +2574,334 @@ def test_check_resource_for_spam_postcommit_checks_user(self, mock_check_domains author_email=user.username, request_headers=request_headers ) - mock_check_user.assert_called_once_with(user) + mock_check_user.assert_called_once_with(user, domains=[]) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_spammed_user_shares_spam_data_with_nodes_and_preprints(self, mock_check_domains, user, project, preprint): + user.date_confirmed = timezone.now() + user.save() + + mock_check_domains.return_value = ['spam_domain.com'] + + project.set_privacy('public') + + spam_tasks.check_resource_for_spam_postcommit( + guid=user._id, + content='Check me for spam at spam_domain.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.SPAM + assert user.spam_data['domains'] == ['spam_domain.com'] + + assert preprint.spam_status == SpamStatus.SPAM + assert preprint.spam_data['domains'] == ['spam_domain.com'] + + assert project.spam_status == SpamStatus.SPAM + assert project.spam_data['domains'] == ['spam_domain.com'] + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_spammed_project_shares_spam_data_with_creator_and_other_nodes_and_preprints(self, mock_check_domains, user, project, preprint): + user.date_confirmed = timezone.now() + user.save() + + project2 = ProjectFactory(creator=user) + project2.set_privacy('public') + + mock_check_domains.return_value = ['again_spam.com'] + + spam_tasks.check_resource_for_spam_postcommit( + guid=project._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert project.spam_status == SpamStatus.SPAM + assert project.spam_data['domains'] == ['again_spam.com'] + + assert project2.spam_status == SpamStatus.SPAM + assert project2.spam_data['domains'] == ['again_spam.com'] + + assert preprint.spam_status == SpamStatus.SPAM + assert preprint.spam_data['domains'] == ['again_spam.com'] + + # when user isn't a direct resource of spam, it's suspected to be spammed + assert user.spam_status == SpamStatus.FLAGGED + assert user.spam_data['domains'] == ['again_spam.com'] + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_and_preprints(self, mock_check_domains, user, project, preprint): + user.date_confirmed = timezone.now() + user.save() + + project2 = ProjectFactory(creator=user) + project2.set_privacy('public') + + preprint2 = PreprintFactory(creator=user) + + mock_check_domains.return_value = ['again_spam.com'] + + spam_tasks.check_resource_for_spam_postcommit( + guid=preprint._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + preprint2.reload() + + # project is private + assert project.spam_status != SpamStatus.SPAM + assert 'domains' not in project.spam_data + + assert project2.spam_status == SpamStatus.SPAM + assert project2.spam_data['domains'] == ['again_spam.com'] + + assert preprint.spam_status == SpamStatus.SPAM + assert preprint.spam_data['domains'] == ['again_spam.com'] + + assert preprint2.spam_status == SpamStatus.SPAM + assert preprint2.spam_data['domains'] == ['again_spam.com'] + + assert user.spam_status == SpamStatus.FLAGGED + assert user.spam_data['domains'] == ['again_spam.com'] + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_no_spam_found_by_akismet_and_oopspam_for_user( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + spam_object = user + akismet_spam_data = '' + oops_spam_data = {} + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_no_spam_found_by_akismet_and_oopspam_for_node( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + spam_object = project + akismet_spam_data = '' + oops_spam_data = {} + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_no_spam_found_by_akismet_and_oopspam_for_preprint( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + spam_object = preprint + akismet_spam_data = '' + oops_spam_data = {} + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_akismet_spammed_user_shares_spam_data_with_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + spam_object = user + akismet_spam_data = 'It is a spammy content, spam it!' + oops_spam_data = {} + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_oops_spammed_user_shares_spam_data_with_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + spam_object = user + akismet_spam_data = '' + oops_spam_data = {'reason': 'spam'} + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_akismet_and_oops_spammed_user_shares_spam_data_with_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + spam_object = user + akismet_spam_data = 'some spam found' + oops_spam_data = {'reason': 'spam'} + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_akismet_spammed_node_shares_spam_data_with_creator_and_other_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.date_confirmed = timezone.now() + user.save() + + spam_object = project + akismet_spam_data = 'some spam found' + oops_spam_data = {} + objects_to_be_spammed = [project2, preprint] + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_oops_spammed_node_shares_spam_data_with_creator_and_other_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.date_confirmed = timezone.now() + user.save() + + spam_object = project + akismet_spam_data = '' + oops_spam_data = {'reason': 'some spam info'} + objects_to_be_spammed = [project2, preprint] + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_akismet_and_oops_spammed_node_shares_spam_data_with_creator_and_other_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.date_confirmed = timezone.now() + user.save() + + spam_object = project + akismet_spam_data = 'it is a real spam!!!' + oops_spam_data = {'reason': 'some spam info'} + objects_to_be_spammed = [project2, preprint] + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_akismet_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.date_confirmed = timezone.now() + user.save() + + spam_object = preprint + akismet_spam_data = 'some spam found' + oops_spam_data = {} + objects_to_be_spammed = [project2, project] + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_oops_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.date_confirmed = timezone.now() + user.save() + + spam_object = preprint + akismet_spam_data = '' + oops_spam_data = {'reason': 'some spam info'} + objects_to_be_spammed = [project2, project] + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed + ) + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_akismet_and_oops_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_and_preprints( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.date_confirmed = timezone.now() + user.save() + + spam_object = preprint + akismet_spam_data = 'it is a real spam!!!' + oops_spam_data = {'reason': 'some spam info'} + objects_to_be_spammed = [project2, project] + self.run_akismet_and_oops_tests( + mock_check_domains, user, project, project2, project3, preprint, request_headers, + spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed + ) # copied from tests/test_models.py From ac570d79c0ae103340e4f47b31cb10a21804e6ba Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Thu, 20 Nov 2025 19:11:54 +0200 Subject: [PATCH 2/5] fixed case when resource is a user --- osf/external/spam/tasks.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/osf/external/spam/tasks.py b/osf/external/spam/tasks.py index 65a30b10604..b1bf6791eb9 100644 --- a/osf/external/spam/tasks.py +++ b/osf/external/spam/tasks.py @@ -138,16 +138,20 @@ def set_collected_info(resource): content=content, ) - creator = OSFUser.objects.get(username=author_email) - nodes_to_flag = creator.nodes.filter(is_public=True, is_deleted=False) - preprints_to_flag = creator.preprints.filter(is_public=True, deleted__isnull=True) - spam_clients = [] if settings.AKISMET_ENABLED: spam_clients.append(AkismetClient()) if settings.OOPSPAM_ENABLED: spam_clients.append(OOPSpamClient()) + if isinstance(resource, OSFUser): + creator = resource + else: + creator = OSFUser.objects.get(username=author_email) + + nodes_to_flag = creator.nodes.filter(is_public=True, is_deleted=False) + preprints_to_flag = creator.preprints.filter(is_public=True, deleted__isnull=True) + for client in spam_clients: is_spam, details = client.check_content(**kwargs) if not is_spam: @@ -156,7 +160,6 @@ def set_collected_info(resource): any_is_spam = True set_found_spam_info(resource, client, details) - if not isinstance(resource, OSFUser): set_found_spam_info(creator, client, details) From 539895de480fbe680a10d081aca071563a145366 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Fri, 21 Nov 2025 18:43:40 +0200 Subject: [PATCH 3/5] handle hammy user cases --- osf/external/spam/tasks.py | 17 +- osf_tests/test_node.py | 515 +++++++++++++++++++++++++++++++++++++ 2 files changed, 528 insertions(+), 4 deletions(-) diff --git a/osf/external/spam/tasks.py b/osf/external/spam/tasks.py index b1bf6791eb9..4cb4c7784b5 100644 --- a/osf/external/spam/tasks.py +++ b/osf/external/spam/tasks.py @@ -160,7 +160,7 @@ def set_collected_info(resource): any_is_spam = True set_found_spam_info(resource, client, details) - if not isinstance(resource, OSFUser): + if not isinstance(resource, OSFUser) and not creator.is_hammy: set_found_spam_info(creator, client, details) for node in nodes_to_flag: @@ -172,11 +172,16 @@ def set_collected_info(resource): if not any_is_spam: return any_is_spam + sentry.log_message( + f"Spam data detected by akismet/oops for {resource._id}:" + f"{resource.spam_pro_tip or resource.spam_data.get('oopspam_data')}" + ) + set_collected_info(resource) resource.flag_spam(skip_user_suspension=True) # set spam_data but don't flag the creator because it'll happen at the end of check_resource_for_spam_postcommit - if not isinstance(resource, OSFUser): + if not isinstance(resource, OSFUser) and not creator.is_hammy: set_collected_info(creator) creator.save() @@ -203,6 +208,10 @@ def check_resource_for_spam_postcommit(guid, content, author, author_email, requ if not resource: return f'{guid} not found' + if isinstance(resource, OSFUser) and resource.is_hammy: + sentry.log_message(f"User {guid} is not checked for spam because of ham status") + return + spammy_domains = _check_resource_for_domains(resource, content) if spammy_domains: sentry.log_message(f"Spammy domains detected for {guid}: {spammy_domains}") @@ -226,8 +235,8 @@ def check_resource_for_spam_postcommit(guid, content, author, author_email, requ resource.save() - if hasattr(resource, 'check_spam_user'): - user = OSFUser.objects.get(username=author_email) + user = OSFUser.objects.get(username=author_email) + if hasattr(resource, 'check_spam_user') and not user.is_hammy: resource.check_spam_user(user, domains=list(spammy_domains)) diff --git a/osf_tests/test_node.py b/osf_tests/test_node.py index 9d208cbfe8d..4b193b84d65 100644 --- a/osf_tests/test_node.py +++ b/osf_tests/test_node.py @@ -2576,6 +2576,108 @@ def test_check_resource_for_spam_postcommit_checks_user(self, mock_check_domains ) mock_check_user.assert_called_once_with(user, domains=[]) + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_because_of_spammy_domains(self, mock_check_domains, user, project, preprint): + user.date_confirmed = timezone.now() + user.save() + + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = ['spam_domain.com'] + + project.set_privacy('public') + + spam_tasks.check_resource_for_spam_postcommit( + guid=user._id, + content='Check me for spam at spam_domain.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.UNKNOWN + assert preprint.spam_data == {} + + assert project.spam_status == SpamStatus.UNKNOWN + assert project.spam_data == {} + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_creator_is_not_spammed_because_of_spammy_domains_in_node(self, mock_check_domains, user, project, preprint): + user.date_confirmed = timezone.now() + user.save() + + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = ['spam_domain.com'] + + project.set_privacy('public') + + spam_tasks.check_resource_for_spam_postcommit( + guid=project._id, + content='Check me for spam at spam_domain.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.UNKNOWN + assert preprint.spam_data == {} + + assert project.spam_status == SpamStatus.SPAM + assert project.spam_data['domains'] == ['spam_domain.com'] + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_creator_is_not_spammed_because_of_spammy_domains_in_preprint(self, mock_check_domains, user, project, preprint): + user.date_confirmed = timezone.now() + user.save() + + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = ['spam_domain.com'] + + project.set_privacy('public') + + spam_tasks.check_resource_for_spam_postcommit( + guid=preprint._id, + content='Check me for spam at spam_domain.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.SPAM + assert preprint.spam_data['domains'] == ['spam_domain.com'] + + assert project.spam_status == SpamStatus.UNKNOWN + assert project.spam_data == {} + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) @mock.patch('osf.external.spam.tasks._check_resource_for_domains') @@ -2687,6 +2789,419 @@ def test_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_and_prep assert user.spam_status == SpamStatus.FLAGGED assert user.spam_data['domains'] == ['again_spam.com'] + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_akismet_detected_spam( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (True, 'spam_data') + mock_oops_check_content.return_value = (False, {}) + spam_tasks.check_resource_for_spam_postcommit( + guid=user._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.UNKNOWN + assert preprint.spam_pro_tip is None + assert preprint.spam_data == {} + + assert project.spam_status == SpamStatus.UNKNOWN + assert project.spam_pro_tip is None + assert project.spam_data == {} + + assert project2.spam_status == SpamStatus.UNKNOWN + assert project2.spam_pro_tip is None + assert project2.spam_data == {} + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_akismet_detected_spam_in_node( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (True, 'spam_data') + mock_oops_check_content.return_value = (False, {}) + spam_tasks.check_resource_for_spam_postcommit( + guid=project._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_pro_tip is None + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.FLAGGED + assert preprint.spam_pro_tip == 'spam_data' + + assert project.spam_status == SpamStatus.FLAGGED + assert project.spam_pro_tip == 'spam_data' + + assert project2.spam_status == SpamStatus.FLAGGED + assert project2.spam_pro_tip == 'spam_data' + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_akismet_detected_spam_in_preprint( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (True, 'spam_data') + mock_oops_check_content.return_value = (False, {}) + spam_tasks.check_resource_for_spam_postcommit( + guid=preprint._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_pro_tip is None + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.FLAGGED + assert preprint.spam_pro_tip == 'spam_data' + + assert project.spam_status == SpamStatus.FLAGGED + assert project.spam_pro_tip == 'spam_data' + + assert project2.spam_status == SpamStatus.FLAGGED + assert project2.spam_pro_tip == 'spam_data' + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_oops_detected_spam( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (False, '') + mock_oops_check_content.return_value = (True, {'reason': 'spam'}) + spam_tasks.check_resource_for_spam_postcommit( + guid=user._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.UNKNOWN + assert preprint.spam_data == {} + + assert project.spam_status == SpamStatus.UNKNOWN + assert project.spam_data == {} + + assert project2.spam_status == SpamStatus.UNKNOWN + assert project2.spam_data == {} + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_oops_detected_spam_in_node( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (False, '') + mock_oops_check_content.return_value = (True, {'reason': 'spam'}) + spam_tasks.check_resource_for_spam_postcommit( + guid=project._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.FLAGGED + assert preprint.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project.spam_status == SpamStatus.FLAGGED + assert project.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project2.spam_status == SpamStatus.FLAGGED + assert project2.spam_data['oopspam_data'] == {'reason': 'spam'} + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_oops_detected_spam_in_preprint( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (False, '') + mock_oops_check_content.return_value = (True, {'reason': 'spam'}) + spam_tasks.check_resource_for_spam_postcommit( + guid=preprint._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.FLAGGED + assert preprint.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project.spam_status == SpamStatus.FLAGGED + assert project.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project2.spam_status == SpamStatus.FLAGGED + assert project2.spam_data['oopspam_data'] == {'reason': 'spam'} + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_akismet_and_oops_detected_spam( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (True, 'akismet spam data') + mock_oops_check_content.return_value = (True, {'reason': 'spam'}) + spam_tasks.check_resource_for_spam_postcommit( + guid=user._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_pro_tip is None + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.UNKNOWN + assert preprint.spam_pro_tip is None + assert preprint.spam_data == {} + + assert project.spam_status == SpamStatus.UNKNOWN + assert project.spam_pro_tip is None + assert project.spam_data == {} + + assert project2.spam_status == SpamStatus.UNKNOWN + assert project2.spam_pro_tip is None + assert project2.spam_data == {} + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_akismet_and_oops_detected_spam_in_node( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (True, 'akismet spam data') + mock_oops_check_content.return_value = (True, {'reason': 'spam'}) + spam_tasks.check_resource_for_spam_postcommit( + guid=project._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_pro_tip is None + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.FLAGGED + assert preprint.spam_pro_tip == 'akismet spam data' + assert preprint.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project.spam_status == SpamStatus.FLAGGED + assert project.spam_pro_tip == 'akismet spam data' + assert project.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project2.spam_status == SpamStatus.FLAGGED + assert project2.spam_pro_tip == 'akismet spam data' + assert project2.spam_data['oopspam_data'] == {'reason': 'spam'} + + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) + @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) + @mock.patch.object(settings, 'AKISMET_ENABLED', True) + @mock.patch.object(settings, 'OOPSPAM_ENABLED', True) + @mock.patch('osf.external.spam.tasks._check_resource_for_domains') + def test_ham_user_is_not_spammed_when_akismet_and_oops_detected_spam_in_preprint( + self, mock_check_domains, user, project, project2, project3, preprint, request_headers + ): + user.spam_status = SpamStatus.HAM + user.save() + + mock_check_domains.return_value = [] + + project.set_privacy('public') + project2.set_privacy('public') + + with mock.patch('osf.external.spam.tasks.AkismetClient.check_content') as mock_akismet_check_content: + with mock.patch('osf.external.spam.tasks.OOPSpamClient.check_content') as mock_oops_check_content: + mock_akismet_check_content.return_value = (True, 'akismet spam data') + mock_oops_check_content.return_value = (True, {'reason': 'spam'}) + spam_tasks.check_resource_for_spam_postcommit( + guid=preprint._id, + content='Check me for spam at again_spam.com', + author=user.fullname, + author_email=user.username, + request_headers={} + ) + user.reload() + project.reload() + project2.reload() + preprint.reload() + + assert user.spam_status == SpamStatus.HAM + assert user.spam_pro_tip is None + assert user.spam_data == {} + + assert preprint.spam_status == SpamStatus.FLAGGED + assert preprint.spam_pro_tip == 'akismet spam data' + assert preprint.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project.spam_status == SpamStatus.FLAGGED + assert project.spam_pro_tip == 'akismet spam data' + assert project.spam_data['oopspam_data'] == {'reason': 'spam'} + + assert project2.spam_status == SpamStatus.FLAGGED + assert project2.spam_pro_tip == 'akismet spam data' + assert project2.spam_data['oopspam_data'] == {'reason': 'spam'} + @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) @mock.patch.object(settings, 'SPAM_SERVICES_ENABLED', True) @mock.patch.object(settings, 'AKISMET_ENABLED', True) From 914319e40a6cd9df473e6a732786af8bc1fb1d10 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Mon, 24 Nov 2025 15:09:52 +0200 Subject: [PATCH 4/5] fixed old tests --- osf_tests/external/akismet/test_akismet.py | 4 ++-- osf_tests/external/oopspam/test_oopspam.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/osf_tests/external/akismet/test_akismet.py b/osf_tests/external/akismet/test_akismet.py index 46729e485e8..5b063a42e33 100644 --- a/osf_tests/external/akismet/test_akismet.py +++ b/osf_tests/external/akismet/test_akismet.py @@ -140,7 +140,7 @@ def test_do_spam_check_true(self, mock_akismet, user, request_headers): user.do_check_spam( author='test-author', - author_email='test@test.com', + author_email=user.username, content='test', request_headers=request_headers ) @@ -148,7 +148,7 @@ def test_do_spam_check_true(self, mock_akismet, user, request_headers): data = parse_qs(mock_akismet.calls[0].request.body) assert data['comment_author'] == ['test-author'] - assert data['comment_author_email'] == ['test@test.com'] + assert data['comment_author_email'] == [user.username] assert data['blog'] == [settings.DOMAIN] user.refresh_from_db() diff --git a/osf_tests/external/oopspam/test_oopspam.py b/osf_tests/external/oopspam/test_oopspam.py index 96656ecc6da..4aaa89bc3d9 100644 --- a/osf_tests/external/oopspam/test_oopspam.py +++ b/osf_tests/external/oopspam/test_oopspam.py @@ -98,7 +98,7 @@ def test_do_spam_check_true(self, mock_oopspam, user, request_headers): user.do_check_spam( author='test-author', - author_email='test@test.com', + author_email=user.username, content='test', request_headers=request_headers ) @@ -119,7 +119,7 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers): user.do_check_spam( author='test-author', - author_email='test@test.com', + author_email=user.username, content='test', request_headers=request_headers ) From 6d8435b9d0d6799f0e4853fe88286b96721cf13f Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Wed, 26 Nov 2025 14:01:01 +0200 Subject: [PATCH 5/5] creator is spammed instead of flagged when a node contains spam --- osf/models/mixins.py | 5 +---- osf/models/user.py | 5 ++++- osf_tests/test_node.py | 17 ++++++++--------- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/osf/models/mixins.py b/osf/models/mixins.py index 2672aee3b8f..14c55e55c95 100644 --- a/osf/models/mixins.py +++ b/osf/models/mixins.py @@ -2246,10 +2246,7 @@ def suspend_spam_user(self, user, domains=None): can_change_preferences=False, ) - user.flag_spam() - if domains: - user.spam_data['domains'] = list(set(user.spam_data.get('domains', []) + domains)) - + user.confirm_spam(domains=domains or [], save=False, skip_resources_spam=True) user.save() # Make public nodes private from this contributor diff --git a/osf/models/user.py b/osf/models/user.py index 796f765b9df..fd6025998d4 100644 --- a/osf/models/user.py +++ b/osf/models/user.py @@ -1431,10 +1431,13 @@ def confirm_email(self, token, merge=False): return True - def confirm_spam(self, domains=None, save=True, train_spam_services=False): + def confirm_spam(self, domains=None, save=True, train_spam_services=False, skip_resources_spam=False): self.deactivate_account() super().confirm_spam(domains=domains, save=save, train_spam_services=train_spam_services) + if skip_resources_spam: + return + # Don't train on resources merely associated with spam user for node in self.nodes.filter(is_public=True, is_deleted=False): node.confirm_spam(domains=domains, train_spam_services=train_spam_services) diff --git a/osf_tests/test_node.py b/osf_tests/test_node.py index 4b193b84d65..ac81bf01695 100644 --- a/osf_tests/test_node.py +++ b/osf_tests/test_node.py @@ -2742,8 +2742,7 @@ def test_spammed_project_shares_spam_data_with_creator_and_other_nodes_and_prepr assert preprint.spam_status == SpamStatus.SPAM assert preprint.spam_data['domains'] == ['again_spam.com'] - # when user isn't a direct resource of spam, it's suspected to be spammed - assert user.spam_status == SpamStatus.FLAGGED + assert user.spam_status == SpamStatus.SPAM assert user.spam_data['domains'] == ['again_spam.com'] @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) @@ -2786,7 +2785,7 @@ def test_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_and_prep assert preprint2.spam_status == SpamStatus.SPAM assert preprint2.spam_data['domains'] == ['again_spam.com'] - assert user.spam_status == SpamStatus.FLAGGED + assert user.spam_status == SpamStatus.SPAM assert user.spam_data['domains'] == ['again_spam.com'] @mock.patch.object(settings, 'SPAM_ACCOUNT_SUSPENSION_ENABLED', True) @@ -3312,7 +3311,7 @@ def test_akismet_spammed_node_shares_spam_data_with_creator_and_other_nodes_and_ spam_object = project akismet_spam_data = 'some spam found' oops_spam_data = {} - objects_to_be_spammed = [project2, preprint] + objects_to_be_spammed = [user, project2, preprint] self.run_akismet_and_oops_tests( mock_check_domains, user, project, project2, project3, preprint, request_headers, spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed @@ -3332,7 +3331,7 @@ def test_oops_spammed_node_shares_spam_data_with_creator_and_other_nodes_and_pre spam_object = project akismet_spam_data = '' oops_spam_data = {'reason': 'some spam info'} - objects_to_be_spammed = [project2, preprint] + objects_to_be_spammed = [user, project2, preprint] self.run_akismet_and_oops_tests( mock_check_domains, user, project, project2, project3, preprint, request_headers, spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed @@ -3352,7 +3351,7 @@ def test_akismet_and_oops_spammed_node_shares_spam_data_with_creator_and_other_n spam_object = project akismet_spam_data = 'it is a real spam!!!' oops_spam_data = {'reason': 'some spam info'} - objects_to_be_spammed = [project2, preprint] + objects_to_be_spammed = [user, project2, preprint] self.run_akismet_and_oops_tests( mock_check_domains, user, project, project2, project3, preprint, request_headers, spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed @@ -3372,7 +3371,7 @@ def test_akismet_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_ spam_object = preprint akismet_spam_data = 'some spam found' oops_spam_data = {} - objects_to_be_spammed = [project2, project] + objects_to_be_spammed = [user, project2, project] self.run_akismet_and_oops_tests( mock_check_domains, user, project, project2, project3, preprint, request_headers, spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed @@ -3392,7 +3391,7 @@ def test_oops_spammed_preprint_shares_spam_data_with_creator_and_other_nodes_and spam_object = preprint akismet_spam_data = '' oops_spam_data = {'reason': 'some spam info'} - objects_to_be_spammed = [project2, project] + objects_to_be_spammed = [user, project2, project] self.run_akismet_and_oops_tests( mock_check_domains, user, project, project2, project3, preprint, request_headers, spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed @@ -3412,7 +3411,7 @@ def test_akismet_and_oops_spammed_preprint_shares_spam_data_with_creator_and_oth spam_object = preprint akismet_spam_data = 'it is a real spam!!!' oops_spam_data = {'reason': 'some spam info'} - objects_to_be_spammed = [project2, project] + objects_to_be_spammed = [user, project2, project] self.run_akismet_and_oops_tests( mock_check_domains, user, project, project2, project3, preprint, request_headers, spam_object, akismet_spam_data, oops_spam_data, objects_to_be_spammed