CenterForOpenScience · ihorsokhanexoft · Nov 19, 2025 · Nov 20, 2025 · Nov 21, 2025 · Nov 24, 2025
diff --git a/osf/external/spam/tasks.py b/osf/external/spam/tasks.py
@@ -103,6 +103,29 @@ def check_resource_with_spam_services(resource, content, author, author_email, r
     """
     Return statements used only for debugging and recording keeping
     """
+    from osf.models import OSFUser, AbstractNode, Preprint
+
+    def set_found_spam_info(resource, client, details):
+        if not resource.spam_data.get('who_flagged'):
+            resource.spam_data['who_flagged'] = client.NAME
+        elif resource.spam_data['who_flagged'] != client.NAME:
+            resource.spam_data['who_flagged'] = 'both'
+
+        if client.NAME == 'akismet':
+            resource.spam_pro_tip = details
+        if client.NAME == 'oopspam':
+            resource.spam_data['oopspam_data'] = details
+
+    def set_collected_info(resource):
+        resource.spam_data['headers'] = {
+            'Remote-Addr': request_kwargs.get('remote_addr'),
+            'User-Agent': request_kwargs.get('user_agent'),
+            'Referer': request_kwargs.get('referer'),
+        }
+        resource.spam_data['content'] = content
+        resource.spam_data['author'] = author
+        resource.spam_data['author_email'] = author_email
+
     any_is_spam = False
 
     kwargs = dict(
@@ -121,30 +144,57 @@ def check_resource_with_spam_services(resource, content, author, author_email, r
     if settings.OOPSPAM_ENABLED:
         spam_clients.append(OOPSpamClient())
 
+    if isinstance(resource, OSFUser):
+        creator = resource
+    else:
+        creator = OSFUser.objects.get(username=author_email)
+
+    nodes_to_flag = creator.nodes.filter(is_public=True, is_deleted=False)
+    preprints_to_flag = creator.preprints.filter(is_public=True, deleted__isnull=True)
+
     for client in spam_clients:
         is_spam, details = client.check_content(**kwargs)
-        if is_spam:
-            any_is_spam = True
-            if not resource.spam_data.get('who_flagged'):
-                resource.spam_data['who_flagged'] = client.NAME
-            elif resource.spam_data['who_flagged'] != client.NAME:
-                resource.spam_data['who_flagged'] = 'both'
-
-            if client.NAME == 'akismet':
-                resource.spam_pro_tip = details
-            if client.NAME == 'oopspam':
-                resource.spam_data['oopspam_data'] = details
-
-    if any_is_spam:
-        resource.spam_data['headers'] = {
-            'Remote-Addr': request_kwargs.get('remote_addr'),
-            'User-Agent': request_kwargs.get('user_agent'),
-            'Referer': request_kwargs.get('referer'),
-        }
-        resource.spam_data['content'] = content
-        resource.spam_data['author'] = author
-        resource.spam_data['author_email'] = author_email
-        resource.flag_spam()
+        if not is_spam:
+            continue
+
+        any_is_spam = True
+
+        set_found_spam_info(resource, client, details)
+        if not isinstance(resource, OSFUser) and not creator.is_hammy:
+            set_found_spam_info(creator, client, details)
+
+        for node in nodes_to_flag:
+            set_found_spam_info(node, client, details)
+
+        for preprint in preprints_to_flag:
+            set_found_spam_info(preprint, client, details)
+
+    if not any_is_spam:
+        return any_is_spam
+
+    sentry.log_message(
+        f"Spam data detected by akismet/oops for {resource._id}:"
+        f"{resource.spam_pro_tip or resource.spam_data.get('oopspam_data')}"
+    )
+
+    set_collected_info(resource)
+    resource.flag_spam(skip_user_suspension=True)
+
+    # Store spam_data on the creator without flagging them here: the creator is dealt with at the end of check_resource_for_spam_postcommit (via check_spam_user).
+    if not isinstance(resource, OSFUser) and not creator.is_hammy:
+        set_collected_info(creator)
+        creator.save()
+
+    for node in nodes_to_flag:
+        set_collected_info(node)
+        node.flag_spam(skip_user_suspension=True)
+
+    for preprint in preprints_to_flag:
+        set_collected_info(preprint)
+        preprint.flag_spam(skip_user_suspension=True)
+
+    AbstractNode.objects.bulk_update(nodes_to_flag, ['spam_status', 'spam_data', 'spam_pro_tip'], batch_size=100)
+    Preprint.objects.bulk_update(preprints_to_flag, ['spam_status', 'spam_data', 'spam_pro_tip'], batch_size=100)
 
     return any_is_spam
 
@@ -158,6 +208,10 @@ def check_resource_for_spam_postcommit(guid, content, author, author_email, requ
     if not resource:
         return f'{guid} not found'
 
+    if isinstance(resource, OSFUser) and resource.is_hammy:
+        sentry.log_message(f"User {guid} is not checked for spam because of ham status")
+        return
+
     spammy_domains = _check_resource_for_domains(resource, content)
     if spammy_domains:
         sentry.log_message(f"Spammy domains detected for {guid}: {spammy_domains}")
@@ -181,9 +235,9 @@ def check_resource_for_spam_postcommit(guid, content, author, author_email, requ
 
     resource.save()
 
-    if hasattr(resource, 'check_spam_user'):
-        user = OSFUser.objects.get(username=author_email)
-        resource.check_spam_user(user)
+    user = OSFUser.objects.get(username=author_email)
+    if hasattr(resource, 'check_spam_user') and not user.is_hammy:
+        resource.check_spam_user(user, domains=list(spammy_domains))
 
 
 @celery_app.task(ignore_results=False, max_retries=5, default_retry_delay=60)

diff --git a/osf/models/mixins.py b/osf/models/mixins.py
@@ -2213,28 +2213,29 @@ def check_spam(self, user, saved_fields, request_headers):
             request_headers,
         )
 
-    def check_spam_user(self, user):
+    def check_spam_user(self, user, domains=None):
         if (
-                settings.SPAM_ACCOUNT_SUSPENSION_ENABLED
-                and (timezone.now() - user.date_confirmed) <= settings.SPAM_ACCOUNT_SUSPENSION_THRESHOLD
+            settings.SPAM_ACCOUNT_SUSPENSION_ENABLED
+            and (timezone.now() - user.date_confirmed) <= settings.SPAM_ACCOUNT_SUSPENSION_THRESHOLD
         ) or (
-                settings.SPAM_AUTOBAN_IP_BLOCK and self.spam_data.get('oopspam_data', None)
-                and self.spam_data['oopspam_data']['Details']['isIPBlocked']
+            settings.SPAM_AUTOBAN_IP_BLOCK and self.spam_data.get('oopspam_data', None)
+            and self.spam_data['oopspam_data']['Details']['isIPBlocked']
         ):
-            self.suspend_spam_user(user)
+            self.suspend_spam_user(user, domains=domains)
 
-    def suspend_spam_user(self, user):
+    def suspend_spam_user(self, user, domains=None):
         """
         This suspends a users account and makes all there resources private, key word here is SUSPENDS this should not
         delete the account or any info associated with it. It should not be assumed the account is spam and it should
         not be used to train spam detecting services.
         """
+        domains = domains or []
         if user.is_hammy:
             return False
-        self.confirm_spam(save=True, train_spam_services=False)
+
+        self.flag_spam(skip_user_suspension=True)
 
         # Suspend the flagged user for spam.
-        user.flag_spam()
         if not user.is_disabled:
             user.deactivate_account()
             mails.send_mail(
@@ -2244,19 +2245,21 @@ def suspend_spam_user(self, user):
                 osf_support_email=settings.OSF_SUPPORT_EMAIL,
                 can_change_preferences=False,
             )
+
+        user.confirm_spam(domains=domains or [], save=False, skip_resources_spam=True)
         user.save()
 
         # Make public nodes private from this contributor
         for node in user.all_nodes:
             if self._id != node._id and len(node.contributors) == 1 and node.is_public:
-                node.confirm_spam(save=True, train_spam_services=False)
+                node.confirm_spam(save=True, domains=domains, train_spam_services=False)
 
         # Make preprints private from this contributor
         for preprint in user.preprints.all():
             if self._id != preprint._id and len(preprint.contributors) == 1 and preprint.is_public:
-                preprint.confirm_spam(save=True, train_spam_services=False)
+                preprint.confirm_spam(save=True, domains=domains, train_spam_services=False)
 
-    def flag_spam(self):
+    def flag_spam(self, skip_user_suspension=False):
         """ Overrides SpamMixin#flag_spam.
         """
         super().flag_spam()
@@ -2272,7 +2275,7 @@ def flag_spam(self):
             )
             log.save()
 
-        if settings.SPAM_THROTTLE_AUTOBAN:
+        if settings.SPAM_THROTTLE_AUTOBAN and not skip_user_suspension:
             creator = self.creator
             yesterday = timezone.now() - timezone.timedelta(days=1)
             node_spam_count = creator.all_nodes.filter(spam_status__in=[SpamStatus.FLAGGED, SpamStatus.SPAM],

diff --git a/osf/models/spam.py b/osf/models/spam.py
@@ -68,7 +68,7 @@ class Meta:
         default=dict, blank=True, validators=[_validate_reports]
     )
 
-    def flag_spam(self):
+    def flag_spam(self, **kwargs):
         # If ham and unedited then tell user that they should read it again
         if self.spam_status == SpamStatus.UNKNOWN:
             self.spam_status = SpamStatus.FLAGGED

diff --git a/osf/models/user.py b/osf/models/user.py
@@ -1431,15 +1431,18 @@ def confirm_email(self, token, merge=False):
 
         return True
 
-    def confirm_spam(self, domains=None, save=True, train_spam_services=False):
+    def confirm_spam(self, domains=None, save=True, train_spam_services=False, skip_resources_spam=False):
         self.deactivate_account()
         super().confirm_spam(domains=domains, save=save, train_spam_services=train_spam_services)
 
+        if skip_resources_spam:
+            return
+
         # Don't train on resources merely associated with spam user
         for node in self.nodes.filter(is_public=True, is_deleted=False):
-            node.confirm_spam(train_spam_services=train_spam_services)
+            node.confirm_spam(domains=domains, train_spam_services=train_spam_services)
         for preprint in self.preprints.filter(is_public=True, deleted__isnull=True):
-            preprint.confirm_spam(train_spam_services=train_spam_services)
+            preprint.confirm_spam(domains=domains, train_spam_services=train_spam_services)
 
     def confirm_ham(self, save=False, train_spam_services=False):
         self.reactivate_account()

diff --git a/osf_tests/external/akismet/test_akismet.py b/osf_tests/external/akismet/test_akismet.py
@@ -140,15 +140,15 @@ def test_do_spam_check_true(self, mock_akismet, user, request_headers):
 
         user.do_check_spam(
             author='test-author',
-            author_email='[email protected]',
+            author_email=user.username,
             content='test',
             request_headers=request_headers
         )
 
         data = parse_qs(mock_akismet.calls[0].request.body)
 
         assert data['comment_author'] == ['test-author']
-        assert data['comment_author_email'] == ['[email protected]']
+        assert data['comment_author_email'] == [user.username]
         assert data['blog'] == [settings.DOMAIN]
 
         user.refresh_from_db()

diff --git a/osf_tests/external/oopspam/test_oopspam.py b/osf_tests/external/oopspam/test_oopspam.py
@@ -98,7 +98,7 @@ def test_do_spam_check_true(self, mock_oopspam, user, request_headers):
 
         user.do_check_spam(
             author='test-author',
-            author_email='[email protected]',
+            author_email=user.username,
             content='test',
             request_headers=request_headers
         )
@@ -119,7 +119,7 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers):
 
         user.do_check_spam(
             author='test-author',
-            author_email='[email protected]',
+            author_email=user.username,
             content='test',
             request_headers=request_headers
         )