Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 79 additions & 25 deletions osf/external/spam/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,29 @@ def check_resource_with_spam_services(resource, content, author, author_email, r
"""
Return statements are used only for debugging and record keeping
"""
from osf.models import OSFUser, AbstractNode, Preprint

def set_found_spam_info(resource, client, details):
if not resource.spam_data.get('who_flagged'):
resource.spam_data['who_flagged'] = client.NAME
elif resource.spam_data['who_flagged'] != client.NAME:
resource.spam_data['who_flagged'] = 'both'

if client.NAME == 'akismet':
resource.spam_pro_tip = details
if client.NAME == 'oopspam':
resource.spam_data['oopspam_data'] = details

def set_collected_info(resource):
resource.spam_data['headers'] = {
'Remote-Addr': request_kwargs.get('remote_addr'),
'User-Agent': request_kwargs.get('user_agent'),
'Referer': request_kwargs.get('referer'),
}
resource.spam_data['content'] = content
resource.spam_data['author'] = author
resource.spam_data['author_email'] = author_email

any_is_spam = False

kwargs = dict(
Expand All @@ -121,30 +144,57 @@ def check_resource_with_spam_services(resource, content, author, author_email, r
if settings.OOPSPAM_ENABLED:
spam_clients.append(OOPSpamClient())

if isinstance(resource, OSFUser):
creator = resource
else:
creator = OSFUser.objects.get(username=author_email)

nodes_to_flag = creator.nodes.filter(is_public=True, is_deleted=False)
preprints_to_flag = creator.preprints.filter(is_public=True, deleted__isnull=True)

for client in spam_clients:
is_spam, details = client.check_content(**kwargs)
if is_spam:
any_is_spam = True
if not resource.spam_data.get('who_flagged'):
resource.spam_data['who_flagged'] = client.NAME
elif resource.spam_data['who_flagged'] != client.NAME:
resource.spam_data['who_flagged'] = 'both'

if client.NAME == 'akismet':
resource.spam_pro_tip = details
if client.NAME == 'oopspam':
resource.spam_data['oopspam_data'] = details

if any_is_spam:
resource.spam_data['headers'] = {
'Remote-Addr': request_kwargs.get('remote_addr'),
'User-Agent': request_kwargs.get('user_agent'),
'Referer': request_kwargs.get('referer'),
}
resource.spam_data['content'] = content
resource.spam_data['author'] = author
resource.spam_data['author_email'] = author_email
resource.flag_spam()
if not is_spam:
continue

any_is_spam = True

set_found_spam_info(resource, client, details)
if not isinstance(resource, OSFUser) and not creator.is_hammy:
set_found_spam_info(creator, client, details)

for node in nodes_to_flag:
set_found_spam_info(node, client, details)

for preprint in preprints_to_flag:
set_found_spam_info(preprint, client, details)

if not any_is_spam:
return any_is_spam

sentry.log_message(
f"Spam data detected by akismet/oops for {resource._id}:"
f"{resource.spam_pro_tip or resource.spam_data.get('oopspam_data')}"
)

set_collected_info(resource)
resource.flag_spam(skip_user_suspension=True)

# set spam_data but don't flag the creator because it'll happen at the end of check_resource_for_spam_postcommit
if not isinstance(resource, OSFUser) and not creator.is_hammy:
set_collected_info(creator)
creator.save()

for node in nodes_to_flag:
set_collected_info(node)
node.flag_spam(skip_user_suspension=True)

for preprint in preprints_to_flag:
set_collected_info(preprint)
preprint.flag_spam(skip_user_suspension=True)

AbstractNode.objects.bulk_update(nodes_to_flag, ['spam_status', 'spam_data', 'spam_pro_tip'], batch_size=100)
Preprint.objects.bulk_update(preprints_to_flag, ['spam_status', 'spam_data', 'spam_pro_tip'], batch_size=100)

return any_is_spam

Expand All @@ -158,6 +208,10 @@ def check_resource_for_spam_postcommit(guid, content, author, author_email, requ
if not resource:
return f'{guid} not found'

if isinstance(resource, OSFUser) and resource.is_hammy:
sentry.log_message(f"User {guid} is not checked for spam because of ham status")
return

spammy_domains = _check_resource_for_domains(resource, content)
if spammy_domains:
sentry.log_message(f"Spammy domains detected for {guid}: {spammy_domains}")
Expand All @@ -181,9 +235,9 @@ def check_resource_for_spam_postcommit(guid, content, author, author_email, requ

resource.save()

if hasattr(resource, 'check_spam_user'):
user = OSFUser.objects.get(username=author_email)
resource.check_spam_user(user)
user = OSFUser.objects.get(username=author_email)
if hasattr(resource, 'check_spam_user') and not user.is_hammy:
resource.check_spam_user(user, domains=list(spammy_domains))


@celery_app.task(ignore_results=False, max_retries=5, default_retry_delay=60)
Expand Down
29 changes: 16 additions & 13 deletions osf/models/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -2213,28 +2213,29 @@ def check_spam(self, user, saved_fields, request_headers):
request_headers,
)

def check_spam_user(self, user):
def check_spam_user(self, user, domains=None):
if (
settings.SPAM_ACCOUNT_SUSPENSION_ENABLED
and (timezone.now() - user.date_confirmed) <= settings.SPAM_ACCOUNT_SUSPENSION_THRESHOLD
settings.SPAM_ACCOUNT_SUSPENSION_ENABLED
and (timezone.now() - user.date_confirmed) <= settings.SPAM_ACCOUNT_SUSPENSION_THRESHOLD
) or (
settings.SPAM_AUTOBAN_IP_BLOCK and self.spam_data.get('oopspam_data', None)
and self.spam_data['oopspam_data']['Details']['isIPBlocked']
settings.SPAM_AUTOBAN_IP_BLOCK and self.spam_data.get('oopspam_data', None)
and self.spam_data['oopspam_data']['Details']['isIPBlocked']
):
self.suspend_spam_user(user)
self.suspend_spam_user(user, domains=domains)

def suspend_spam_user(self, user):
def suspend_spam_user(self, user, domains=None):
"""
This suspends a user's account and makes all their resources private. The key word here is SUSPENDS: this should not
delete the account or any info associated with it. It should not be assumed the account is spam, and it should
not be used to train spam-detecting services.
"""
domains = domains or []
if user.is_hammy:
return False
self.confirm_spam(save=True, train_spam_services=False)

self.flag_spam(skip_user_suspension=True)

# Suspend the flagged user for spam.
user.flag_spam()
if not user.is_disabled:
user.deactivate_account()
mails.send_mail(
Expand All @@ -2244,19 +2245,21 @@ def suspend_spam_user(self, user):
osf_support_email=settings.OSF_SUPPORT_EMAIL,
can_change_preferences=False,
)

user.confirm_spam(domains=domains or [], save=False, skip_resources_spam=True)
user.save()

# Make public nodes private from this contributor
for node in user.all_nodes:
if self._id != node._id and len(node.contributors) == 1 and node.is_public:
node.confirm_spam(save=True, train_spam_services=False)
node.confirm_spam(save=True, domains=domains, train_spam_services=False)

# Make preprints private from this contributor
for preprint in user.preprints.all():
if self._id != preprint._id and len(preprint.contributors) == 1 and preprint.is_public:
preprint.confirm_spam(save=True, train_spam_services=False)
preprint.confirm_spam(save=True, domains=domains, train_spam_services=False)

def flag_spam(self):
def flag_spam(self, skip_user_suspension=False):
""" Overrides SpamMixin#flag_spam.
"""
super().flag_spam()
Expand All @@ -2272,7 +2275,7 @@ def flag_spam(self):
)
log.save()

if settings.SPAM_THROTTLE_AUTOBAN:
if settings.SPAM_THROTTLE_AUTOBAN and not skip_user_suspension:
creator = self.creator
yesterday = timezone.now() - timezone.timedelta(days=1)
node_spam_count = creator.all_nodes.filter(spam_status__in=[SpamStatus.FLAGGED, SpamStatus.SPAM],
Expand Down
2 changes: 1 addition & 1 deletion osf/models/spam.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class Meta:
default=dict, blank=True, validators=[_validate_reports]
)

def flag_spam(self):
def flag_spam(self, **kwargs):
# If ham and unedited then tell user that they should read it again
if self.spam_status == SpamStatus.UNKNOWN:
self.spam_status = SpamStatus.FLAGGED
Expand Down
9 changes: 6 additions & 3 deletions osf/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -1431,15 +1431,18 @@ def confirm_email(self, token, merge=False):

return True

def confirm_spam(self, domains=None, save=True, train_spam_services=False):
def confirm_spam(self, domains=None, save=True, train_spam_services=False, skip_resources_spam=False):
self.deactivate_account()
super().confirm_spam(domains=domains, save=save, train_spam_services=train_spam_services)

if skip_resources_spam:
return

# Don't train on resources merely associated with spam user
for node in self.nodes.filter(is_public=True, is_deleted=False):
node.confirm_spam(train_spam_services=train_spam_services)
node.confirm_spam(domains=domains, train_spam_services=train_spam_services)
for preprint in self.preprints.filter(is_public=True, deleted__isnull=True):
preprint.confirm_spam(train_spam_services=train_spam_services)
preprint.confirm_spam(domains=domains, train_spam_services=train_spam_services)

def confirm_ham(self, save=False, train_spam_services=False):
self.reactivate_account()
Expand Down
4 changes: 2 additions & 2 deletions osf_tests/external/akismet/test_akismet.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,15 +140,15 @@ def test_do_spam_check_true(self, mock_akismet, user, request_headers):

user.do_check_spam(
author='test-author',
author_email='[email protected]',
author_email=user.username,
content='test',
request_headers=request_headers
)

data = parse_qs(mock_akismet.calls[0].request.body)

assert data['comment_author'] == ['test-author']
assert data['comment_author_email'] == ['[email protected]']
assert data['comment_author_email'] == [user.username]
assert data['blog'] == [settings.DOMAIN]

user.refresh_from_db()
Expand Down
4 changes: 2 additions & 2 deletions osf_tests/external/oopspam/test_oopspam.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_do_spam_check_true(self, mock_oopspam, user, request_headers):

user.do_check_spam(
author='test-author',
author_email='[email protected]',
author_email=user.username,
content='test',
request_headers=request_headers
)
Expand All @@ -119,7 +119,7 @@ def test_do_spam_check_false(self, mock_oopspam, user, request_headers):

user.do_check_spam(
author='test-author',
author_email='[email protected]',
author_email=user.username,
content='test',
request_headers=request_headers
)
Expand Down
Loading
Loading