From dc0b1a8a92d8ab20eaa3a74064db3699959d64a6 Mon Sep 17 00:00:00 2001
From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com>
Date: Sun, 1 Mar 2026 14:54:50 +0800
Subject: [PATCH 1/4] fix: Remove legacy analytics and recommendations
---
apps/analytics/__init__.py | 0
apps/analytics/forms.py | 61 -----
apps/analytics/tasks.py | 77 -------
apps/analytics/templates/analytics_email.txt | 20 --
apps/analytics/templates/dashboard.html | 91 --------
.../eligible_for_recommendations.html | 29 ---
.../templates/sentiment_labeler.html | 23 --
apps/analytics/views.py | 216 ------------------
apps/recommendations/__init__.py | 0
apps/recommendations/admin.py | 5 -
.../migrations/0001_initial.py | 54 -----
apps/recommendations/migrations/__init__.py | 0
apps/recommendations/models.py | 83 -------
apps/recommendations/tasks.py | 161 -------------
.../templates/recommendations.html | 49 ----
apps/recommendations/views.py | 21 --
16 files changed, 890 deletions(-)
delete mode 100644 apps/analytics/__init__.py
delete mode 100644 apps/analytics/forms.py
delete mode 100644 apps/analytics/tasks.py
delete mode 100644 apps/analytics/templates/analytics_email.txt
delete mode 100644 apps/analytics/templates/dashboard.html
delete mode 100644 apps/analytics/templates/eligible_for_recommendations.html
delete mode 100644 apps/analytics/templates/sentiment_labeler.html
delete mode 100644 apps/analytics/views.py
delete mode 100644 apps/recommendations/__init__.py
delete mode 100644 apps/recommendations/admin.py
delete mode 100644 apps/recommendations/migrations/0001_initial.py
delete mode 100644 apps/recommendations/migrations/__init__.py
delete mode 100644 apps/recommendations/models.py
delete mode 100644 apps/recommendations/tasks.py
delete mode 100644 apps/recommendations/templates/recommendations.html
delete mode 100644 apps/recommendations/views.py
diff --git a/apps/analytics/__init__.py b/apps/analytics/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/apps/analytics/forms.py b/apps/analytics/forms.py
deleted file mode 100644
index df99442..0000000
--- a/apps/analytics/forms.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from django import forms
-
-from apps.web.models import Review
-
-
-class ManualSentimentForm(forms.Form):
- DIFFICULTY_SENTIMENT_HARD = "-1"
- DIFFICULTY_SENTIMENT_SOMEWHAT_HARD = "-0.5"
- DIFFICULTY_SENTIMENT_NEUTRAL = "0"
- DIFFICULTY_SENTIMENT_SOMEWHAT_EASY = "0.5"
- DIFFICULTY_SENTIMENT_EASY = "1"
- DIFFICULTY_SENTIMENT_CHOICES = [
- (DIFFICULTY_SENTIMENT_HARD, "Hard"),
- (DIFFICULTY_SENTIMENT_SOMEWHAT_HARD, "Somewhat Hard"),
- (DIFFICULTY_SENTIMENT_NEUTRAL, "Neutral"),
- (DIFFICULTY_SENTIMENT_SOMEWHAT_EASY, "Somewhat Easy"),
- (DIFFICULTY_SENTIMENT_EASY, "Easy"),
- ]
-
- QUALITY_SENTIMENT_BAD = "-1"
- QUALITY_SENTIMENT_SOMEWHAT_BAD = "-0.5"
- QUALITY_SENTIMENT_NEUTRAL = "0"
- QUALITY_SENTIMENT_SOMEWHAT_QUALITY = "0.5"
- QUALITY_SENTIMENT_QUALITY = "1"
- QUALITY_SENTIMENT_CHOICES = [
- (QUALITY_SENTIMENT_BAD, "Bad"),
- (QUALITY_SENTIMENT_SOMEWHAT_BAD, "Somewhat Bad"),
- (QUALITY_SENTIMENT_NEUTRAL, "Neutral"),
- (QUALITY_SENTIMENT_SOMEWHAT_QUALITY, "Somewhat Good"),
- (QUALITY_SENTIMENT_QUALITY, "Good"),
- ]
-
- review_id = forms.IntegerField(
- required=True,
- widget=forms.HiddenInput(),
- )
- difficulty_sentiment = forms.ChoiceField(
- choices=DIFFICULTY_SENTIMENT_CHOICES,
- initial=DIFFICULTY_SENTIMENT_NEUTRAL,
- required=True,
- widget=forms.RadioSelect(),
- )
- quality_sentiment = forms.ChoiceField(
- choices=QUALITY_SENTIMENT_CHOICES,
- initial=QUALITY_SENTIMENT_NEUTRAL,
- required=True,
- widget=forms.RadioSelect(),
- )
-
- def clean_difficulty_sentiment(self):
- return float(self.cleaned_data["difficulty_sentiment"])
-
- def clean_quality_sentiment(self):
- return float(self.cleaned_data["quality_sentiment"])
-
- def save_sentiment(self):
- review = Review.objects.get(id=self.cleaned_data["review_id"])
- review.sentiment_labeler = Review.MANUAL_SENTIMENT_LABELER
- review.difficulty_sentiment = self.cleaned_data["difficulty_sentiment"]
- review.quality_sentiment = self.cleaned_data["quality_sentiment"]
- review.save()
diff --git a/apps/analytics/tasks.py b/apps/analytics/tasks.py
deleted file mode 100644
index 6295f6f..0000000
--- a/apps/analytics/tasks.py
+++ /dev/null
@@ -1,77 +0,0 @@
-from datetime import datetime, timedelta
-
-from celery import shared_task
-from django.conf import settings
-from django.contrib.auth.models import User
-from django.core.mail import send_mail
-from django.db.models import Q
-from django.template import Context
-from django.template.loader import get_template
-
-from apps.web.models import CourseOffering, Review, Vote
-from lib import constants, task_utils, terms
-
-
-@shared_task
-@task_utils.email_if_fails
-def send_analytics_email_update(lookback=timedelta(days=7)):
- context = _get_analytics_email_context(lookback)
- content = get_template("analytics_email.txt").render(Context(context))
- send_mail(
- "Layup List Weekly Update",
- content,
- constants.SUPPORT_EMAIL,
- [email for _, email in settings.ADMINS],
- fail_silently=False,
- )
-
-
-def _get_analytics_email_context(lookback):
- changes_since = datetime.now() - lookback
- new_query = Q(created_at__gte=changes_since)
- users = User.objects.all()
- quality_votes = Vote.objects.filter(category=Vote.CATEGORIES.QUALITY)
- quality_upvotes = quality_votes.filter(value=1)
- quality_downvotes = quality_votes.filter(value=-1)
- difficulty_votes = Vote.objects.filter(category=Vote.CATEGORIES.DIFFICULTY)
- difficulty_upvotes = difficulty_votes.filter(value=1)
- difficulty_downvotes = difficulty_votes.filter(value=-1)
- return {
- "users": {
- "all": users,
- "new": users.filter(date_joined__gte=changes_since),
- "unique_recent_logins": users.filter(last_login__gte=changes_since),
- },
- "votes": {
- "all_quality_upvotes": quality_upvotes,
- "all_quality_downvotes": quality_downvotes,
- "all_difficulty_upvotes": difficulty_upvotes,
- "all_difficulty_downvotes": difficulty_downvotes,
- "new_quality_upvotes": quality_upvotes.filter(new_query),
- "new_quality_downvotes": quality_downvotes.filter(new_query),
- "new_difficulty_upvotes": difficulty_upvotes.filter(new_query),
- "new_difficulty_downvotes": difficulty_downvotes.filter(new_query),
- },
- "reviews": {
- "all": Review.objects.all(),
- "new": Review.objects.filter(new_query),
- },
- }
-
-
-@shared_task
-@task_utils.email_if_fails
-def possibly_request_term_update():
- next_term = terms.get_next_term(constants.CURRENT_TERM)
- next_term_count = CourseOffering.objects.filter(term=next_term).count()
- if next_term_count >= constants.OFFERINGS_THRESHOLD_FOR_TERM_UPDATE:
- send_mail(
- "Term may be out of date ({} offerings with term {})".format(
- next_term_count, next_term
- ),
- "Consider modifying the environment variable.",
- constants.SUPPORT_EMAIL,
- [email for _, email in settings.ADMINS],
- fail_silently=False,
- )
- return next_term_count
diff --git a/apps/analytics/templates/analytics_email.txt b/apps/analytics/templates/analytics_email.txt
deleted file mode 100644
index 056bb25..0000000
--- a/apps/analytics/templates/analytics_email.txt
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ users.all.count }} users ({{ users.new.count }} new).
-
-{{ users.unique_recent_logins.count }} unique users logged in this past week.
-
-{{ votes.all_quality_upvotes.count }} quality upvotes ({{ votes.new_quality_upvotes.count }} new).
-
-{{ votes.all_quality_downvotes.count }} quality downvotes ({{ votes.new_quality_downvotes.count }} new).
-
-{{ votes.all_difficulty_upvotes.count }} difficulty upvotes ({{ votes.new_difficulty_upvotes.count }} new).
-
-{{ votes.all_difficulty_downvotes.count }} difficulty downvotes ({{ votes.new_difficulty_downvotes.count }} new).
-
-{{ reviews.all.count }} reviews ({{ reviews.new.count }} new).
-
-{% for review in reviews.new %}
-{{ review.term }} {{ review.course }} {{ review.professor }} {{ review.user }}
-
-{{ review.comments }}
-
-{% endfor %}
diff --git a/apps/analytics/templates/dashboard.html b/apps/analytics/templates/dashboard.html
deleted file mode 100644
index ec389a5..0000000
--- a/apps/analytics/templates/dashboard.html
+++ /dev/null
@@ -1,91 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Analytics | Layup List{% endblock %}
-{% block content %}
-{% load admin_urls %}
-
-
-
Analytics
-
{{ activated_accounts }} users are activated.
-
View Crawled Data
-
Sentiment Labeler
-
-
- | Range |
- Users |
- Quality Ratings |
- Difficulty Ratings |
- Reviews |
-
-
- {% for c1, c2, c3, c4, c5 in overall_table %}
-
- | {{ c1}} |
- {{ c2 }} |
- {{ c3 }} |
- {{ c4 }} |
- {{ c5 }} |
-
- {% endfor %}
-
-
-
-
-
-
-
-
-
-
Vote Breakdown
-
-
- | Range |
- Quality Ratings |
- Difficulty Ratings |
- Total Unvotes |
-
-
- {% for c1, c2, c3, c4 in vote_table %}
-
- | {{ c1}} |
- {{ c2 }} |
- {{ c3 }} |
- {{ c4 }} |
-
- {% endfor %}
-
-
-
-
-
-
-
-
Class Breakdown
-
-
- | Class Year |
- {% for year, count in class_breakdown %}
- {{ year }} |
- {% endfor %}
-
-
-
- | # Users |
- {% for year, count in class_breakdown %}
- {{ count }} |
- {% endfor %}
-
-
-
-
-
-{% endblock %}
diff --git a/apps/analytics/templates/eligible_for_recommendations.html b/apps/analytics/templates/eligible_for_recommendations.html
deleted file mode 100644
index 317e51c..0000000
--- a/apps/analytics/templates/eligible_for_recommendations.html
+++ /dev/null
@@ -1,29 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Eligible For Recommendations | Layup List{% endblock %}
-{% block content %}
-{% load admin_urls %}
-
-
-
Eligible For Recommendations ({{ users_and_votes | length }})
-
-
-
-
-
-
-
- | Good Upvotes |
- Email |
-
-
- {% for user, user_id, vote_count in users_and_votes %}
-
- | {{ vote_count }} |
- {{ user }} |
-
- {% endfor %}
-
-
-
-
-{% endblock %}
diff --git a/apps/analytics/templates/sentiment_labeler.html b/apps/analytics/templates/sentiment_labeler.html
deleted file mode 100644
index db3e032..0000000
--- a/apps/analytics/templates/sentiment_labeler.html
+++ /dev/null
@@ -1,23 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Sentiment Labeler | Layup List{% endblock %}
-{% block content %}
-
-
-
Sentiment Labeler ({{ count }} remaining, {{ labeled_count }} labeled)
-
{{ review.course }} ({{ review.id }})
-
- {% if review.term %}
- {{ review.term }}
- {% if review.professor %} with {{ review.professor }}{% endif %}:
- {% endif %}
- {{ review.comments }}
-
-
-
-
-{% endblock %}
diff --git a/apps/analytics/views.py b/apps/analytics/views.py
deleted file mode 100644
index 5422f04..0000000
--- a/apps/analytics/views.py
+++ /dev/null
@@ -1,216 +0,0 @@
-import datetime
-from collections import Counter
-from random import randint
-
-import pytz
-from django.contrib.admin.views.decorators import staff_member_required
-from django.contrib.auth.decorators import user_passes_test
-from django.contrib.auth.models import User
-from django.db.models import Count
-from django.shortcuts import render
-from django.views.decorators.http import require_safe
-
-from apps.analytics.forms import ManualSentimentForm
-from apps.recommendations.models import Recommendation
-from apps.web import models
-from lib import constants
-
-LIMIT = 15
-
-
-@require_safe
-@staff_member_required
-def home(request):
- course_picker = User.objects.get(username="CoursePicker")
-
- non_zero_votes = models.Vote.objects.exclude(value=0)
- num_voters = non_zero_votes.values_list("user").distinct().count()
- num_quality_voters = (
- non_zero_votes.filter(category=models.Vote.CATEGORIES.QUALITY)
- .values_list("user")
- .distinct()
- .count()
- )
- num_difficulty_voters = (
- non_zero_votes.filter(category=models.Vote.CATEGORIES.DIFFICULTY)
- .values_list("user")
- .distinct()
- .count()
- )
- num_reviewers = (
- models.Review.objects.exclude(user=course_picker)
- .values_list("user")
- .distinct()
- .count()
- )
-
- now = datetime.datetime.now(tz=pytz.timezone("US/Eastern"))
- month_ago = ("Month", now - datetime.timedelta(days=31))
- week_ago = ("Week", now - datetime.timedelta(weeks=1))
- today = ("Today", now - datetime.timedelta(hours=24))
-
- overall_table = [
- (
- "Total",
- User.objects.count(),
- models.Vote.objects.exclude(value=0)
- .filter(category=models.Vote.CATEGORIES.QUALITY)
- .count(),
- models.Vote.objects.exclude(value=0)
- .filter(category=models.Vote.CATEGORIES.DIFFICULTY)
- .count(),
- "{} ({} exclusive)".format(
- models.Review.objects.count(),
- models.Review.objects.exclude(user=course_picker).count(),
- ),
- )
- ]
- for name, earliest_date in [month_ago, week_ago, today]:
- non_zero_votes_since = non_zero_votes.filter(created_at__gte=earliest_date)
- overall_table.append(
- (
- name,
- User.objects.filter(date_joined__gte=earliest_date).count(),
- non_zero_votes_since.filter(
- category=models.Vote.CATEGORIES.QUALITY
- ).count(),
- non_zero_votes_since.filter(
- category=models.Vote.CATEGORIES.DIFFICULTY
- ).count(),
- models.Review.objects.filter(created_at__gte=earliest_date).count(),
- )
- )
-
- vote_table = [
- (
- "Total",
- models.Vote.objects.filter(
- value__gte=1, category=models.Vote.CATEGORIES.QUALITY
- ).count(),
- models.Vote.objects.filter(
- value__gte=1, category=models.Vote.CATEGORIES.DIFFICULTY
- ).count(),
- models.Vote.objects.filter(value=0).count(),
- )
- ]
- for name, earliest_date in [month_ago, week_ago, today]:
- vote_table.append(
- (
- name,
- models.Vote.objects.filter(
- value__gte=1,
- category=models.Vote.CATEGORIES.QUALITY,
- created_at__gte=earliest_date,
- ).count(),
- models.Vote.objects.filter(
- value__gte=1,
- category=models.Vote.CATEGORIES.DIFFICULTY,
- created_at__gte=earliest_date,
- ).count(),
- models.Vote.objects.filter(
- value=0, created_at__gte=earliest_date
- ).count(),
- )
- )
-
- usernames = User.objects.exclude(id=course_picker.id).values_list(
- "username", flat=True
- )
- c = Counter()
- for username in usernames:
- year_string = username.split(".")[-1]
- c[year_string] += 1
- class_breakdown = sorted(
- [
- (
- year,
- count,
- )
- for year, count in c.items()
- if len(year) == 2
- ]
- )
-
- recommendations_last_updated = []
- for creator, description in Recommendation.CREATORS:
- rec = Recommendation.objects.filter(creator=creator).order_by("created_at")[:1]
- if rec:
- recommendations_last_updated.append((description, rec[0].created_at))
- else:
- recommendations_last_updated.append((description, "never"))
-
- return render(
- request,
- "dashboard.html",
- {
- "overall_table": overall_table,
- "vote_table": vote_table,
- "num_voters": num_voters,
- "num_quality_voters": num_quality_voters,
- "num_difficulty_voters": num_difficulty_voters,
- "num_reviewers": num_reviewers,
- "recommendations_last_updated": recommendations_last_updated,
- "activated_accounts": User.objects.filter(is_active=True).count(),
- "class_breakdown": class_breakdown,
- },
- )
-
-
-@require_safe
-@staff_member_required
-@user_passes_test(lambda u: u.is_superuser)
-def eligible_for_recommendations(request):
- eligible_users_and_votes = (
- models.Vote.objects.filter(
- value__gte=4, category=models.Vote.CATEGORIES.QUALITY
- )
- .values_list("user")
- .annotate(vote_count=Count("user"))
- .filter(vote_count__gte=constants.REC_UPVOTE_REQ)
- .order_by("-vote_count")
- .values_list("user__username", "user", "vote_count")
- )
- return render(
- request,
- "eligible_for_recommendations.html",
- {"users_and_votes": eligible_users_and_votes},
- )
-
-
-@staff_member_required
-@user_passes_test(lambda u: u.is_superuser)
-def sentiment_labeler(request):
- if request.method == "POST":
- form = ManualSentimentForm(request.POST)
- if form.is_valid():
- form.save_sentiment()
- else:
- return render(
- request,
- "sentiment_labeler.html",
- {
- "review": models.Review.objects.get(id=form.review_id),
- "form": form,
- },
- )
- unlabeled_reviews = models.Review.objects.filter(
- user=User.objects.get(username="CoursePicker"),
- ).exclude(
- sentiment_labeler=models.Review.MANUAL_SENTIMENT_LABELER,
- )
- count = unlabeled_reviews.count()
- random_index = randint(0, count - 1)
- review = unlabeled_reviews[random_index]
- form = ManualSentimentForm(initial={"review_id": review.id})
- return render(
- request,
- "sentiment_labeler.html",
- {
- "count": count,
- "labeled_count": models.Review.objects.filter(
- sentiment_labeler=models.Review.MANUAL_SENTIMENT_LABELER
- ).count(),
- "form": form,
- "review": review,
- },
- )
diff --git a/apps/recommendations/__init__.py b/apps/recommendations/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/apps/recommendations/admin.py b/apps/recommendations/admin.py
deleted file mode 100644
index 4ae9c82..0000000
--- a/apps/recommendations/admin.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from django.contrib import admin
-
-from .models import Recommendation
-
-admin.site.register(Recommendation)
diff --git a/apps/recommendations/migrations/0001_initial.py b/apps/recommendations/migrations/0001_initial.py
deleted file mode 100644
index cb06bb0..0000000
--- a/apps/recommendations/migrations/0001_initial.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Generated by Django 5.0.8 on 2024-08-22 14:35
-
-import django.db.models.deletion
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
- initial = True
-
- dependencies = [
- ("web", "0002_alter_student_unauth_session_ids"),
- ]
-
- operations = [
- migrations.CreateModel(
- name="Recommendation",
- fields=[
- (
- "id",
- models.BigAutoField(
- auto_created=True,
- primary_key=True,
- serialize=False,
- verbose_name="ID",
- ),
- ),
- (
- "creator",
- models.CharField(
- choices=[("docsim", "Document Similarity")], max_length=16
- ),
- ),
- ("weight", models.FloatField(null=True)),
- ("created_at", models.DateTimeField(auto_now_add=True)),
- ("updated_at", models.DateTimeField(auto_now=True)),
- (
- "course",
- models.ForeignKey(
- on_delete=django.db.models.deletion.CASCADE,
- related_name="recommendations",
- to="web.course",
- ),
- ),
- (
- "recommendation",
- models.ForeignKey(
- on_delete=django.db.models.deletion.CASCADE,
- related_name="recommenders",
- to="web.course",
- ),
- ),
- ],
- ),
- ]
diff --git a/apps/recommendations/migrations/__init__.py b/apps/recommendations/migrations/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/apps/recommendations/models.py b/apps/recommendations/models.py
deleted file mode 100644
index 68a2c68..0000000
--- a/apps/recommendations/models.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from __future__ import unicode_literals
-
-from django.db import models
-from django.db.models import Q
-
-from apps.web.models import Course, CourseOffering, Vote
-
-
-class GroupedRecommendation(object):
- def __init__(self, course):
- self.course = course
- self.weight = 0.0
- self.recs = []
-
- def reason(self):
- return " ".join([r.course.short_name() for r in self.recs])
-
-
-class RecommendationManager(models.Manager):
- def for_user(self, user, all_terms=False):
- interacted_courses = Vote.objects.filter(user=user).exclude(value=0)
- interacted_course_ids = interacted_courses.values_list("course_id", flat=True)
- crosslisted_interacted_course_ids = Course.objects.filter(
- crosslisted_courses__in=interacted_course_ids
- ).values_list("id", flat=True)
- upvoted_course_ids = interacted_courses.filter(
- value=1, category=Vote.CATEGORIES.QUALITY
- ).values_list("course_id", flat=True)
-
- recommendations = self.filter(course_id__in=upvoted_course_ids).exclude(
- Q(recommendation_id__in=interacted_course_ids)
- | Q(recommendation_id__in=crosslisted_interacted_course_ids)
- )
-
- if not all_terms:
- recommendations = recommendations.filter(
- recommendation_id__in=CourseOffering.objects.course_ids_for_term()
- )
-
- recommendations = recommendations.prefetch_related(
- "course",
- "recommendation",
- "recommendation__distribs",
- "recommendation__review_set",
- "recommendation__courseoffering_set",
- ).order_by("-weight")[:500]
-
- grouped_recs = {}
- for rec in recommendations:
- grouped_recs[rec.recommendation] = grouped_recs.get(
- rec.recommendation, GroupedRecommendation(rec.recommendation)
- )
- grouped_recs[rec.recommendation].weight += rec.weight
- grouped_recs[rec.recommendation].recs.append(rec)
-
- sorted_grouped_recs = sorted(grouped_recs.values(), key=lambda x: -x.weight)
-
- return sorted_grouped_recs[:30]
-
-
-class Recommendation(models.Model):
- objects = RecommendationManager()
-
- DOCUMENT_SIMILARITY = "docsim"
- CREATORS = ((DOCUMENT_SIMILARITY, "Document Similarity"),)
-
- course = models.ForeignKey(
- "web.Course", related_name="recommendations", on_delete=models.CASCADE
- )
- recommendation = models.ForeignKey(
- "web.Course", related_name="recommenders", on_delete=models.CASCADE
- )
-
- creator = models.CharField(max_length=16, choices=CREATORS)
- weight = models.FloatField(null=True)
-
- created_at = models.DateTimeField(auto_now_add=True)
- updated_at = models.DateTimeField(auto_now=True)
-
- def __unicode__(self):
- return "{} {} -> {}".format(
- self.weight, self.course.short_name(), self.recommendation
- )
diff --git a/apps/recommendations/tasks.py b/apps/recommendations/tasks.py
deleted file mode 100644
index 72b3b1c..0000000
--- a/apps/recommendations/tasks.py
+++ /dev/null
@@ -1,161 +0,0 @@
-import re
-from itertools import chain
-from time import time
-
-import numpy as np
-from celery import shared_task
-from django.db import transaction
-from django.db.models import Q
-from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
-
-from apps.recommendations.models import Recommendation
-from apps.web.models import Course
-from lib import task_utils
-
-MIN_COURSE_DESCRIPTION_LENGTH = 80
-RECOMMENDATIONS_PER_CLASS = 8
-
-PERFORM_TFIDF = True
-INCLUDE_REVIEWS = False # very noisy, turn off
-
-
-@shared_task
-@task_utils.email_if_fails
-def generate_course_description_similarity_recommendations():
- t0 = time()
- print("loading word jumbles into memory...")
- course_ids = []
- reverse_course_ids = {}
- course_descriptions = []
- i = 0
- for course in Course.objects.exclude(description=None).exclude(description=""):
- if len(course.description) < MIN_COURSE_DESCRIPTION_LENGTH:
- # these are typically uninteresting classes e.g. thesis
- continue
- course_ids.append(course.id)
- reverse_course_ids[course.id] = i
- word_jumble = [_clean_text_to_raw_words(course.description)]
- word_jumble.append(_clean_text_to_raw_words(course.title))
- # reviews are very noisy -- they tend to be similar between classes
- if INCLUDE_REVIEWS:
- for review in course.review_set.all():
- word_jumble.append(_clean_text_to_raw_words(review.comments))
- course_descriptions.append(" ".join(word_jumble))
- i += 1
- print(f"finished in {time() - t0}")
-
- t0 = time()
- print("fitting to count vectorizer...")
- count_vect = CountVectorizer()
- corpus = count_vect.fit_transform(course_descriptions)
- print(f"shape is {corpus.shape}")
- print(f"finished in {time() - t0}")
-
- if PERFORM_TFIDF:
- t0 = time()
- print("tfidf transform...")
- tfidf_transformer = TfidfTransformer()
- corpus = tfidf_transformer.fit_transform(corpus)
- print(f"shape is {corpus.shape}")
- print(f"finished in {time() - t0}")
-
- t0 = time()
- print("compute cosine similarity ")
- pairwise_similarity = corpus * corpus.T
- print(f"shape is {pairwise_similarity.shape}")
- print(f"finished in {time() - t0}")
-
- t0 = time()
- print("calculating and creating recommendations...")
- psarray = pairwise_similarity.toarray()
-
- # zero out columns corresponding to thesis, research, independent, and grad
- course_ids_to_zero = Course.objects.filter(
- Q(title__icontains="thesis")
- | Q(title__icontains="research")
- | Q(title__icontains="independent")
- | Q(title__icontains="seminar")
- | Q(title__icontains="first-year")
- | Q(title__icontains="foreign study")
- | Q(title__icontains="senior")
- | Q(title__icontains="honors")
- | Q(number__gt=99)
- ).values_list("id", flat=True)
- for zero_id in course_ids_to_zero:
- if zero_id in reverse_course_ids:
- psarray[:, reverse_course_ids[zero_id]] = 0
-
- # zero out crosslistings and same titles, so only one rep for each
- # crosslisting
- covered_ids = set()
- for i in range(psarray.shape[1]):
- if i in covered_ids:
- continue
- course_id = course_ids[i]
- course = Course.objects.get(id=course_id)
- for xlist_course in list(
- chain(
- course.crosslisted_courses.all(),
- Course.objects.filter(title=course.title),
- )
- ):
- if xlist_course == course:
- continue
- if xlist_course.id in reverse_course_ids:
- xlist_col = reverse_course_ids[xlist_course.id]
- if xlist_course.id not in covered_ids:
- psarray[:, xlist_col] = 0
- covered_ids.add(xlist_col)
- covered_ids.add(i)
-
- recommendations_to_create = []
- for i in range(psarray.shape[0]):
- current_class = Course.objects.get(id=course_ids[i])
-
- # zero out the diagonal
- zero_ids = [i]
-
- # zero out crosslisted classes
- zero_ids += list(current_class.crosslisted_courses.values_list("id", flat=True))
-
- # zero out classes with the same title
- zero_ids += list(
- Course.objects.filter(title=current_class.title).values_list(
- "id", flat=True
- )
- )
-
- for zero_id in zero_ids:
- if zero_id in reverse_course_ids:
- psarray[i, reverse_course_ids[zero_id]] = 0
-
- for other_i in np.argpartition(psarray[i, :], -RECOMMENDATIONS_PER_CLASS)[
- -RECOMMENDATIONS_PER_CLASS:
- ]:
- course_id = course_ids[other_i]
-
- recommendations_to_create.append(
- Recommendation(
- course=current_class,
- recommendation_id=course_id,
- creator=Recommendation.DOCUMENT_SIMILARITY,
- weight=psarray[i, other_i],
- )
- )
-
- with transaction.atomic():
- Recommendation.objects.filter(
- creator=Recommendation.DOCUMENT_SIMILARITY
- ).delete()
- Recommendation.objects.bulk_create(recommendations_to_create)
-
- print(f"finished in {time() - t0}")
-
-
-def _clean_text_to_raw_words(text):
- if text:
- return " ".join(
- [w for w in re.sub(r"[^a-zA-Z ]", "", text).lower().split() if len(w) > 3]
- )
- else:
- return ""
diff --git a/apps/recommendations/templates/recommendations.html b/apps/recommendations/templates/recommendations.html
deleted file mode 100644
index 70ea74f..0000000
--- a/apps/recommendations/templates/recommendations.html
+++ /dev/null
@@ -1,49 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Recommendations | Layup List{% endblock %}
-{% block content %}
-
-
-
{% if request.GET.show_all %}All{% else %}{{ constants.CURRENT_TERM }}{% endif %} Recommended For You
- {% if request.user.student.can_see_recommendations %}
- {% if recommendations %}
-
These courses have similar course descriptions to courses you have upvoted. If you vote on any of the classes on this list they will go away.
-
Want better recommendations? Upvote classes that you think are good!
-
- {% if request.GET.show_all %}
- Show {{ constants.CURRENT_TERM}} only.
- {% else %}
- Show all terms.
- {% endif %}
-
-
-
- | Course |
- Similar To |
-
-
- {% for rec in recommendations %}
-
- |
- {% include "components/course_summary.html" with course=rec.course only %}
- |
-
- {{ rec.reason }}
- |
-
- {% endfor %}
-
-
- {% else %}
- No recommendations to show. Try upvoting more classes!
- {% endif %}
- {% else %}
-
- To see course recommendations, you must:
-
- - Have upvoted {{ constants.REC_UPVOTE_REQ }} classes as good.
-
-
- {% endif %}
-
-
-{% endblock %}
diff --git a/apps/recommendations/views.py b/apps/recommendations/views.py
deleted file mode 100644
index d4c5f52..0000000
--- a/apps/recommendations/views.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from django.contrib.auth.decorators import login_required
-from django.shortcuts import render
-from django.views.decorators.http import require_safe
-
-from apps.recommendations.models import Recommendation
-from lib import constants
-
-
-@require_safe
-@login_required
-def recommendations(request):
- return render(
- request,
- "recommendations.html",
- {
- "recommendations": Recommendation.objects.for_user(
- request.user, "show_all" in request.GET
- ),
- "constants": constants,
- },
- )
From ac95f39f5a1988da8372a8f6deac607bef7f7e61 Mon Sep 17 00:00:00 2001
From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com>
Date: Sun, 1 Mar 2026 14:55:31 +0800
Subject: [PATCH 2/4] fix: Remove legacy django html templates
---
.../spider/templates/crawled_data_detail.html | 21 ------
apps/spider/templates/crawled_data_list.html | 33 ---------
apps/web/templates/base.html | 73 -------------------
apps/web/templates/confirmation.html | 28 -------
apps/web/templates/footer.html | 8 --
apps/web/templates/instructions.html | 24 ------
apps/web/templates/login.html | 41 -----------
apps/web/templates/logout.html | 21 ------
apps/web/templates/navbar.html | 55 --------------
.../templates/password_reset_complete.html | 12 ---
.../web/templates/password_reset_confirm.html | 26 -------
apps/web/templates/password_reset_done.html | 12 ---
apps/web/templates/password_reset_email.html | 15 ----
apps/web/templates/password_reset_form.html | 20 -----
apps/web/templates/signup.html | 37 ----------
15 files changed, 426 deletions(-)
delete mode 100644 apps/spider/templates/crawled_data_detail.html
delete mode 100644 apps/spider/templates/crawled_data_list.html
delete mode 100644 apps/web/templates/base.html
delete mode 100644 apps/web/templates/confirmation.html
delete mode 100644 apps/web/templates/footer.html
delete mode 100644 apps/web/templates/instructions.html
delete mode 100644 apps/web/templates/login.html
delete mode 100644 apps/web/templates/logout.html
delete mode 100644 apps/web/templates/navbar.html
delete mode 100644 apps/web/templates/password_reset_complete.html
delete mode 100644 apps/web/templates/password_reset_confirm.html
delete mode 100644 apps/web/templates/password_reset_done.html
delete mode 100644 apps/web/templates/password_reset_email.html
delete mode 100644 apps/web/templates/password_reset_form.html
delete mode 100644 apps/web/templates/signup.html
diff --git a/apps/spider/templates/crawled_data_detail.html b/apps/spider/templates/crawled_data_detail.html
deleted file mode 100644
index 372c36a..0000000
--- a/apps/spider/templates/crawled_data_detail.html
+++ /dev/null
@@ -1,21 +0,0 @@
-{% extends "base.html" %}
-{% block title %}{{ crawled_data }}| Layup List{% endblock %}
-{% block content %}
-
-
-
{{ crawled_data }}
- {% if crawled_data.has_change %}
-
Pending
-
-
{{ crawled_data.diff }}
- {% else %}
-
Up to date
-
{{ crawled_data.pretty_current_data }}
- {% endif %}
-
-
-
-{% endblock %}
diff --git a/apps/spider/templates/crawled_data_list.html b/apps/spider/templates/crawled_data_list.html
deleted file mode 100644
index a799b5e..0000000
--- a/apps/spider/templates/crawled_data_list.html
+++ /dev/null
@@ -1,33 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Spider | Layup List{% endblock %}
-{% block content %}
-
-
-
Spider
-
-
-
-
- | Resource |
- Type |
- Pending Changes |
-
-
-
- {% for crawled_data in crawled_datas %}
-
- | {{ crawled_data.resource }} |
- {{ crawled_data.data_type }} |
-
- {% if crawled_data.has_change %}PENDING{% endif %}
- |
-
- {% endfor %}
-
-
-
-
-{% endblock %}
diff --git a/apps/web/templates/base.html b/apps/web/templates/base.html
deleted file mode 100644
index 7c8cda1..0000000
--- a/apps/web/templates/base.html
+++ /dev/null
@@ -1,73 +0,0 @@
-{% load static %}
-{% load pipeline %}
-
-
-
-
-
-
- {% block title %}UMJI's Course Review Site{% endblock %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/apps/web/templates/confirmation.html b/apps/web/templates/confirmation.html
deleted file mode 100644
index d4a3ec5..0000000
--- a/apps/web/templates/confirmation.html
+++ /dev/null
@@ -1,28 +0,0 @@
-{% extends "base.html" %} {% block content %}
-
-
-
-
-
-
-
-
-
- {% if error %}
-
{{ error }}
- {% elif already_confirmed %}
-
This email is already confirmed. Please login.
- {% else %}
-
Email successfully confirmed! Please click here to login.
- {% endif %}
-
-
-
-
-
-
-
-{% endblock %}
diff --git a/apps/web/templates/footer.html b/apps/web/templates/footer.html
deleted file mode 100644
index ce67c25..0000000
--- a/apps/web/templates/footer.html
+++ /dev/null
@@ -1,8 +0,0 @@
-
diff --git a/apps/web/templates/instructions.html b/apps/web/templates/instructions.html
deleted file mode 100644
index 722a83b..0000000
--- a/apps/web/templates/instructions.html
+++ /dev/null
@@ -1,24 +0,0 @@
-{% extends "base.html" %} {% block content %}
-
-
-
-
-
-
-
-
Registration successful. Please check your email (and in spam folder) for a confirmation link and follow the instructions to activate your account.
-
-
-
-
-
-
-
-
-{% endblock %}
diff --git a/apps/web/templates/login.html b/apps/web/templates/login.html
deleted file mode 100644
index b5600cb..0000000
--- a/apps/web/templates/login.html
+++ /dev/null
@@ -1,41 +0,0 @@
-{% extends "base.html" %} {% block content %}
-
-{% endblock %}
diff --git a/apps/web/templates/logout.html b/apps/web/templates/logout.html
deleted file mode 100644
index d94bfb1..0000000
--- a/apps/web/templates/logout.html
+++ /dev/null
@@ -1,21 +0,0 @@
-{% extends "base.html" %} {% block content %}
-
-
-
-
-
-
-
-
You are successfully logged out. If this was a mistake, please click here to login.
-
-
-
-
-
-
-
-
-
-
-
-{% endblock %}
diff --git a/apps/web/templates/navbar.html b/apps/web/templates/navbar.html
deleted file mode 100644
index b2d5ec2..0000000
--- a/apps/web/templates/navbar.html
+++ /dev/null
@@ -1,55 +0,0 @@
-{% load static %}
-
-
diff --git a/apps/web/templates/password_reset_complete.html b/apps/web/templates/password_reset_complete.html
deleted file mode 100644
index 9e0a64b..0000000
--- a/apps/web/templates/password_reset_complete.html
+++ /dev/null
@@ -1,12 +0,0 @@
-{% extends "base.html" %}
-
-{% block title %}Password reset complete{% endblock %}
-
-{% block content %}
-
-
-
Your password has been set. You may go ahead and log in now.
-
Log in
-
-
-{% endblock %}
diff --git a/apps/web/templates/password_reset_confirm.html b/apps/web/templates/password_reset_confirm.html
deleted file mode 100644
index bad4f91..0000000
--- a/apps/web/templates/password_reset_confirm.html
+++ /dev/null
@@ -1,26 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Set New Password{% endblock %}
-{% block content %}
-
-
- {% if validlink %}
-
Please enter your new password twice.
- So we can verify you typed it in correctly.
-
-
- {% else %}
-
Password reset unsuccessful
-
The password reset link was invalid,
- possibly because it has already been used.
- Please request a new password reset.
- {% endif %}
-
-
-{% endblock %}
diff --git a/apps/web/templates/password_reset_done.html b/apps/web/templates/password_reset_done.html
deleted file mode 100644
index 9142177..0000000
--- a/apps/web/templates/password_reset_done.html
+++ /dev/null
@@ -1,12 +0,0 @@
-{% extends "base.html" %}
-{% block title %}
- Password reset successful
-{% endblock %}
-{% block content %}
-
-
-
We've e-mailed you instructions for setting your password to the e-mail address you submitted.
-
You should be receiving it shortly.
-
-
-{% endblock %}
diff --git a/apps/web/templates/password_reset_email.html b/apps/web/templates/password_reset_email.html
deleted file mode 100644
index 96bbc35..0000000
--- a/apps/web/templates/password_reset_email.html
+++ /dev/null
@@ -1,15 +0,0 @@
-{% autoescape off %}
-You're receiving this e-mail because you requested a password reset for your user account at {{ site_name }}.
-
-Please go to the following page and choose a new password:
-{% block reset_link %}
-https://www.layuplist.com{% url 'password_reset_confirm' uidb64=uid token=token %}
-{% endblock %}
-
-Your username, in case you've forgotten: {{ user.username }}
-
-Thanks for using our site!
-
-LL
-
-{% endautoescape %}
diff --git a/apps/web/templates/password_reset_form.html b/apps/web/templates/password_reset_form.html
deleted file mode 100644
index 08bb429..0000000
--- a/apps/web/templates/password_reset_form.html
+++ /dev/null
@@ -1,20 +0,0 @@
-{% extends "base.html" %}
-{% block title %}Reset Password{% endblock %}
-{% block content %}
-
-
-
Forgotten your password? Enter your email address below, and we'll email instructions for setting a new one.
-
-
-
-{% endblock %}
diff --git a/apps/web/templates/signup.html b/apps/web/templates/signup.html
deleted file mode 100644
index cbcbeca..0000000
--- a/apps/web/templates/signup.html
+++ /dev/null
@@ -1,37 +0,0 @@
-{% extends "base.html" %} {% block content %}
-
-
-
-
-
-
-
-
- {{ error }}
- {% if request.GET.restriction %}
- Please signup to {{ request.GET.restriction }}.
- {% endif %}
-
-
-
-
-
-
-
-
Already have an account? Login here.
-
-
-{% endblock %}
From 2b5973d73552a44ccbdb6b029311cc4c064a07f4 Mon Sep 17 00:00:00 2001
From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com>
Date: Sun, 1 Mar 2026 14:56:58 +0800
Subject: [PATCH 3/4] fix: Rename backend admin static url to /static/
---
website/settings.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/website/settings.py b/website/settings.py
index d281cf3..be6bdff 100644
--- a/website/settings.py
+++ b/website/settings.py
@@ -152,7 +152,7 @@
}
]
-STATIC_URL = "/dummy/" # Required by Django staticfiles but not used in this setup
+STATIC_URL = "/static/" # Required by Django staticfiles but not used in this setup
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
LANGUAGE_CODE = "en-us"
From 228bf9d9b02d6ab6b68ed58f44622948575a3dc3 Mon Sep 17 00:00:00 2001
From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com>
Date: Sun, 1 Mar 2026 15:02:16 +0800
Subject: [PATCH 4/4] fix: Remove legacy crawler functions
---
apps/spider/crawlers/medians.py | 113 ----------------
apps/spider/crawlers/orc.py | 36 -----
apps/spider/crawlers/timetable.py | 218 ------------------------------
lib/task_utils.py | 54 --------
4 files changed, 421 deletions(-)
delete mode 100644 apps/spider/crawlers/medians.py
delete mode 100644 apps/spider/crawlers/timetable.py
delete mode 100644 lib/task_utils.py
diff --git a/apps/spider/crawlers/medians.py b/apps/spider/crawlers/medians.py
deleted file mode 100644
index 35af933..0000000
--- a/apps/spider/crawlers/medians.py
+++ /dev/null
@@ -1,113 +0,0 @@
-from urllib.parse import urljoin
-
-from apps.spider.utils import (
- clean_department_code,
- parse_number_and_subnumber,
- retrieve_soup,
-)
-from apps.web.models import Course, CourseMedian
-
-MEDIAN_PAGE_INDEX_URL = "http://www.dartmouth.edu/reg/transcript/medians/"
-MEDIANS_URL_FMT = "http://www.dartmouth.edu/reg/transcript/medians/{term}.html"
-
-
-def get_term_from_median_page_url(url):
- return url.split("/")[-1].split(".")[0]
-
-
-def crawl_median_page_urls():
- soup = retrieve_soup(MEDIAN_PAGE_INDEX_URL)
- return _retrieve_term_medians_urls_from_soup(soup)
-
-
-def _retrieve_term_medians_urls_from_soup(soup):
- return [
- urljoin("http://www.dartmouth.edu", a["href"])
- for a in soup.find_all("a", href=True)
- if _is_term_page_url(a["href"])
- ]
-
-
-def _is_term_page_url(url):
- term = get_term_from_median_page_url(url)
- return url == "/reg/transcript/medians/{term}.html".format(term=term)
-
-
-def crawl_term_medians_for_url(url):
- soup = retrieve_soup(url)
- table_rows = soup.find("table").find("tbody").find_all("tr")
- medians = [_convert_table_row_to_dict(table_row) for table_row in table_rows]
- medians.sort(cmp=_median_dict_sorter)
- return medians
-
-
-def _median_dict_sorter(a, b):
- a_section, b_section = a["section"], b["section"]
- a, b = a["course"], b["course"]
- a_department, b_department = a["department"], b["department"]
- a_number, b_number = a["number"], b["number"]
- a_subnumber, b_subnumber = a.get("subnumber"), b.get("subnumber")
- if a_department == b_department:
- if a_number == b_number:
- if a_subnumber == b_subnumber:
- return int(a_section) - int(b_section)
- else:
- if a_subnumber is None:
- return -1
- if b_subnumber is None:
- return 1
- return int(a_subnumber) - int(b_subnumber)
- else:
- return int(a_number) - int(b_number)
- else:
- return -1 if a_department < b_department else 1
-
-
-def _convert_table_row_to_dict(table_row):
- median_data = table_row.find_all("td")
- term = median_data[0].get_text(strip=True)
- course = median_data[1].get_text(strip=True)
- department = clean_department_code(course.split("-")[0])
- enrollment = int(median_data[2].get_text(strip=True))
- section = int(course.split("-")[2])
- median = median_data[3].get_text(strip=True)
- number, subnumber = parse_number_and_subnumber(course.split("-")[1])
- median_dict = {
- "course": {
- "department": department,
- "number": number,
- "subnumber": subnumber,
- },
- "enrollment": enrollment,
- "median": median,
- "section": section,
- "term": term,
- }
- return median_dict
-
-
-def import_medians(data):
- for median_data in data:
- import_median(median_data)
-
-
-def import_median(median_data):
- try:
- course = Course.objects.get(
- department=median_data["course"]["department"],
- number=median_data["course"]["number"],
- subnumber=median_data["course"]["subnumber"],
- )
- except Course.DoesNotExist:
- print("Could not find course for {}".format(median_data["course"]))
- return
- median, _ = CourseMedian.objects.update_or_create(
- course=course,
- section=median_data["section"],
- term=median_data["term"],
- defaults={
- "enrollment": median_data["enrollment"],
- "median": median_data["median"],
- },
- )
- return median
diff --git a/apps/spider/crawlers/orc.py b/apps/spider/crawlers/orc.py
index aba3fdd..529bf3a 100644
--- a/apps/spider/crawlers/orc.py
+++ b/apps/spider/crawlers/orc.py
@@ -7,36 +7,12 @@
BASE_URL = "https://www.ji.sjtu.edu.cn/"
ORC_BASE_URL = urljoin(BASE_URL, "/academics/courses/courses-by-number/")
-# ORC_UNDERGRAD_SUFFIX = "Departments-Programs-Undergraduate"
-# ORC_GRADUATE_SUFFIX = "Departments-Programs-Graduate"
COURSE_DETAIL_URL_PREFIX = (
"https://www.ji.sjtu.edu.cn/academics/courses/courses-by-number/course-info/?id="
)
UNDERGRAD_URL = ORC_BASE_URL
INSTRUCTOR_TERM_REGEX = re.compile(r"^(?P\w*)\s?(\((?P\w*)\))?")
-# SUPPLEMENT_URL = "http://dartmouth.smartcatalogiq.com/en/2016s/Supplement/Courses"
-
-# COURSE_HEADING_CORRECTIONS = {
-# "COLT": {"7 First Year Seminars": "COLT 7 First Year Seminars"},
-# "GRK": {"GRK 1.02-3.02 Intensive Greek": "GRK 1.02 Intensive Greek"},
-# "INTS": {
-# "INTS INTS 17.04 Migration Stories": "INTS 17.04 Migration Stories",
-# },
-# "MALS": {
-# "MALS MALS 368 Seeing and Feeling in Early Modern Europe": (
-# "MALS 368 Seeing and Feeling in Early Modern Europe"
-# ),
-# },
-# "PSYC": {"$name": None},
-# "QBS": {
-# "Quantitative Biomedical Sciences 132-2 Molecular Markers in Human "
-# "Health Studies Lab": (
-# "QBS 132.02 Molecular Markers in Human Health Studies Lab"
-# ),
-# },
-# }
-
def crawl_program_urls():
program_urls = set() # Initialize to empty set
@@ -122,18 +98,6 @@ def _crawl_course_data(course_url):
"url": course_url,
}
return result
- # return {
- # "course_code": "QWER1234J",
- # "course_title": "Test Course",
- # "department": "QWER",
- # "number": 1234,
- # "course_credits": 4,
- # "pre_requisites": None,
- # "description": "This is a test course",
- # "course_topics": ["Test Topic"],
- # "instructors": ["Test Instructor"],
- # "url": course_url,
- # }
def import_department(department_data):
diff --git a/apps/spider/crawlers/timetable.py b/apps/spider/crawlers/timetable.py
deleted file mode 100644
index 98d01d8..0000000
--- a/apps/spider/crawlers/timetable.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import re
-
-from django.db import transaction
-
-from apps.spider.utils import int_or_none, parse_number_and_subnumber, retrieve_soup
-from apps.web.models import Course, CourseOffering, DistributiveRequirement, Instructor
-from lib.terms import split_term
-
-TIMETABLE_URL = "http://oracle-www.dartmouth.edu/dart/groucho/timetable.display_courses"
-
-DATA_TO_SEND = (
- "distribradio=alldistribs&depts=no_value&periods=no_value&"
- "distribs=no_value&distribs_i=no_value&distribs_wc=no_value&deliverymodes=no_value&pmode=public&"
- "term=&levl=&fys=n&wrt=n&pe=n&review=n&crnl=no_value&classyear=2008&"
- "searchtype=Subject+Area%28s%29&termradio=selectterms&terms=no_value&"
- "deliveryradio=selectdelivery&subjectradio=selectsubjects&hoursradio=allhours&sortorder=dept"
- "&terms={term}"
-)
-
-COURSE_TITLE_REGEX = re.compile(
- r"(.*?)(?:\s\(((?:Remote|On Campus|Individualized)[^\)]*)\))?(\(.*\))?$"
-)
-
-
-def crawl_timetable(term):
- """
- Timetable HTML is malformed. All table rows except the head do not have
- a proper starting , which requires us to:
-
- 1. Iterate over | in chunks rather than by
- 2. Remove all in the table, which otherwise breaks BeautifulSoup into
- not allowing us to iterate over all the |
-
- To iterate over the | in chunks, we get the number of columns,
- put all of the | in a generator, and pull the number of columns
- from the generator to get the row.
- """
- course_data = []
- request_data = DATA_TO_SEND.format(term=_get_timetable_term_code(term))
- soup = retrieve_soup(
- TIMETABLE_URL,
- data=request_data,
- preprocess=lambda x: re.sub(r"", "", x),
- )
-
- data_table = soup.find(class_="data-table")
- if not data_table:
- raise ValueError("No data-table found in the HTML response")
-
- num_columns = len(data_table.find_all("th"))
- assert num_columns == 20
-
- tds = data_table.find_all("td")
- assert len(tds) % num_columns == 0
-
- td_generator = (td for td in tds)
- for _ in range(len(tds) // num_columns):
- tds = [next(td_generator) for _ in range(num_columns)]
-
- number, subnumber = parse_number_and_subnumber(tds[3].get_text())
- crosslisted_courses = _parse_crosslisted_courses(tds[7].get_text(strip=True))
-
- title_match = COURSE_TITLE_REGEX.match(
- tds[5].get_text(strip=True).encode("ascii", "ignore").decode("ascii")
- )
-
- title = title_match.group(1)
- if title_match.group(3):
- title += " " + title_match.group(3)
-
- course_data.append(
- {
- "term": _convert_timetable_term_to_term(tds[0].get_text(strip=True)),
- # "crn": int(tds[1].get_text(strip=True)),
- "program": tds[2].get_text(strip=True),
- "number": number,
- "subnumber": subnumber,
- "section": int(tds[4].get_text(strip=True)),
- "title": title,
- "delivery_mode": title_match.group(2),
- "crosslisted": crosslisted_courses,
- "period": tds[8].get_text(strip=True),
- "room": tds[10].get_text(strip=True),
- "building": tds[11].get_text(strip=True),
- "instructor": _parse_instructors(tds[12].get_text(strip=True)),
- "world_culture": tds[13].get_text(strip=True),
- "distribs": _parse_distribs(tds[14].get_text(strip=True)),
- "limit": int_or_none(tds[15].get_text(strip=True)),
- # "enrollment": int_or_none(tds[16].get_text(strip=True)),
- "status": tds[17].get_text(strip=True),
- }
- )
- return course_data
-
-
-def _parse_crosslisted_courses(xlist_text):
- crosslisted_courses = []
- for course_text in xlist_text.split(",") if xlist_text else []:
- program, numbers, section = course_text.split()
- number, subnumber = parse_number_and_subnumber(numbers)
- section = int(section)
- crosslisted_courses.append(
- {
- "program": program,
- "number": number,
- "subnumber": subnumber,
- "section": section,
- }
- )
- return crosslisted_courses
-
-
-def _convert_timetable_term_to_term(timetable_term):
- assert len(timetable_term) == 6
- assert timetable_term[:2] == "20"
- month = int(timetable_term[-2:])
- year = timetable_term[2:4]
- return "{year}{season}".format(
- year=year, season={1: "W", 3: "S", 6: "X", 9: "F"}[month]
- )
-
-
-def _parse_distribs(distribs_text):
- return distribs_text.split(" or ") if distribs_text else []
-
-
-def _parse_instructors(instructors):
- return instructors.split(", ") if instructors else []
-
-
-def _get_timetable_term_code(term):
- year, term = split_term(term)
- return "20{year}0{term_number}".format(
- year=year,
- term_number={"w": 1, "s": 3, "x": 6, "f": 9}[term.lower()],
- )
-
-
-def import_timetable(timetable_data):
- for course_data in timetable_data:
- _import_course_data(course_data)
-
-
-@transaction.atomic
-def _import_course_data(course_data):
- course = _get_or_import_course(course_data)
- offering = _update_or_import_offering(course_data, course)
- _update_crosslisted_courses(course_data, course)
- _update_distribs(course_data, course)
- _update_instructors(course_data, offering)
-
-
-def _get_or_import_course(course_data):
- course, _ = Course.objects.get_or_create(
- department=course_data["program"],
- number=course_data["number"],
- subnumber=course_data["subnumber"],
- defaults={
- "title": course_data["title"],
- "source": Course.SOURCES.TIMETABLE,
- },
- )
- return course
-
-
-def _update_or_import_offering(course_data, course):
- offering, _ = CourseOffering.objects.update_or_create(
- course=course,
- section=course_data["section"],
- term=course_data["term"],
- defaults={
- "period": course_data["period"],
- "limit": course_data["limit"],
- },
- )
- return offering
-
-
-def _update_crosslisted_courses(course_data, course):
- crosslisted_courses_data = course_data["crosslisted"]
- for crosslisted_course_data in crosslisted_courses_data:
- # We ignore missing courses because they should be created later in the
- # timetable importing process.
- crosslisted_course = Course.objects.filter(
- department=crosslisted_course_data["program"],
- number=crosslisted_course_data["number"],
- subnumber=crosslisted_course_data["subnumber"],
- ).first()
- if crosslisted_course:
- course.crosslisted_courses.add(crosslisted_course)
-
-
-def _update_distribs(course_data, course):
- for distrib_name in course_data["distribs"]:
- distrib, _ = DistributiveRequirement.objects.get_or_create(
- name=distrib_name,
- defaults={
- "distributive_type": DistributiveRequirement.DISTRIBUTIVE,
- },
- )
- course.distribs.add(distrib)
-
- if course_data["world_culture"]:
- distrib, _ = DistributiveRequirement.objects.get_or_create(
- name=course_data["world_culture"],
- defaults={
- "distributive_type": DistributiveRequirement.WORLD_CULTURE,
- },
- )
- course.distribs.add(distrib)
-
-
-def _update_instructors(course_data, offering):
- for instructor_name in course_data["instructor"]:
- instructor, _ = Instructor.objects.get_or_create(
- name=instructor_name,
- )
- offering.instructors.add(instructor)
diff --git a/lib/task_utils.py b/lib/task_utils.py
deleted file mode 100644
index cdda40e..0000000
--- a/lib/task_utils.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import socket
-import traceback
-from functools import wraps
-
-from django.conf import settings
-from django.core.mail import send_mail
-
-from lib import constants
-
-
-def email_if_fails(fn):
- @wraps(fn)
- def decorated(*args, **kwargs):
- try:
- return fn(*args, **kwargs)
- except:
- if not settings.DEBUG:
- try:
- fnName = fn.func_name
- except AttributeError:
- fnName = fn.__name__
- send_error_email(
- fnName, args, kwargs, socket.gethostname(), traceback.format_exc()
- )
- raise
-
- return decorated
-
-
-def send_error_email(fnName, args, kwargs, host, formatted_exc):
- formatted_exc = formatted_exc.strip()
- contents = (
- "Task: {fnName}\nArgs: {args}\nKwargs: {kwargs}\nHost: {host}\n"
- "Error: {error}".format(
- fnName=fnName,
- args=args,
- kwargs=kwargs,
- host=host,
- error=formatted_exc,
- )
- )
- short_exc = formatted_exc.rsplit("\n")[-1]
- subject = "[celery-error] {host} {fnName} {short_exc}".format(
- host=host,
- fnName=fnName,
- short_exc=short_exc,
- )
- send_mail(
- subject,
- contents,
- constants.SUPPORT_EMAIL,
- [email for _, email in settings.ADMINS],
- fail_silently=False,
- )