From dc0b1a8a92d8ab20eaa3a74064db3699959d64a6 Mon Sep 17 00:00:00 2001 From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com> Date: Sun, 1 Mar 2026 14:54:50 +0800 Subject: [PATCH 1/4] fix: Remove legacy analytics and recommendations --- apps/analytics/__init__.py | 0 apps/analytics/forms.py | 61 ----- apps/analytics/tasks.py | 77 ------- apps/analytics/templates/analytics_email.txt | 20 -- apps/analytics/templates/dashboard.html | 91 -------- .../eligible_for_recommendations.html | 29 --- .../templates/sentiment_labeler.html | 23 -- apps/analytics/views.py | 216 ------------------ apps/recommendations/__init__.py | 0 apps/recommendations/admin.py | 5 - .../migrations/0001_initial.py | 54 ----- apps/recommendations/migrations/__init__.py | 0 apps/recommendations/models.py | 83 ------- apps/recommendations/tasks.py | 161 ------------- .../templates/recommendations.html | 49 ---- apps/recommendations/views.py | 21 -- 16 files changed, 890 deletions(-) delete mode 100644 apps/analytics/__init__.py delete mode 100644 apps/analytics/forms.py delete mode 100644 apps/analytics/tasks.py delete mode 100644 apps/analytics/templates/analytics_email.txt delete mode 100644 apps/analytics/templates/dashboard.html delete mode 100644 apps/analytics/templates/eligible_for_recommendations.html delete mode 100644 apps/analytics/templates/sentiment_labeler.html delete mode 100644 apps/analytics/views.py delete mode 100644 apps/recommendations/__init__.py delete mode 100644 apps/recommendations/admin.py delete mode 100644 apps/recommendations/migrations/0001_initial.py delete mode 100644 apps/recommendations/migrations/__init__.py delete mode 100644 apps/recommendations/models.py delete mode 100644 apps/recommendations/tasks.py delete mode 100644 apps/recommendations/templates/recommendations.html delete mode 100644 apps/recommendations/views.py diff --git a/apps/analytics/__init__.py b/apps/analytics/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/apps/analytics/forms.py b/apps/analytics/forms.py deleted file mode 100644 index df99442..0000000 --- a/apps/analytics/forms.py +++ /dev/null @@ -1,61 +0,0 @@ -from django import forms - -from apps.web.models import Review - - -class ManualSentimentForm(forms.Form): - DIFFICULTY_SENTIMENT_HARD = "-1" - DIFFICULTY_SENTIMENT_SOMEWHAT_HARD = "-0.5" - DIFFICULTY_SENTIMENT_NEUTRAL = "0" - DIFFICULTY_SENTIMENT_SOMEWHAT_EASY = "0.5" - DIFFICULTY_SENTIMENT_EASY = "1" - DIFFICULTY_SENTIMENT_CHOICES = [ - (DIFFICULTY_SENTIMENT_HARD, "Hard"), - (DIFFICULTY_SENTIMENT_SOMEWHAT_HARD, "Somewhat Hard"), - (DIFFICULTY_SENTIMENT_NEUTRAL, "Neutral"), - (DIFFICULTY_SENTIMENT_SOMEWHAT_EASY, "Somewhat Easy"), - (DIFFICULTY_SENTIMENT_EASY, "Easy"), - ] - - QUALITY_SENTIMENT_BAD = "-1" - QUALITY_SENTIMENT_SOMEWHAT_BAD = "-0.5" - QUALITY_SENTIMENT_NEUTRAL = "0" - QUALITY_SENTIMENT_SOMEWHAT_QUALITY = "0.5" - QUALITY_SENTIMENT_QUALITY = "1" - QUALITY_SENTIMENT_CHOICES = [ - (QUALITY_SENTIMENT_BAD, "Bad"), - (QUALITY_SENTIMENT_SOMEWHAT_BAD, "Somewhat Bad"), - (QUALITY_SENTIMENT_NEUTRAL, "Neutral"), - (QUALITY_SENTIMENT_SOMEWHAT_QUALITY, "Somewhat Good"), - (QUALITY_SENTIMENT_QUALITY, "Good"), - ] - - review_id = forms.IntegerField( - required=True, - widget=forms.HiddenInput(), - ) - difficulty_sentiment = forms.ChoiceField( - choices=DIFFICULTY_SENTIMENT_CHOICES, - initial=DIFFICULTY_SENTIMENT_NEUTRAL, - required=True, - widget=forms.RadioSelect(), - ) - quality_sentiment = forms.ChoiceField( - choices=QUALITY_SENTIMENT_CHOICES, - initial=QUALITY_SENTIMENT_NEUTRAL, - required=True, - widget=forms.RadioSelect(), - ) - - def clean_difficulty_sentiment(self): - return float(self.cleaned_data["difficulty_sentiment"]) - - def clean_quality_sentiment(self): - return float(self.cleaned_data["quality_sentiment"]) - - def save_sentiment(self): - review = Review.objects.get(id=self.cleaned_data["review_id"]) - review.sentiment_labeler = Review.MANUAL_SENTIMENT_LABELER - review.difficulty_sentiment = self.cleaned_data["difficulty_sentiment"] - review.quality_sentiment = self.cleaned_data["quality_sentiment"] - review.save() diff --git a/apps/analytics/tasks.py b/apps/analytics/tasks.py deleted file mode 100644 index 6295f6f..0000000 --- a/apps/analytics/tasks.py +++ /dev/null @@ -1,77 +0,0 @@ -from datetime import datetime, timedelta - -from celery import shared_task -from django.conf import settings -from django.contrib.auth.models import User -from django.core.mail import send_mail -from django.db.models import Q -from django.template import Context -from django.template.loader import get_template - -from apps.web.models import CourseOffering, Review, Vote -from lib import constants, task_utils, terms - - -@shared_task -@task_utils.email_if_fails -def send_analytics_email_update(lookback=timedelta(days=7)): - context = _get_analytics_email_context(lookback) - content = get_template("analytics_email.txt").render(Context(context)) - send_mail( - "Layup List Weekly Update", - content, - constants.SUPPORT_EMAIL, - [email for _, email in settings.ADMINS], - fail_silently=False, - ) - - -def _get_analytics_email_context(lookback): - changes_since = datetime.now() - lookback - new_query = Q(created_at__gte=changes_since) - users = User.objects.all() - quality_votes = Vote.objects.filter(category=Vote.CATEGORIES.QUALITY) - quality_upvotes = quality_votes.filter(value=1) - quality_downvotes = quality_votes.filter(value=-1) - difficulty_votes = Vote.objects.filter(category=Vote.CATEGORIES.DIFFICULTY) - difficulty_upvotes = difficulty_votes.filter(value=1) - difficulty_downvotes = difficulty_votes.filter(value=-1) - return { - "users": { - "all": users, - "new": users.filter(date_joined__gte=changes_since), - "unique_recent_logins": users.filter(last_login__gte=changes_since), - }, - "votes": { - "all_quality_upvotes": quality_upvotes, - "all_quality_downvotes": quality_downvotes, - "all_difficulty_upvotes": difficulty_upvotes, - "all_difficulty_downvotes": difficulty_downvotes, - "new_quality_upvotes": quality_upvotes.filter(new_query), - "new_quality_downvotes": quality_downvotes.filter(new_query), - "new_difficulty_upvotes": difficulty_upvotes.filter(new_query), - "new_difficulty_downvotes": difficulty_downvotes.filter(new_query), - }, - "reviews": { - "all": Review.objects.all(), - "new": Review.objects.filter(new_query), - }, - } - - -@shared_task -@task_utils.email_if_fails -def possibly_request_term_update(): - next_term = terms.get_next_term(constants.CURRENT_TERM) - next_term_count = CourseOffering.objects.filter(term=next_term).count() - if next_term_count >= constants.OFFERINGS_THRESHOLD_FOR_TERM_UPDATE: - send_mail( - "Term may be out of date ({} offerings with term {})".format( - next_term_count, next_term - ), - "Consider modifying the environment variable.", - constants.SUPPORT_EMAIL, - [email for _, email in settings.ADMINS], - fail_silently=False, - ) - return next_term_count diff --git a/apps/analytics/templates/analytics_email.txt b/apps/analytics/templates/analytics_email.txt deleted file mode 100644 index 056bb25..0000000 --- a/apps/analytics/templates/analytics_email.txt +++ /dev/null @@ -1,20 +0,0 @@ -{{ users.all.count }} users ({{ users.new.count }} new). - -{{ users.unique_recent_logins.count }} unique users logged in this past week. - -{{ votes.all_quality_upvotes.count }} quality upvotes ({{ votes.new_quality_upvotes.count }} new). - -{{ votes.all_quality_downvotes.count }} quality downvotes ({{ votes.new_quality_downvotes.count }} new). - -{{ votes.all_difficulty_upvotes.count }} difficulty upvotes ({{ votes.new_difficulty_upvotes.count }} new). - -{{ votes.all_difficulty_downvotes.count }} difficulty downvotes ({{ votes.new_difficulty_downvotes.count }} new). - -{{ reviews.all.count }} reviews ({{ reviews.new.count }} new). - -{% for review in reviews.new %} -{{ review.term }} {{ review.course }} {{ review.professor }} {{ review.user }} - -{{ review.comments }} - -{% endfor %} diff --git a/apps/analytics/templates/dashboard.html b/apps/analytics/templates/dashboard.html deleted file mode 100644 index ec389a5..0000000 --- a/apps/analytics/templates/dashboard.html +++ /dev/null @@ -1,91 +0,0 @@ -{% extends "base.html" %} -{% block title %}Analytics | Layup List{% endblock %} -{% block content %} -{% load admin_urls %} -
-
-

Analytics

-

{{ activated_accounts }} users are activated.

-

View Crawled Data

-

Sentiment Labeler - - - - - - - - - - {% for c1, c2, c3, c4, c5 in overall_table %} - - - - - - - - {% endfor %} - -
RangeUsersQuality RatingsDifficulty RatingsReviews
{{ c1}}{{ c2 }}{{ c3 }}{{ c4 }}{{ c5 }}
-

-
- -
-
-

Recommendations

-

View users eligible for recommendations.

- -
-
- -
-
-

Vote Breakdown

- - - - - - - - - {% for c1, c2, c3, c4 in vote_table %} - - - - - - - {% endfor %} - -
RangeQuality RatingsDifficulty RatingsTotal Unvotes
{{ c1}}{{ c2 }}{{ c3 }}{{ c4 }}
-
-
- -
-
-

Class Breakdown

- - - - {% for year, count in class_breakdown %} - - {% endfor %} - - - - - {% for year, count in class_breakdown %} - - {% endfor %} - - -
Class Year{{ year }}
# Users{{ count }}
-
-
-{% endblock %} diff --git a/apps/analytics/templates/eligible_for_recommendations.html b/apps/analytics/templates/eligible_for_recommendations.html deleted file mode 100644 index 317e51c..0000000 --- a/apps/analytics/templates/eligible_for_recommendations.html +++ /dev/null @@ -1,29 +0,0 @@ -{% extends "base.html" %} -{% block title %}Eligible For Recommendations | Layup List{% endblock %} -{% block content %} -{% load admin_urls %} -
-
-

Eligible For Recommendations ({{ users_and_votes | length }})

-
-
- -
-
- - - - - - - {% for user, user_id, vote_count in users_and_votes %} - - - - - {% endfor %} - -
Good UpvotesEmail
{{ vote_count }}{{ user }}
-
-
-{% endblock %} diff --git a/apps/analytics/templates/sentiment_labeler.html b/apps/analytics/templates/sentiment_labeler.html deleted file mode 100644 index db3e032..0000000 --- a/apps/analytics/templates/sentiment_labeler.html +++ /dev/null @@ -1,23 +0,0 @@ -{% extends "base.html" %} -{% block title %}Sentiment Labeler | Layup List{% endblock %} -{% block content %} -
-
-

Sentiment Labeler ({{ count }} remaining, {{ labeled_count }} labeled)

-

{{ review.course }} ({{ review.id }})

-

- {% if review.term %} - {{ review.term }} - {% if review.professor %} with {{ review.professor }}{% endif %}: - {% endif %} - {{ review.comments }} -

-
- {% load crispy_forms_tags %} - {% csrf_token %} - {{ form | crispy }} - -
-
-
-{% endblock %} diff --git a/apps/analytics/views.py b/apps/analytics/views.py deleted file mode 100644 index 5422f04..0000000 --- a/apps/analytics/views.py +++ /dev/null @@ -1,216 +0,0 @@ -import datetime -from collections import Counter -from random import randint - -import pytz -from django.contrib.admin.views.decorators import staff_member_required -from django.contrib.auth.decorators import user_passes_test -from django.contrib.auth.models import User -from django.db.models import Count -from django.shortcuts import render -from django.views.decorators.http import require_safe - -from apps.analytics.forms import ManualSentimentForm -from apps.recommendations.models import Recommendation -from apps.web import models -from lib import constants - -LIMIT = 15 - - -@require_safe -@staff_member_required -def home(request): - course_picker = User.objects.get(username="CoursePicker") - - non_zero_votes = models.Vote.objects.exclude(value=0) - num_voters = non_zero_votes.values_list("user").distinct().count() - num_quality_voters = ( - non_zero_votes.filter(category=models.Vote.CATEGORIES.QUALITY) - .values_list("user") - .distinct() - .count() - ) - num_difficulty_voters = ( - non_zero_votes.filter(category=models.Vote.CATEGORIES.DIFFICULTY) - .values_list("user") - .distinct() - .count() - ) - num_reviewers = ( - models.Review.objects.exclude(user=course_picker) - .values_list("user") - .distinct() - .count() - ) - - now = datetime.datetime.now(tz=pytz.timezone("US/Eastern")) - month_ago = ("Month", now - datetime.timedelta(days=31)) - week_ago = ("Week", now - datetime.timedelta(weeks=1)) - today = ("Today", now - datetime.timedelta(hours=24)) - - overall_table = [ - ( - "Total", - User.objects.count(), - models.Vote.objects.exclude(value=0) - .filter(category=models.Vote.CATEGORIES.QUALITY) - .count(), - models.Vote.objects.exclude(value=0) - .filter(category=models.Vote.CATEGORIES.DIFFICULTY) - .count(), - "{} ({} exclusive)".format( - models.Review.objects.count(), - models.Review.objects.exclude(user=course_picker).count(), - ), - ) - ] - for name, earliest_date in [month_ago, week_ago, today]: - non_zero_votes_since = non_zero_votes.filter(created_at__gte=earliest_date) - overall_table.append( - ( - name, - User.objects.filter(date_joined__gte=earliest_date).count(), - non_zero_votes_since.filter( - category=models.Vote.CATEGORIES.QUALITY - ).count(), - non_zero_votes_since.filter( - category=models.Vote.CATEGORIES.DIFFICULTY - ).count(), - models.Review.objects.filter(created_at__gte=earliest_date).count(), - ) - ) - - vote_table = [ - ( - "Total", - models.Vote.objects.filter( - value__gte=1, category=models.Vote.CATEGORIES.QUALITY - ).count(), - models.Vote.objects.filter( - value__gte=1, category=models.Vote.CATEGORIES.DIFFICULTY - ).count(), - models.Vote.objects.filter(value=0).count(), - ) - ] - for name, earliest_date in [month_ago, week_ago, today]: - vote_table.append( - ( - name, - models.Vote.objects.filter( - value__gte=1, - category=models.Vote.CATEGORIES.QUALITY, - created_at__gte=earliest_date, - ).count(), - models.Vote.objects.filter( - value__gte=1, - category=models.Vote.CATEGORIES.DIFFICULTY, - created_at__gte=earliest_date, - ).count(), - models.Vote.objects.filter( - value=0, created_at__gte=earliest_date - ).count(), - ) - ) - - usernames = User.objects.exclude(id=course_picker.id).values_list( - "username", flat=True - ) - c = Counter() - for username in usernames: - year_string = username.split(".")[-1] - c[year_string] += 1 - class_breakdown = sorted( - [ - ( - year, - count, - ) - for year, count in c.items() - if len(year) == 2 - ] - ) - - recommendations_last_updated = [] - for creator, description in Recommendation.CREATORS: - rec = Recommendation.objects.filter(creator=creator).order_by("created_at")[:1] - if rec: - recommendations_last_updated.append((description, rec[0].created_at)) - else: - recommendations_last_updated.append((description, "never")) - - return render( - request, - "dashboard.html", - { - "overall_table": overall_table, - "vote_table": vote_table, - "num_voters": num_voters, - "num_quality_voters": num_quality_voters, - "num_difficulty_voters": num_difficulty_voters, - "num_reviewers": num_reviewers, - "recommendations_last_updated": recommendations_last_updated, - "activated_accounts": User.objects.filter(is_active=True).count(), - "class_breakdown": class_breakdown, - }, - ) - - -@require_safe -@staff_member_required -@user_passes_test(lambda u: u.is_superuser) -def eligible_for_recommendations(request): - eligible_users_and_votes = ( - models.Vote.objects.filter( - value__gte=4, category=models.Vote.CATEGORIES.QUALITY - ) - .values_list("user") - .annotate(vote_count=Count("user")) - .filter(vote_count__gte=constants.REC_UPVOTE_REQ) - .order_by("-vote_count") - .values_list("user__username", "user", "vote_count") - ) - return render( - request, - "eligible_for_recommendations.html", - {"users_and_votes": eligible_users_and_votes}, - ) - - -@staff_member_required -@user_passes_test(lambda u: u.is_superuser) -def sentiment_labeler(request): - if request.method == "POST": - form = ManualSentimentForm(request.POST) - if form.is_valid(): - form.save_sentiment() - else: - return render( - request, - "sentiment_labeler.html", - { - "review": models.Review.objects.get(id=form.review_id), - "form": form, - }, - ) - unlabeled_reviews = models.Review.objects.filter( - user=User.objects.get(username="CoursePicker"), - ).exclude( - sentiment_labeler=models.Review.MANUAL_SENTIMENT_LABELER, - ) - count = unlabeled_reviews.count() - random_index = randint(0, count - 1) - review = unlabeled_reviews[random_index] - form = ManualSentimentForm(initial={"review_id": review.id}) - return render( - request, - "sentiment_labeler.html", - { - "count": count, - "labeled_count": models.Review.objects.filter( - sentiment_labeler=models.Review.MANUAL_SENTIMENT_LABELER - ).count(), - "form": form, - "review": review, - }, - ) diff --git a/apps/recommendations/__init__.py b/apps/recommendations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/apps/recommendations/admin.py b/apps/recommendations/admin.py deleted file mode 100644 index 4ae9c82..0000000 --- a/apps/recommendations/admin.py +++ /dev/null @@ -1,5 +0,0 @@ -from django.contrib import admin - -from .models import Recommendation - -admin.site.register(Recommendation) diff --git a/apps/recommendations/migrations/0001_initial.py b/apps/recommendations/migrations/0001_initial.py deleted file mode 100644 index cb06bb0..0000000 --- a/apps/recommendations/migrations/0001_initial.py +++ /dev/null @@ -1,54 +0,0 @@ -# Generated by Django 5.0.8 on 2024-08-22 14:35 - -import django.db.models.deletion -from django.db import migrations, models - - -class Migration(migrations.Migration): - initial = True - - dependencies = [ - ("web", "0002_alter_student_unauth_session_ids"), - ] - - operations = [ - migrations.CreateModel( - name="Recommendation", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "creator", - models.CharField( - choices=[("docsim", "Document Similarity")], max_length=16 - ), - ), - ("weight", models.FloatField(null=True)), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "course", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="recommendations", - to="web.course", - ), - ), - ( - "recommendation", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="recommenders", - to="web.course", - ), - ), - ], - ), - ] diff --git a/apps/recommendations/migrations/__init__.py b/apps/recommendations/migrations/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/apps/recommendations/models.py b/apps/recommendations/models.py deleted file mode 100644 index 68a2c68..0000000 --- a/apps/recommendations/models.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import unicode_literals - -from django.db import models -from django.db.models import Q - -from apps.web.models import Course, CourseOffering, Vote - - -class GroupedRecommendation(object): - def __init__(self, course): - self.course = course - self.weight = 0.0 - self.recs = [] - - def reason(self): - return " ".join([r.course.short_name() for r in self.recs]) - - -class RecommendationManager(models.Manager): - def for_user(self, user, all_terms=False): - interacted_courses = Vote.objects.filter(user=user).exclude(value=0) - interacted_course_ids = interacted_courses.values_list("course_id", flat=True) - crosslisted_interacted_course_ids = Course.objects.filter( - crosslisted_courses__in=interacted_course_ids - ).values_list("id", flat=True) - upvoted_course_ids = interacted_courses.filter( - value=1, category=Vote.CATEGORIES.QUALITY - ).values_list("course_id", flat=True) - - recommendations = self.filter(course_id__in=upvoted_course_ids).exclude( - Q(recommendation_id__in=interacted_course_ids) - | Q(recommendation_id__in=crosslisted_interacted_course_ids) - ) - - if not all_terms: - recommendations = recommendations.filter( - recommendation_id__in=CourseOffering.objects.course_ids_for_term() - ) - - recommendations = recommendations.prefetch_related( - "course", - "recommendation", - "recommendation__distribs", - "recommendation__review_set", - "recommendation__courseoffering_set", - ).order_by("-weight")[:500] - - grouped_recs = {} - for rec in recommendations: - grouped_recs[rec.recommendation] = grouped_recs.get( - rec.recommendation, GroupedRecommendation(rec.recommendation) - ) - grouped_recs[rec.recommendation].weight += rec.weight - grouped_recs[rec.recommendation].recs.append(rec) - - sorted_grouped_recs = sorted(grouped_recs.values(), key=lambda x: -x.weight) - - return sorted_grouped_recs[:30] - - -class Recommendation(models.Model): - objects = RecommendationManager() - - DOCUMENT_SIMILARITY = "docsim" - CREATORS = ((DOCUMENT_SIMILARITY, "Document Similarity"),) - - course = models.ForeignKey( - "web.Course", related_name="recommendations", on_delete=models.CASCADE - ) - recommendation = models.ForeignKey( - "web.Course", related_name="recommenders", on_delete=models.CASCADE - ) - - creator = models.CharField(max_length=16, choices=CREATORS) - weight = models.FloatField(null=True) - - created_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) - - def __unicode__(self): - return "{} {} -> {}".format( - self.weight, self.course.short_name(), self.recommendation - ) diff --git a/apps/recommendations/tasks.py b/apps/recommendations/tasks.py deleted file mode 100644 index 72b3b1c..0000000 --- a/apps/recommendations/tasks.py +++ /dev/null @@ -1,161 +0,0 @@ -import re -from itertools import chain -from time import time - -import numpy as np -from celery import shared_task -from django.db import transaction -from django.db.models import Q -from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer - -from apps.recommendations.models import Recommendation -from apps.web.models import Course -from lib import task_utils - -MIN_COURSE_DESCRIPTION_LENGTH = 80 -RECOMMENDATIONS_PER_CLASS = 8 - -PERFORM_TFIDF = True -INCLUDE_REVIEWS = False # very noisy, turn off - - -@shared_task -@task_utils.email_if_fails -def generate_course_description_similarity_recommendations(): - t0 = time() - print("loading word jumbles into memory...") - course_ids = [] - reverse_course_ids = {} - course_descriptions = [] - i = 0 - for course in Course.objects.exclude(description=None).exclude(description=""): - if len(course.description) < MIN_COURSE_DESCRIPTION_LENGTH: - # these are typically uninteresting classes e.g. thesis - continue - course_ids.append(course.id) - reverse_course_ids[course.id] = i - word_jumble = [_clean_text_to_raw_words(course.description)] - word_jumble.append(_clean_text_to_raw_words(course.title)) - # reviews are very noisy -- they tend to be similar between classes - if INCLUDE_REVIEWS: - for review in course.review_set.all(): - word_jumble.append(_clean_text_to_raw_words(review.comments)) - course_descriptions.append(" ".join(word_jumble)) - i += 1 - print(f"finished in {time() - t0}") - - t0 = time() - print("fitting to count vectorizer...") - count_vect = CountVectorizer() - corpus = count_vect.fit_transform(course_descriptions) - print(f"shape is {corpus.shape}") - print(f"finished in {time() - t0}") - - if PERFORM_TFIDF: - t0 = time() - print("tfidf transform...") - tfidf_transformer = TfidfTransformer() - corpus = tfidf_transformer.fit_transform(corpus) - print(f"shape is {corpus.shape}") - print(f"finished in {time() - t0}") - - t0 = time() - print("compute cosine similarity ") - pairwise_similarity = corpus * corpus.T - print(f"shape is {pairwise_similarity.shape}") - print(f"finished in {time() - t0}") - - t0 = time() - print("calculating and creating recommendations...") - psarray = pairwise_similarity.toarray() - - # zero out columns corresponding to thesis, research, independent, and grad - course_ids_to_zero = Course.objects.filter( - Q(title__icontains="thesis") - | Q(title__icontains="research") - | Q(title__icontains="independent") - | Q(title__icontains="seminar") - | Q(title__icontains="first-year") - | Q(title__icontains="foreign study") - | Q(title__icontains="senior") - | Q(title__icontains="honors") - | Q(number__gt=99) - ).values_list("id", flat=True) - for zero_id in course_ids_to_zero: - if zero_id in reverse_course_ids: - psarray[:, reverse_course_ids[zero_id]] = 0 - - # zero out crosslistings and same titles, so only one rep for each - # crosslisting - covered_ids = set() - for i in range(psarray.shape[1]): - if i in covered_ids: - continue - course_id = course_ids[i] - course = Course.objects.get(id=course_id) - for xlist_course in list( - chain( - course.crosslisted_courses.all(), - Course.objects.filter(title=course.title), - ) - ): - if xlist_course == course: - continue - if xlist_course.id in reverse_course_ids: - xlist_col = reverse_course_ids[xlist_course.id] - if xlist_course.id not in covered_ids: - psarray[:, xlist_col] = 0 - covered_ids.add(xlist_col) - covered_ids.add(i) - - recommendations_to_create = [] - for i in range(psarray.shape[0]): - current_class = Course.objects.get(id=course_ids[i]) - - # zero out the diagonal - zero_ids = [i] - - # zero out crosslisted classes - zero_ids += list(current_class.crosslisted_courses.values_list("id", flat=True)) - - # zero out classes with the same title - zero_ids += list( - Course.objects.filter(title=current_class.title).values_list( - "id", flat=True - ) - ) - - for zero_id in zero_ids: - if zero_id in reverse_course_ids: - psarray[i, reverse_course_ids[zero_id]] = 0 - - for other_i in np.argpartition(psarray[i, :], -RECOMMENDATIONS_PER_CLASS)[ - -RECOMMENDATIONS_PER_CLASS: - ]: - course_id = course_ids[other_i] - - recommendations_to_create.append( - Recommendation( - course=current_class, - recommendation_id=course_id, - creator=Recommendation.DOCUMENT_SIMILARITY, - weight=psarray[i, other_i], - ) - ) - - with transaction.atomic(): - Recommendation.objects.filter( - creator=Recommendation.DOCUMENT_SIMILARITY - ).delete() - Recommendation.objects.bulk_create(recommendations_to_create) - - print(f"finished in {time() - t0}") - - -def _clean_text_to_raw_words(text): - if text: - return " ".join( - [w for w in re.sub(r"[^a-zA-Z ]", "", text).lower().split() if len(w) > 3] - ) - else: - return "" diff --git a/apps/recommendations/templates/recommendations.html b/apps/recommendations/templates/recommendations.html deleted file mode 100644 index 70ea74f..0000000 --- a/apps/recommendations/templates/recommendations.html +++ /dev/null @@ -1,49 +0,0 @@ -{% extends "base.html" %} -{% block title %}Recommendations | Layup List{% endblock %} -{% block content %} -
-
-

{% if request.GET.show_all %}All{% else %}{{ constants.CURRENT_TERM }}{% endif %} Recommended For You

- {% if request.user.student.can_see_recommendations %} - {% if recommendations %} -

These courses have similar course descriptions to courses you have upvoted. If you vote on any of the classes on this list they will go away.

-

Want better recommendations? Upvote classes that you think are good!

-

- {% if request.GET.show_all %} - Show {{ constants.CURRENT_TERM}} only. - {% else %} - Show all terms. - {% endif %} -

- - - - - - - {% for rec in recommendations %} - - - - - {% endfor %} - -
CourseSimilar To
- {% include "components/course_summary.html" with course=rec.course only %} - -

{{ rec.reason }}

-
- {% else %} - No recommendations to show. Try upvoting more classes! - {% endif %} - {% else %} -
- To see course recommendations, you must: -
    -
  • Have upvoted {{ constants.REC_UPVOTE_REQ }} classes as good.
  • -
-
- {% endif %} -
-
-{% endblock %} diff --git a/apps/recommendations/views.py b/apps/recommendations/views.py deleted file mode 100644 index d4c5f52..0000000 --- a/apps/recommendations/views.py +++ /dev/null @@ -1,21 +0,0 @@ -from django.contrib.auth.decorators import login_required -from django.shortcuts import render -from django.views.decorators.http import require_safe - -from apps.recommendations.models import Recommendation -from lib import constants - - -@require_safe -@login_required -def recommendations(request): - return render( - request, - "recommendations.html", - { - "recommendations": Recommendation.objects.for_user( - request.user, "show_all" in request.GET - ), - "constants": constants, - }, - ) From ac95f39f5a1988da8372a8f6deac607bef7f7e61 Mon Sep 17 00:00:00 2001 From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com> Date: Sun, 1 Mar 2026 14:55:31 +0800 Subject: [PATCH 2/4] fix: Remove legacy django html templates --- .../spider/templates/crawled_data_detail.html | 21 ------ apps/spider/templates/crawled_data_list.html | 33 --------- apps/web/templates/base.html | 73 ------------------- apps/web/templates/confirmation.html | 28 ------- apps/web/templates/footer.html | 8 -- apps/web/templates/instructions.html | 24 ------ apps/web/templates/login.html | 41 ----------- apps/web/templates/logout.html | 21 ------ apps/web/templates/navbar.html | 55 -------------- .../templates/password_reset_complete.html | 12 --- .../web/templates/password_reset_confirm.html | 26 ------- apps/web/templates/password_reset_done.html | 12 --- apps/web/templates/password_reset_email.html | 15 ---- apps/web/templates/password_reset_form.html | 20 ----- apps/web/templates/signup.html | 37 ---------- 15 files changed, 426 deletions(-) delete mode 100644 apps/spider/templates/crawled_data_detail.html delete mode 100644 apps/spider/templates/crawled_data_list.html delete mode 100644 apps/web/templates/base.html delete mode 100644 apps/web/templates/confirmation.html delete mode 100644 apps/web/templates/footer.html delete mode 100644 apps/web/templates/instructions.html delete mode 100644 apps/web/templates/login.html delete mode 100644 apps/web/templates/logout.html delete mode 100644 apps/web/templates/navbar.html delete mode 100644 apps/web/templates/password_reset_complete.html delete mode 100644 apps/web/templates/password_reset_confirm.html delete mode 100644 apps/web/templates/password_reset_done.html delete mode 100644 apps/web/templates/password_reset_email.html delete mode 100644 apps/web/templates/password_reset_form.html delete mode 100644 apps/web/templates/signup.html diff --git a/apps/spider/templates/crawled_data_detail.html b/apps/spider/templates/crawled_data_detail.html deleted file mode 100644 index 372c36a..0000000 --- a/apps/spider/templates/crawled_data_detail.html +++ /dev/null @@ -1,21 +0,0 @@ -{% extends "base.html" %} -{% block title %}{{ crawled_data }}| Layup List{% endblock %} -{% block content %} -
-
-

{{ crawled_data }}

- {% if crawled_data.has_change %} -

Pending

-
- {% csrf_token %} - -
-
{{ crawled_data.diff }}
- {% else %} -

Up to date

-
{{ crawled_data.pretty_current_data }}
- {% endif %} - -
-
-{% endblock %} diff --git a/apps/spider/templates/crawled_data_list.html b/apps/spider/templates/crawled_data_list.html deleted file mode 100644 index a799b5e..0000000 --- a/apps/spider/templates/crawled_data_list.html +++ /dev/null @@ -1,33 +0,0 @@ -{% extends "base.html" %} -{% block title %}Spider | Layup List{% endblock %} -{% block content %} -
-
-

Spider

-
- {% csrf_token %} - -
- - - - - - - - - - {% for crawled_data in crawled_datas %} - - - - - - {% endfor %} - -
ResourceTypePending Changes
{{ crawled_data.resource }}{{ crawled_data.data_type }} - {% if crawled_data.has_change %}PENDING{% endif %} -
-
-
-{% endblock %} diff --git a/apps/web/templates/base.html b/apps/web/templates/base.html deleted file mode 100644 index 7c8cda1..0000000 --- a/apps/web/templates/base.html +++ /dev/null @@ -1,73 +0,0 @@ -{% load static %} -{% load pipeline %} - - - - - - - {% block title %}UMJI's Course Review Site{% endblock %} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/apps/web/templates/confirmation.html b/apps/web/templates/confirmation.html deleted file mode 100644 index d4a3ec5..0000000 --- a/apps/web/templates/confirmation.html +++ /dev/null @@ -1,28 +0,0 @@ -{% extends "base.html" %} {% block content %} - -
-
-
-
-
-
-
-
- -
-
- {% if error %} -

{{ error }}

- {% elif already_confirmed %} -

This email is already confirmed. Please login.

- {% else %} -

Email successfully confirmed! Please click here to login.

- {% endif %} -
-
-
-
-
-
- -{% endblock %} diff --git a/apps/web/templates/footer.html b/apps/web/templates/footer.html deleted file mode 100644 index ce67c25..0000000 --- a/apps/web/templates/footer.html +++ /dev/null @@ -1,8 +0,0 @@ - diff --git a/apps/web/templates/instructions.html b/apps/web/templates/instructions.html deleted file mode 100644 index 722a83b..0000000 --- a/apps/web/templates/instructions.html +++ /dev/null @@ -1,24 +0,0 @@ -{% extends "base.html" %} {% block content %} -
-
-
-

-

- -
-
-
-
-
-
-
-

Registration successful. Please check your email (and in spam folder) for a confirmation link and follow the instructions to activate your account.

-
-
-
- -
-
-
- -{% endblock %} diff --git a/apps/web/templates/login.html b/apps/web/templates/login.html deleted file mode 100644 index b5600cb..0000000 --- a/apps/web/templates/login.html +++ /dev/null @@ -1,41 +0,0 @@ -{% extends "base.html" %} {% block content %} -
-
-
-

Login

-
-
-
-
-
- -
-
-

{{ error }}

-
-
-
-
-
-
- {% csrf_token %} - - -
- - -
- -
-
-
-
-
-
-
-

Don't have an account yet? Sign up here.

-

Reset password

-
-
-
-{% endblock %} diff --git a/apps/web/templates/logout.html b/apps/web/templates/logout.html deleted file mode 100644 index d94bfb1..0000000 --- a/apps/web/templates/logout.html +++ /dev/null @@ -1,21 +0,0 @@ -{% extends "base.html" %} {% block content %} -
-
-
-
- -
-
-

You are successfully logged out. If this was a mistake, please click here to login.

- -
-
- - - -
- -
-
-
-{% endblock %} diff --git a/apps/web/templates/navbar.html b/apps/web/templates/navbar.html deleted file mode 100644 index b2d5ec2..0000000 --- a/apps/web/templates/navbar.html +++ /dev/null @@ -1,55 +0,0 @@ -{% load static %} - - diff --git a/apps/web/templates/password_reset_complete.html b/apps/web/templates/password_reset_complete.html deleted file mode 100644 index 9e0a64b..0000000 --- a/apps/web/templates/password_reset_complete.html +++ /dev/null @@ -1,12 +0,0 @@ -{% extends "base.html" %} - -{% block title %}Password reset complete{% endblock %} - -{% block content %} -
-
-

Your password has been set. You may go ahead and log in now.

-

Log in

-
-
-{% endblock %} diff --git a/apps/web/templates/password_reset_confirm.html b/apps/web/templates/password_reset_confirm.html deleted file mode 100644 index bad4f91..0000000 --- a/apps/web/templates/password_reset_confirm.html +++ /dev/null @@ -1,26 +0,0 @@ -{% extends "base.html" %} -{% block title %}Set New Password{% endblock %} -{% block content %} -
-
- {% if validlink %} -

Please enter your new password twice.
- So we can verify you typed it in correctly.

- -
-
- {% load crispy_forms_tags %} - {% csrf_token %} - {{ form | crispy }} - -
-
- {% else %} -

Password reset unsuccessful

-

The password reset link was invalid,
- possibly because it has already been used.
- Please request a new password reset.

- {% endif %} -
-
-{% endblock %} diff --git a/apps/web/templates/password_reset_done.html b/apps/web/templates/password_reset_done.html deleted file mode 100644 index 9142177..0000000 --- a/apps/web/templates/password_reset_done.html +++ /dev/null @@ -1,12 +0,0 @@ -{% extends "base.html" %} -{% block title %} - Password reset successful -{% endblock %} -{% block content %} -
-
-

We've e-mailed you instructions for setting your password to the e-mail address you submitted.

-

You should be receiving it shortly.

-
-
-{% endblock %} diff --git a/apps/web/templates/password_reset_email.html b/apps/web/templates/password_reset_email.html deleted file mode 100644 index 96bbc35..0000000 --- a/apps/web/templates/password_reset_email.html +++ /dev/null @@ -1,15 +0,0 @@ -{% autoescape off %} -You're receiving this e-mail because you requested a password reset for your user account at {{ site_name }}. - -Please go to the following page and choose a new password: -{% block reset_link %} -https://www.layuplist.com{% url 'password_reset_confirm' uidb64=uid token=token %} -{% endblock %} - -Your username, in case you've forgotten: {{ user.username }} - -Thanks for using our site! - -LL - -{% endautoescape %} diff --git a/apps/web/templates/password_reset_form.html b/apps/web/templates/password_reset_form.html deleted file mode 100644 index 08bb429..0000000 --- a/apps/web/templates/password_reset_form.html +++ /dev/null @@ -1,20 +0,0 @@ -{% extends "base.html" %} -{% block title %}Reset Password{% endblock %} -{% block content %} -
-
-

Forgotten your password? Enter your email address below, and we'll email instructions for setting a new one.

-
-
- -
-
- {% load crispy_forms_tags %} - {% csrf_token %} - {{ form | crispy }} - -
-
-
-
-{% endblock %} diff --git a/apps/web/templates/signup.html b/apps/web/templates/signup.html deleted file mode 100644 index cbcbeca..0000000 --- a/apps/web/templates/signup.html +++ /dev/null @@ -1,37 +0,0 @@ -{% extends "base.html" %} {% block content %} -
-
-

Sign up

-
-
-
-
-
- -
-
-

- {{ error }} - {% if request.GET.restriction %} - Please signup to {{ request.GET.restriction }}. - {% endif %} -

-
-
- -
-
-
- {% load crispy_forms_tags %} - {% csrf_token %} - {{ form | crispy }} - -
-
-
-
-
-

Already have an account? Login here.

-
-
-{% endblock %} From 2b5973d73552a44ccbdb6b029311cc4c064a07f4 Mon Sep 17 00:00:00 2001 From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com> Date: Sun, 1 Mar 2026 14:56:58 +0800 Subject: [PATCH 3/4] fix: Rename backend admin static url to /static/ --- website/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/settings.py b/website/settings.py index d281cf3..be6bdff 100644 --- a/website/settings.py +++ b/website/settings.py @@ -152,7 +152,7 @@ } ] -STATIC_URL = "/dummy/" # Required by Django staticfiles but not used in this setup +STATIC_URL = "/static/" # Required by Django staticfiles but not used in this setup DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" LANGUAGE_CODE = "en-us" From 228bf9d9b02d6ab6b68ed58f44622948575a3dc3 Mon Sep 17 00:00:00 2001 From: Pachakutiq <101460915+PACHAKUTlQ@users.noreply.github.com> Date: Sun, 1 Mar 2026 15:02:16 +0800 Subject: [PATCH 4/4] fix: Remove legacy crawler functions --- apps/spider/crawlers/medians.py | 113 ---------------- apps/spider/crawlers/orc.py | 36 ----- apps/spider/crawlers/timetable.py | 218 ------------------------------ lib/task_utils.py | 54 -------- 4 files changed, 421 deletions(-) delete mode 100644 apps/spider/crawlers/medians.py delete mode 100644 apps/spider/crawlers/timetable.py delete mode 100644 lib/task_utils.py diff --git a/apps/spider/crawlers/medians.py b/apps/spider/crawlers/medians.py deleted file mode 100644 index 35af933..0000000 --- a/apps/spider/crawlers/medians.py +++ /dev/null @@ -1,113 +0,0 @@ -from urllib.parse import urljoin - -from apps.spider.utils import ( - clean_department_code, - parse_number_and_subnumber, - retrieve_soup, -) -from apps.web.models import Course, CourseMedian - -MEDIAN_PAGE_INDEX_URL = "http://www.dartmouth.edu/reg/transcript/medians/" -MEDIANS_URL_FMT = "http://www.dartmouth.edu/reg/transcript/medians/{term}.html" - - -def get_term_from_median_page_url(url): - return url.split("/")[-1].split(".")[0] - - -def crawl_median_page_urls(): - soup = retrieve_soup(MEDIAN_PAGE_INDEX_URL) - return _retrieve_term_medians_urls_from_soup(soup) - - -def _retrieve_term_medians_urls_from_soup(soup): - return [ - urljoin("http://www.dartmouth.edu", a["href"]) - for a in soup.find_all("a", href=True) - if _is_term_page_url(a["href"]) - ] - - -def _is_term_page_url(url): - term = get_term_from_median_page_url(url) - return url == "/reg/transcript/medians/{term}.html".format(term=term) - - -def crawl_term_medians_for_url(url): - soup = retrieve_soup(url) - table_rows = soup.find("table").find("tbody").find_all("tr") - medians = [_convert_table_row_to_dict(table_row) for table_row in table_rows] - medians.sort(cmp=_median_dict_sorter) - return medians - - -def _median_dict_sorter(a, b): - a_section, b_section = a["section"], b["section"] - a, b = a["course"], b["course"] - a_department, b_department = a["department"], b["department"] - a_number, b_number = a["number"], b["number"] - a_subnumber, b_subnumber = a.get("subnumber"), b.get("subnumber") - if a_department == b_department: - if a_number == b_number: - if a_subnumber == b_subnumber: - return int(a_section) - int(b_section) - else: - if a_subnumber is None: - return -1 - if b_subnumber is None: - return 1 - return int(a_subnumber) - int(b_subnumber) - else: - return int(a_number) - int(b_number) - else: - return -1 if a_department < b_department else 1 - - -def _convert_table_row_to_dict(table_row): - median_data = table_row.find_all("td") - term = median_data[0].get_text(strip=True) - course = median_data[1].get_text(strip=True) - department = clean_department_code(course.split("-")[0]) - enrollment = int(median_data[2].get_text(strip=True)) - section = int(course.split("-")[2]) - median = median_data[3].get_text(strip=True) - number, subnumber = parse_number_and_subnumber(course.split("-")[1]) - median_dict = { - "course": { - "department": department, - "number": number, - "subnumber": subnumber, - }, - "enrollment": enrollment, - "median": median, - "section": section, - "term": term, - } - return median_dict - - -def import_medians(data): - for median_data in data: - import_median(median_data) - - -def import_median(median_data): - try: - course = Course.objects.get( - department=median_data["course"]["department"], - number=median_data["course"]["number"], - subnumber=median_data["course"]["subnumber"], - ) - except Course.DoesNotExist: - print("Could not find course for {}".format(median_data["course"])) - return - median, _ = CourseMedian.objects.update_or_create( - course=course, - section=median_data["section"], - term=median_data["term"], - defaults={ - "enrollment": median_data["enrollment"], - "median": median_data["median"], - }, - ) - return median diff --git a/apps/spider/crawlers/orc.py b/apps/spider/crawlers/orc.py index aba3fdd..529bf3a 100644 --- a/apps/spider/crawlers/orc.py +++ b/apps/spider/crawlers/orc.py @@ -7,36 +7,12 @@ BASE_URL = "https://www.ji.sjtu.edu.cn/" ORC_BASE_URL = urljoin(BASE_URL, "/academics/courses/courses-by-number/") -# ORC_UNDERGRAD_SUFFIX = "Departments-Programs-Undergraduate" -# ORC_GRADUATE_SUFFIX = "Departments-Programs-Graduate" COURSE_DETAIL_URL_PREFIX = ( "https://www.ji.sjtu.edu.cn/academics/courses/courses-by-number/course-info/?id=" ) UNDERGRAD_URL = ORC_BASE_URL INSTRUCTOR_TERM_REGEX = re.compile(r"^(?P\w*)\s?(\((?P\w*)\))?") -# SUPPLEMENT_URL = "http://dartmouth.smartcatalogiq.com/en/2016s/Supplement/Courses" - -# COURSE_HEADING_CORRECTIONS = { -# "COLT": {"7 First Year Seminars": "COLT 7 First Year Seminars"}, -# "GRK": {"GRK 1.02-3.02 Intensive Greek": "GRK 1.02 Intensive Greek"}, -# "INTS": { -# "INTS INTS 17.04 Migration Stories": "INTS 17.04 Migration Stories", -# }, -# "MALS": { -# "MALS MALS 368 Seeing and Feeling in Early Modern Europe": ( -# "MALS 368 Seeing and Feeling in Early Modern Europe" -# ), -# }, -# "PSYC": {"$name": None}, -# "QBS": { -# "Quantitative Biomedical Sciences 132-2 Molecular Markers in Human " -# "Health Studies Lab": ( -# "QBS 132.02 Molecular Markers in Human Health Studies Lab" -# ), -# }, -# } - def crawl_program_urls(): program_urls = set() # Initialize to empty set @@ -122,18 +98,6 @@ def _crawl_course_data(course_url): "url": course_url, } return result - # return { - # "course_code": "QWER1234J", - # "course_title": "Test Course", - # "department": "QWER", - # "number": 1234, - # "course_credits": 4, - # "pre_requisites": None, - # "description": "This is a test course", - # "course_topics": ["Test Topic"], - # "instructors": ["Test Instructor"], - # "url": course_url, - # } def import_department(department_data): diff --git a/apps/spider/crawlers/timetable.py b/apps/spider/crawlers/timetable.py deleted file mode 100644 index 98d01d8..0000000 --- a/apps/spider/crawlers/timetable.py +++ /dev/null @@ -1,218 +0,0 @@ -import re - -from django.db import transaction - -from apps.spider.utils import int_or_none, parse_number_and_subnumber, retrieve_soup -from apps.web.models import Course, CourseOffering, DistributiveRequirement, Instructor -from lib.terms import split_term - -TIMETABLE_URL = "http://oracle-www.dartmouth.edu/dart/groucho/timetable.display_courses" - -DATA_TO_SEND = ( - "distribradio=alldistribs&depts=no_value&periods=no_value&" - "distribs=no_value&distribs_i=no_value&distribs_wc=no_value&deliverymodes=no_value&pmode=public&" - "term=&levl=&fys=n&wrt=n&pe=n&review=n&crnl=no_value&classyear=2008&" - "searchtype=Subject+Area%28s%29&termradio=selectterms&terms=no_value&" - "deliveryradio=selectdelivery&subjectradio=selectsubjects&hoursradio=allhours&sortorder=dept" - "&terms={term}" -) - -COURSE_TITLE_REGEX = re.compile( - r"(.*?)(?:\s\(((?:Remote|On Campus|Individualized)[^\)]*)\))?(\(.*\))?$" -) - - -def crawl_timetable(term): - """ - Timetable HTML is malformed. All table rows except the head do not have - a proper starting , which requires us to: - - 1. Iterate over in chunks rather than by - 2. Remove all in the table, which otherwise breaks BeautifulSoup into - not allowing us to iterate over all the - - To iterate over the in chunks, we get the number of columns, - put all of the in a generator, and pull the number of columns - from the generator to get the row. - """ - course_data = [] - request_data = DATA_TO_SEND.format(term=_get_timetable_term_code(term)) - soup = retrieve_soup( - TIMETABLE_URL, - data=request_data, - preprocess=lambda x: re.sub(r"", "", x), - ) - - data_table = soup.find(class_="data-table") - if not data_table: - raise ValueError("No data-table found in the HTML response") - - num_columns = len(data_table.find_all("th")) - assert num_columns == 20 - - tds = data_table.find_all("td") - assert len(tds) % num_columns == 0 - - td_generator = (td for td in tds) - for _ in range(len(tds) // num_columns): - tds = [next(td_generator) for _ in range(num_columns)] - - number, subnumber = parse_number_and_subnumber(tds[3].get_text()) - crosslisted_courses = _parse_crosslisted_courses(tds[7].get_text(strip=True)) - - title_match = COURSE_TITLE_REGEX.match( - tds[5].get_text(strip=True).encode("ascii", "ignore").decode("ascii") - ) - - title = title_match.group(1) - if title_match.group(3): - title += " " + title_match.group(3) - - course_data.append( - { - "term": _convert_timetable_term_to_term(tds[0].get_text(strip=True)), - # "crn": int(tds[1].get_text(strip=True)), - "program": tds[2].get_text(strip=True), - "number": number, - "subnumber": subnumber, - "section": int(tds[4].get_text(strip=True)), - "title": title, - "delivery_mode": title_match.group(2), - "crosslisted": crosslisted_courses, - "period": tds[8].get_text(strip=True), - "room": tds[10].get_text(strip=True), - "building": tds[11].get_text(strip=True), - "instructor": _parse_instructors(tds[12].get_text(strip=True)), - "world_culture": tds[13].get_text(strip=True), - "distribs": _parse_distribs(tds[14].get_text(strip=True)), - "limit": int_or_none(tds[15].get_text(strip=True)), - # "enrollment": int_or_none(tds[16].get_text(strip=True)), - "status": tds[17].get_text(strip=True), - } - ) - return course_data - - -def _parse_crosslisted_courses(xlist_text): - crosslisted_courses = [] - for course_text in xlist_text.split(",") if xlist_text else []: - program, numbers, section = course_text.split() - number, subnumber = parse_number_and_subnumber(numbers) - section = int(section) - crosslisted_courses.append( - { - "program": program, - "number": number, - "subnumber": subnumber, - "section": section, - } - ) - return crosslisted_courses - - -def _convert_timetable_term_to_term(timetable_term): - assert len(timetable_term) == 6 - assert timetable_term[:2] == "20" - month = int(timetable_term[-2:]) - year = timetable_term[2:4] - return "{year}{season}".format( - year=year, season={1: "W", 3: "S", 6: "X", 9: "F"}[month] - ) - - -def _parse_distribs(distribs_text): - return distribs_text.split(" or ") if distribs_text else [] - - -def _parse_instructors(instructors): - return instructors.split(", ") if instructors else [] - - -def _get_timetable_term_code(term): - year, term = split_term(term) - return "20{year}0{term_number}".format( - year=year, - term_number={"w": 1, "s": 3, "x": 6, "f": 9}[term.lower()], - ) - - -def import_timetable(timetable_data): - for course_data in timetable_data: - _import_course_data(course_data) - - -@transaction.atomic -def _import_course_data(course_data): - course = _get_or_import_course(course_data) - offering = _update_or_import_offering(course_data, course) - _update_crosslisted_courses(course_data, course) - _update_distribs(course_data, course) - _update_instructors(course_data, offering) - - -def _get_or_import_course(course_data): - course, _ = Course.objects.get_or_create( - department=course_data["program"], - number=course_data["number"], - subnumber=course_data["subnumber"], - defaults={ - "title": course_data["title"], - "source": Course.SOURCES.TIMETABLE, - }, - ) - return course - - -def _update_or_import_offering(course_data, course): - offering, _ = CourseOffering.objects.update_or_create( - course=course, - section=course_data["section"], - term=course_data["term"], - defaults={ - "period": course_data["period"], - "limit": course_data["limit"], - }, - ) - return offering - - -def _update_crosslisted_courses(course_data, course): - crosslisted_courses_data = course_data["crosslisted"] - for crosslisted_course_data in crosslisted_courses_data: - # We ignore missing courses because they should be created later in the - # timetable importing process. - crosslisted_course = Course.objects.filter( - department=crosslisted_course_data["program"], - number=crosslisted_course_data["number"], - subnumber=crosslisted_course_data["subnumber"], - ).first() - if crosslisted_course: - course.crosslisted_courses.add(crosslisted_course) - - -def _update_distribs(course_data, course): - for distrib_name in course_data["distribs"]: - distrib, _ = DistributiveRequirement.objects.get_or_create( - name=distrib_name, - defaults={ - "distributive_type": DistributiveRequirement.DISTRIBUTIVE, - }, - ) - course.distribs.add(distrib) - - if course_data["world_culture"]: - distrib, _ = DistributiveRequirement.objects.get_or_create( - name=course_data["world_culture"], - defaults={ - "distributive_type": DistributiveRequirement.WORLD_CULTURE, - }, - ) - course.distribs.add(distrib) - - -def _update_instructors(course_data, offering): - for instructor_name in course_data["instructor"]: - instructor, _ = Instructor.objects.get_or_create( - name=instructor_name, - ) - offering.instructors.add(instructor) diff --git a/lib/task_utils.py b/lib/task_utils.py deleted file mode 100644 index cdda40e..0000000 --- a/lib/task_utils.py +++ /dev/null @@ -1,54 +0,0 @@ -import socket -import traceback -from functools import wraps - -from django.conf import settings -from django.core.mail import send_mail - -from lib import constants - - -def email_if_fails(fn): - @wraps(fn) - def decorated(*args, **kwargs): - try: - return fn(*args, **kwargs) - except: - if not settings.DEBUG: - try: - fnName = fn.func_name - except AttributeError: - fnName = fn.__name__ - send_error_email( - fnName, args, kwargs, socket.gethostname(), traceback.format_exc() - ) - raise - - return decorated - - -def send_error_email(fnName, args, kwargs, host, formatted_exc): - formatted_exc = formatted_exc.strip() - contents = ( - "Task: {fnName}\nArgs: {args}\nKwargs: {kwargs}\nHost: {host}\n" - "Error: {error}".format( - fnName=fnName, - args=args, - kwargs=kwargs, - host=host, - error=formatted_exc, - ) - ) - short_exc = formatted_exc.rsplit("\n")[-1] - subject = "[celery-error] {host} {fnName} {short_exc}".format( - host=host, - fnName=fnName, - short_exc=short_exc, - ) - send_mail( - subject, - contents, - constants.SUPPORT_EMAIL, - [email for _, email in settings.ADMINS], - fail_silently=False, - )