Skip to content

Commit b9d1f73

Browse files
committed
Materialized views
1 parent f858d5b commit b9d1f73

File tree

3 files changed

+381
-2
lines changed

3 files changed

+381
-2
lines changed

backend/PennCourses/settings/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
"plan",
5555
"review",
5656
"degree",
57+
"django_pgviews",
5758
]
5859

5960
MIDDLEWARE = [
Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,277 @@
1+
"""
2+
Materialized Views - Django ORM Approach
3+
=========================================
4+
5+
This is the RECOMMENDED approach. It reuses your existing review_averages() logic
6+
but captures the generated SQL for materialized views.
7+
8+
Key insight: Your review_averages() function already generates the perfect SQL.
9+
We just need to capture it and use it for the materialized view definition.
10+
"""
11+
12+
from django_pgviews import view as pg
13+
from django.db import models
14+
15+
16+
# Review metrics exposed by every view in this module.
_REVIEW_FIELDS = ('course_quality', 'difficulty', 'instructor_quality', 'work_required')


def _compile_queryset(queryset):
    """
    Compile *queryset* to SQL without executing it.

    Returns the ``(sql, params)`` pair produced by the query compiler. The SQL
    text contains ``%s`` placeholders, so it is only a valid statement when it
    is executed together with ``params``.
    """
    compiler = queryset.query.get_compiler(using=queryset.db)
    return compiler.as_sql()


def _as_view_sql(queryset):
    """
    Compile *queryset* into the ``pg.ViewSQL(query, params)`` tuple that
    django-pgviews passes to ``cursor.execute`` when it creates a view.
    """
    sql, params = _compile_queryset(queryset)
    return pg.ViewSQL(sql, list(params))


def _course_review_queryset():
    """
    Build the queryset backing the course review view.

    Annotates every Course with review averages via review_averages(), scoped
    to the course's topic, and selects only the columns exposed by the view.
    """
    # Imported lazily so that importing this module does not pull in the
    # models/views modules as a side effect.
    from courses.models import Course
    from review.annotations import review_averages
    from review.views import section_filters_pcr
    from django.db.models import Q, OuterRef

    queryset = review_averages(
        Course.objects.all(),
        reviewbit_subfilters=Q(review__section__course__topic=OuterRef("topic")),
        section_subfilters=(section_filters_pcr & Q(course__topic=OuterRef("topic"))),
        fields=list(_REVIEW_FIELDS),
        prefix="",
        extra_metrics=False,
    )

    # Select only the fields we want in the view
    return queryset.values(
        'id',
        'topic_id',
        'semester',
        'full_code',
        'title',
        *_REVIEW_FIELDS,
    )


def _get_course_review_sql():
    """
    Return the SQL text for the course review view.

    The returned string still contains ``%s`` placeholders and the bound
    parameters are not part of it, so it is suitable for inspection/logging
    only. The view class uses get_sql(), which keeps SQL and parameters
    together.
    """
    sql, _params = _compile_queryset(_course_review_queryset())
    return sql


def _section_review_queryset():
    """
    Build the queryset backing the section review view.

    Applies sections_with_reviews() to all sections and selects only the
    columns exposed by the view.
    """
    from courses.models import Section, sections_with_reviews

    queryset = sections_with_reviews(Section.objects.all())

    # Select only the fields we want
    return queryset.values(
        'id',
        'course_id',
        'code',
        'course__topic_id',
        *_REVIEW_FIELDS,
    )


def _get_section_review_sql():
    """
    Return the SQL text for the section review view.

    The returned string still contains ``%s`` placeholders and the bound
    parameters are not part of it; see get_sql() on the view class for the
    executable form.
    """
    sql, _params = _compile_queryset(_section_review_queryset())
    return sql


def _recent_course_review_queryset():
    """
    Build the queryset backing the recent course review view.

    Same annotations as the course review view, but restricted to the most
    recent semester that has reviews with responses, and prefixed with
    ``recent_``.
    """
    from courses.models import Course
    from review.annotations import review_averages
    from review.views import section_filters_pcr
    from django.db.models import Q, OuterRef, Subquery, Max, Value
    from review.models import Review

    # Reviews with at least one response.
    # NOTE(review): OuterRef("course__topic") is resolved against whichever
    # queryset this subquery ends up embedded in by review_averages() --
    # confirm that `course__topic` is a valid path there.
    matching_reviews = Review.objects.filter(
        section__course__topic=OuterRef("course__topic"),
        responses__gt=0
    )

    # Most recent semester among the matching reviews. Annotating a constant
    # and grouping by it collapses all rows into a single group for Max().
    recent_sem_subquery = Subquery(
        matching_reviews
        .annotate(common=Value(1))
        .values("common")
        .annotate(max_semester=Max("section__course__semester"))
        .values("max_semester")[:1]
    )

    # Apply review_averages with recent semester filter
    reviewbit_subfilters = (
        Q(review__section__course__topic=OuterRef("topic"))
        & Q(review__section__course__semester=recent_sem_subquery)
    )

    section_subfilters = (
        section_filters_pcr
        & Q(course__topic=OuterRef("topic"))
        & Q(course__semester=recent_sem_subquery)
    )

    queryset = review_averages(
        Course.objects.all(),
        reviewbit_subfilters=reviewbit_subfilters,
        section_subfilters=section_subfilters,
        fields=list(_REVIEW_FIELDS),
        prefix="recent_",
        semester_aggregations=True,
        extra_metrics=False,
    )

    return queryset.values(
        'id',
        'topic_id',
        'semester',
        'recent_course_quality',
        'recent_difficulty',
        'recent_instructor_quality',
        'recent_work_required',
        'recent_semester_calc',
        'recent_semester_count',
    )


def _get_recent_course_review_sql():
    """
    Return the SQL text for the recent course review view.

    The returned string still contains ``%s`` placeholders and the bound
    parameters are not part of it; see get_sql() on the view class for the
    executable form.
    """
    sql, _params = _compile_queryset(_recent_course_review_queryset())
    return sql


# ============================================================================
# Materialized View Definitions
# ============================================================================

class CourseReviewMaterialized(pg.MaterializedView):
    """
    Materialized view caching course review averages.

    The view definition is generated from the ORM by review_averages().
    Refresh with: REFRESH MATERIALIZED VIEW CONCURRENTLY course_review_averages_mv
    """

    concurrent_index = 'id'

    @classmethod
    def get_sql(cls):
        """
        Return the view definition as ``pg.ViewSQL(query, params)``.

        The compiled ORM query contains ``%s`` placeholders, so the bound
        parameters must travel with the SQL. Building it here (rather than in
        the class body) also avoids compiling ORM queries at import time.
        """
        return _as_view_sql(_course_review_queryset())

    # Fields match what's selected in the query
    topic_id = models.IntegerField(null=True)
    semester = models.CharField(max_length=5)
    full_code = models.CharField(max_length=16)
    title = models.TextField()

    course_quality = models.FloatField(null=True)
    difficulty = models.FloatField(null=True)
    instructor_quality = models.FloatField(null=True)
    work_required = models.FloatField(null=True)

    class Meta:
        managed = False
        db_table = 'course_review_averages_mv'


class SectionReviewMaterialized(pg.MaterializedView):
    """
    Materialized view caching section review averages.

    The view definition is generated from the ORM by sections_with_reviews().
    Refresh with: REFRESH MATERIALIZED VIEW CONCURRENTLY section_review_averages_mv
    """

    concurrent_index = 'id'

    @classmethod
    def get_sql(cls):
        """
        Return the view definition as ``pg.ViewSQL(query, params)``.

        Built lazily so the SQL and its bound parameters stay together and no
        ORM query is compiled at import time.
        """
        return _as_view_sql(_section_review_queryset())

    course_id = models.IntegerField()
    code = models.CharField(max_length=16)
    # NOTE(review): db_column must equal the column name the compiled SELECT
    # actually emits for values('course__topic_id') -- verify against the
    # created view (it may be emitted as plain "topic_id").
    topic_id = models.IntegerField(null=True, db_column='course__topic_id')

    course_quality = models.FloatField(null=True)
    difficulty = models.FloatField(null=True)
    instructor_quality = models.FloatField(null=True)
    work_required = models.FloatField(null=True)

    class Meta:
        managed = False
        db_table = 'section_review_averages_mv'


class RecentCourseReviewMaterialized(pg.MaterializedView):
    """
    Materialized view caching recent semester course review averages.

    Only includes reviews from the most recent semester per topic.
    Refresh with: REFRESH MATERIALIZED VIEW CONCURRENTLY recent_course_review_averages_mv
    """

    concurrent_index = 'id'

    @classmethod
    def get_sql(cls):
        """
        Return the view definition as ``pg.ViewSQL(query, params)``.

        Built lazily so the SQL and its bound parameters stay together and no
        ORM query is compiled at import time.
        """
        return _as_view_sql(_recent_course_review_queryset())

    topic_id = models.IntegerField(null=True)
    semester = models.CharField(max_length=5)

    recent_course_quality = models.FloatField(null=True)
    recent_difficulty = models.FloatField(null=True)
    recent_instructor_quality = models.FloatField(null=True)
    recent_work_required = models.FloatField(null=True)

    recent_semester_calc = models.CharField(max_length=5, null=True)
    recent_semester_count = models.IntegerField(null=True)

    class Meta:
        managed = False
        db_table = 'recent_course_review_averages_mv'
243+
244+
245+
# ============================================================================
246+
# Usage Notes
247+
# ============================================================================
248+
249+
"""
250+
After creating these views, use them in your managers:
251+
252+
from review.materialized_views import (
253+
CourseReviewMaterialized,
254+
SectionReviewMaterialized,
255+
RecentCourseReviewMaterialized,
256+
)
257+
258+
class OptimizedCourseManager(models.Manager):
259+
def get_queryset(self):
260+
qs = super().get_queryset()
261+
262+
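        # Join to the materialized view. Note: extra(tables=..., where=...) is an inner join, so courses with no row in the view are excluded.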
263+
qs = qs.extra(
264+
select={
265+
'course_quality': 'crm.course_quality',
266+
'difficulty': 'crm.difficulty',
267+
'instructor_quality': 'crm.instructor_quality',
268+
'work_required': 'crm.work_required',
269+
},
270+
tables=['course_review_averages_mv crm'],
271+
where=['crm.id = courses_course.id'],
272+
)
273+
274+
return qs
275+
276+
Or use annotations with subqueries (shown in materialized_managers.py).
277+
"""

0 commit comments

Comments
 (0)