Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 57 additions & 11 deletions website/admin/person_admin.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from django import forms
from django.contrib import admin
from django.core.files import File
from website.models import Position, Person, ProjectRole
from website.models import Position, Person, ProjectRole, Publication, Talk
from website.models.position import Title
from website.models.person import PERSON_THUMBNAIL_SIZE
from easy_thumbnails.exceptions import InvalidImageFormatError # for handling invalid images
from website.admin_list_filters import PositionRoleListFilter, PositionTitleListFilter
from website.admin.utils import get_active_professors_queryset, get_active_mentors_queryset
from website.admin.utils import (get_active_professors_queryset, get_active_mentors_queryset,
related_count_subquery)
from image_cropping import ImageCroppingMixin
from image_cropping.widgets import EasterEggCropImageWidget
import website.utils.fileutils as ml_fileutils
Expand Down Expand Up @@ -179,22 +180,67 @@ def save_model(self, request, obj, form, change):

# The list display lets us control what is shown in the default persons table at Home > Website > People
# info on displaying multiple entries comes from http://stackoverflow.com/questions/9164610/custom-columns-using-django-admin
list_display = ('get_full_name', 'get_display_thumbnail', 'get_current_title', 'get_current_role', 'is_active',
'get_start_date', 'get_cur_pos_start_date', 'get_end_date', 'recent_projects', 'get_project_count', 'get_pub_count',
'get_talk_count', 'display_time_current_position', 'display_total_time_as_member')
# The count columns (project_count / pub_count / talk_count) read annotations
# set in get_queryset() rather than the per-row model count methods (#1346).
list_display = ('get_full_name', 'get_display_thumbnail', 'get_current_title', 'get_current_role', 'is_active',
'get_start_date', 'get_cur_pos_start_date', 'get_end_date', 'recent_projects', 'project_count', 'pub_count',
'talk_count', 'display_time_current_position', 'display_total_time_as_member')

list_filter = (PositionRoleListFilter, PositionTitleListFilter)

# The changelist renders ~14 columns of position/count data per person; cap
# the page so the per-row thumbnail filesystem check stays bounded (#1346).
list_per_page = 50

def get_queryset(self, request):
"""Make the People changelist issue a roughly constant number of queries
regardless of how many people are listed (the #1346 perf audit).

- ``position_set`` is prefetched because nearly every column ("current
title/role", dates, durations, is_active) and the Role filter funnel
through ``Person.get_latest_position``, which now reads this prefetch
cache instead of issuing ``.latest()`` per row.
- ``projectrole_set__project`` backs :meth:`recent_projects`.
- the three ``_*_count`` annotations back the sortable count columns,
replacing three per-row ``COUNT(*)`` queries with scalar subqueries.
"""
return (super().get_queryset(request)
.prefetch_related('position_set', 'projectrole_set__project')
.annotate(
_project_count=related_count_subquery(ProjectRole, 'person'),
_pub_count=related_count_subquery(Publication, 'authors'),
_talk_count=related_count_subquery(Talk, 'authors'),
))

def recent_projects(self, obj):
# Get the three most recent projects for this person based on start_date
recent_projects = (ProjectRole.objects.filter(person=obj)
.order_by('-start_date')[:3])

# Return the project names as a comma-separated string
return ', '.join([str(project.project) for project in recent_projects])
"""The person's three most recent project roles (by start_date), as a
comma-separated list of project names. Reads ``obj.projectrole_set.all()``
(prefetched with its ``project`` in :meth:`get_queryset`) and sorts in
Python, so it adds no per-row queries on the changelist."""
roles = sorted(obj.projectrole_set.all(),
key=lambda role: role.start_date, reverse=True)[:3]
return ', '.join(str(role.project) for role in roles)

recent_projects.short_description = 'Recent Projects' # Sets column name in admin interface

def project_count(self, obj):
"""Number of project roles (annotated in get_queryset; sortable)."""
return obj._project_count
project_count.short_description = 'Projects'
project_count.admin_order_field = '_project_count'

def pub_count(self, obj):
"""Number of publications authored (annotated in get_queryset; sortable)."""
return obj._pub_count
pub_count.short_description = 'Pubs'
pub_count.admin_order_field = '_pub_count'

def talk_count(self, obj):
"""Number of talks given (annotated in get_queryset; sortable)."""
return obj._talk_count
talk_count.short_description = 'Talks'
talk_count.admin_order_field = '_talk_count'

def display_time_current_position(self, obj):
"""Displays the time in the current position"""
duration = obj.get_time_in_current_position
Expand Down
139 changes: 133 additions & 6 deletions website/admin/project_admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from django.contrib import admin
from django.contrib.admin import widgets
from website.models import Project, Banner, Photo, ProjectRole, Grant
from website.models import (Project, Banner, Photo, ProjectRole, Grant,
Publication, Talk, Video, Person)
from website.models.project import PROJECT_THUMBNAIL_SIZE
from website.admin_list_filters import ActiveProjectsFilter
from image_cropping import ImageCroppingMixin
Expand All @@ -9,9 +10,27 @@
from easy_thumbnails.files import get_thumbnailer # for generating thumbnails
import os # for checking if thumbnail file exists
from django import forms
from django.db.models import F
from django.db.models import F, Q, Count, OuterRef, Subquery, IntegerField, Value
from django.db.models.functions import Greatest, Coalesce
from website.admin.utils import related_count_subquery
from website.admin.admin_site import ml_admin_site


def _contributor_count_subquery():
"""Distinct count of people who either hold a ProjectRole on the project OR
are an author on one of its publications — matching Project.get_contributors,
which unions those two sets (#1346). One correlated subquery: count distinct
Person ids matching either relation for the outer project, grouped by a
constant so it returns a single scalar."""
people = (Person.objects
.filter(Q(projectrole__project=OuterRef('pk')) |
Q(publication__projects=OuterRef('pk')))
.order_by()
.values(_grp=Value(1)) # group by a constant -> one row
.annotate(_c=Count('pk', distinct=True))
.values('_c')[:1])
return Coalesce(Subquery(people, output_field=IntegerField()), 0)

class ProjectRoleInline(admin.TabularInline):
model = ProjectRole
extra = 1 # Number of extra forms displayed
Expand Down Expand Up @@ -53,11 +72,16 @@ class ProjectAdmin(ImageCroppingMixin, admin.ModelAdmin):

# The list display lets us control what is shown in the Project table at Home > Website > Project
# info on displaying multiple entries comes from http://stackoverflow.com/questions/9164610/custom-columns-using-django-admin
# The count / most-recent-artifact columns read annotations set in
# get_queryset() (sortable) rather than the per-row model methods (#1346).
list_display = ('name', 'is_visible', 'get_display_thumbnail', 'start_date', 'end_date', 'has_ended',
'get_contributor_count', 'get_people_count',
'get_current_member_count', 'get_past_member_count',
'get_most_recent_artifact_date', 'get_most_recent_artifact_type',
'get_publication_count', 'get_video_count', 'get_talk_count', 'get_banner_count')
'contributor_count', 'people_count',
'current_member_count', 'past_member_count',
'most_recent_artifact_date', 'most_recent_artifact_type',
'pub_count', 'video_count', 'talk_count', 'banner_count')

# Bounds the per-row gallery-image filesystem check on the changelist (#1346).
list_per_page = 50

fieldsets = [
(None, {'fields': ['name', 'short_name', 'is_visible']}),
Expand All @@ -68,6 +92,109 @@ class ProjectAdmin(ImageCroppingMixin, admin.ModelAdmin):

list_filter = (ActiveProjectsFilter, 'is_visible')

def get_queryset(self, request):
"""Collapse the Project changelist's ~10 per-row count/aggregate columns
into a single query (#1346). Each count is an independent scalar subquery
(see related_count_subquery), so the joins don't multiply each other; the
three most-recent-artifact dates are scalar subqueries combined into one
sortable date. With the default 100 projects this turns ~1,000+ queries
into a handful.

The model methods (get_publication_count, get_most_recent_artifact, ...)
are left intact for the public site / detail views; only the changelist
columns are repointed at these annotations.
"""
def latest_artifact_date(model):
# Most recent artifact date for this project, as a scalar subquery.
return Subquery(
model.objects.filter(projects=OuterRef('pk'))
.order_by('-date').values('date')[:1]
)

return (super().get_queryset(request).annotate(
_pub_count=related_count_subquery(Publication, 'projects'),
_talk_count=related_count_subquery(Talk, 'projects'),
_video_count=related_count_subquery(Video, 'projects'),
_banner_count=related_count_subquery(Banner, 'project'),
_people_count=related_count_subquery(
ProjectRole, 'project', count_field='person', distinct=True),
_current_member_count=related_count_subquery(
ProjectRole, 'project', count_field='person', distinct=True,
extra_filter=Q(end_date__isnull=True)),
_past_member_count=related_count_subquery(
ProjectRole, 'project', count_field='person', distinct=True,
extra_filter=Q(end_date__isnull=False)),
_contributor_count=_contributor_count_subquery(),
_pub_max_date=latest_artifact_date(Publication),
_talk_max_date=latest_artifact_date(Talk),
_video_max_date=latest_artifact_date(Video),
).annotate(
# Greatest ignores NULLs on Postgres, so this is the max of whichever
# artifact types exist (NULL only when the project has none).
_recent_artifact_date=Greatest(
'_pub_max_date', '_talk_max_date', '_video_max_date'),
))

# --- Annotation-backed changelist columns (sortable; see get_queryset) ---

def pub_count(self, obj):
return obj._pub_count
pub_count.short_description = 'Pubs'
pub_count.admin_order_field = '_pub_count'

def video_count(self, obj):
return obj._video_count
video_count.short_description = 'Videos'
video_count.admin_order_field = '_video_count'

def talk_count(self, obj):
return obj._talk_count
talk_count.short_description = 'Talks'
talk_count.admin_order_field = '_talk_count'

def banner_count(self, obj):
return obj._banner_count
banner_count.short_description = 'Banners'
banner_count.admin_order_field = '_banner_count'

def people_count(self, obj):
return obj._people_count
people_count.short_description = 'Num People'
people_count.admin_order_field = '_people_count'

def current_member_count(self, obj):
return obj._current_member_count
current_member_count.short_description = 'Num Current Members'
current_member_count.admin_order_field = '_current_member_count'

def past_member_count(self, obj):
return obj._past_member_count
past_member_count.short_description = 'Num Past Members'
past_member_count.admin_order_field = '_past_member_count'

def contributor_count(self, obj):
return obj._contributor_count
contributor_count.short_description = 'Contributors'
contributor_count.admin_order_field = '_contributor_count'

def most_recent_artifact_date(self, obj):
return obj._recent_artifact_date
most_recent_artifact_date.short_description = 'Most Recent Artifact Date'
most_recent_artifact_date.admin_order_field = '_recent_artifact_date'

def most_recent_artifact_type(self, obj):
"""Type ('Publication' / 'Talk' / 'Video') of the most recent artifact,
derived from the three annotated max-dates. Ties resolve in the same
order as Project.get_most_recent_artifact (pub, then talk, then video)."""
candidates = [('Publication', obj._pub_max_date),
('Talk', obj._talk_max_date),
('Video', obj._video_max_date)]
candidates = [(label, d) for label, d in candidates if d is not None]
if not candidates:
return None
return max(candidates, key=lambda c: c[1])[0]
most_recent_artifact_type.short_description = 'Most Recent Artifact Type'

def get_display_thumbnail(self, obj):
if obj.gallery_image and os.path.isfile(obj.gallery_image.path):
# Use easy_thumbnails to generate a thumbnail
Expand Down
38 changes: 37 additions & 1 deletion website/admin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,49 @@
person_admin.py and position_admin.py.
"""

from django.db.models import Q, Case, When, Value, IntegerField
from django.db.models import Q, Case, When, Value, IntegerField, Count, OuterRef, Subquery
from django.db.models.functions import Coalesce
from django.utils import timezone

from website.models import Person, Position
from website.models.position import Title


def related_count_subquery(model, fk_lookup, *, count_field='pk', distinct=False,
extra_filter=None):
"""Build a correlated-subquery annotation that counts related ``model`` rows
for each outer row, for use in a ``ModelAdmin.get_queryset()`` override.

Why a subquery instead of ``annotate(Count(...))``: stacking several
``Count()`` annotations over different multi-valued relations in one query
makes the joins multiply rows (a cartesian blow-up) and, without care, wrong
counts. Each count here is an independent scalar subquery, so N count columns
cost N cheap correlated subqueries in a single changelist query — not a
growing pile of joins, and not one query per row.

Args:
model: the related model to count (e.g. ``Publication``).
fk_lookup: the field on ``model`` pointing back to the outer row
(e.g. ``'projects'`` for ``Publication.projects``, ``'person'`` for
``ProjectRole.person``). ``OuterRef('pk')`` is matched against it.
count_field: field to count; defaults to ``'pk'`` (i.e. count rows).
distinct: count distinct values of ``count_field`` (e.g. distinct people).
extra_filter: an optional ``Q`` further constraining the related rows.

Returns:
An expression resolving to an int (0 when there are no related rows).
"""
qs = model.objects.filter(**{fk_lookup: OuterRef('pk')})
if extra_filter is not None:
qs = qs.filter(extra_filter)
# Group by the outer key so the subquery returns exactly one row (the count).
qs = (qs.order_by()
.values(fk_lookup)
.annotate(_c=Count(count_field, distinct=distinct))
.values('_c')[:1])
return Coalesce(Subquery(qs, output_field=IntegerField()), 0)


def get_active_professors_queryset(prioritized_name=Person.DIRECTOR_NAME):
"""
Build a queryset of currently active professors, optionally prioritizing a specific person.
Expand Down
6 changes: 5 additions & 1 deletion website/admin_list_filters/position_role_list_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,12 @@ def queryset(self, request, queryset):

# Compare the requested value (either True or False)
# to decide how to filter the queryset.
# prefetch_related('position_set') so the membership checks below
# (is_current_member, is_alumni_member, ...) — which all read the
# person's positions — resolve from one prefetched query instead of
# ~2 queries per person on every People changelist load.
filtered_person_ids = []
for person in Person.objects.all():
for person in Person.objects.all().prefetch_related('position_set'):
if person.is_current_member is True and self.value() is None:
filtered_person_ids.append(person.id)
elif person.is_alumni_member is True and person.is_current_member is False and self.value() == "past_member":
Expand Down
Loading
Loading