From f497eead460241830b7d268fabdf968151c4d6bd Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Thu, 18 Jun 2026 12:08:16 -0700 Subject: [PATCH] feat(admin): flag missing project PI/Co-PI in data health; fix get_pis/get_co_pis (#1182) Add a read-only "project-leadership" data-health check that flags projects with no PI on record, or ongoing projects whose PI role(s) have all ended (no currently-active PI). Co-PI absence is surfaced as context but not flagged, since many projects legitimately have only a PI. Also fix Project.get_pis()/get_co_pis(), which filtered on a nonexistent "pi_member" field (the real field is lead_project_role) and so raised FieldError on any call. They now filter on lead_project_role and return a deduplicated QuerySet of Person objects as their docstrings promise. No callers existed, so changing the return type from person IDs to Person objects is safe. Adds regression tests for the new check and for get_pis/get_co_pis. Co-Authored-By: Claude Opus 4.8 (1M context) --- website/admin/data_health/checks/__init__.py | 1 + .../data_health/checks/project_leadership.py | 88 +++++++++++++++++++ website/models/project.py | 28 +++--- website/tests/test_data_health.py | 73 ++++++++++++++- website/tests/test_project.py | 55 ++++++++++++ 5 files changed, 234 insertions(+), 11 deletions(-) create mode 100644 website/admin/data_health/checks/project_leadership.py diff --git a/website/admin/data_health/checks/__init__.py b/website/admin/data_health/checks/__init__.py index 1af4a5ee..ca356579 100644 --- a/website/admin/data_health/checks/__init__.py +++ b/website/admin/data_health/checks/__init__.py @@ -9,6 +9,7 @@ media_integrity, publication_quality, project_health, + project_leadership, position_integrity, news_health, ) diff --git a/website/admin/data_health/checks/project_leadership.py b/website/admin/data_health/checks/project_leadership.py new file mode 100644 index 00000000..f155b411 --- /dev/null +++ b/website/admin/data_health/checks/project_leadership.py @@ -0,0 +1,88 @@ +""" +Data-health check: projects with missing PI/Co-PI leadership info (#1182). + +PI/Co-PI tracking lives on ``ProjectRole.lead_project_role`` (values ``PI`` / +``Co-PI``), not on the ``Project`` itself. This check flags two gaps: + +- **no PI** — the project has no ``ProjectRole`` marked PI at all. Per the model + help text, most projects should have Jon Froehlich as PI, so a project with + zero PI roles is almost always an oversight. +- **no active PI** — the project hasn't ended, but every PI role on it has an + end date in the past, so nobody is currently the PI of a live project. + +Co-PI absence is intentionally *not* flagged: many projects legitimately have +only a PI. The Co-PI count is surfaced as context. Read-only. +""" + +from datetime import date + +from website.admin.data_health.registry import HealthCheck, register_check +from website.models import Project +from website.models.project_role import LeadProjectRoleTypes + + +@register_check +class ProjectLeadershipCheck(HealthCheck): + slug = 'project-leadership' + title = 'Project leadership (PI/Co-PI)' + description = ( + 'Projects with no PI on record, or ongoing projects whose PI role(s) ' + 'have all ended (no currently-active PI).' + ) + group = 'Projects' + columns = [ + 'id', 'name', 'short_name', 'is_visible', 'has_ended', + 'pi_count', 'active_pi_count', 'copi_count', 'pis', 'issues', + ] + + def get_rows(self): + today = date.today() + rows = [] + projects = Project.objects.all().prefetch_related('projectrole_set__person') + for project in projects: + roles = list(project.projectrole_set.all()) + pi_roles = [ + r for r in roles + if r.lead_project_role == LeadProjectRoleTypes.PI + ] + copi_roles = [ + r for r in roles + if r.lead_project_role == LeadProjectRoleTypes.CO_PI + ] + + def _is_active(role): + return ( + role.start_date and role.start_date <= today + and (role.end_date is None or role.end_date >= today) + ) + + active_pi_count = sum(1 for r in pi_roles if _is_active(r)) + has_ended = project.has_ended() + + issues = [] + if not pi_roles: + issues.append('no PI') + elif not has_ended and active_pi_count == 0: + issues.append('no active PI') + + if not issues: + continue + + pi_names = ', '.join( + r.person.get_full_name() for r in pi_roles + ) + rows.append({ + 'id': project.pk, + 'name': project.name, + 'short_name': project.short_name, + 'is_visible': bool(project.is_visible), + 'has_ended': has_ended, + 'pi_count': len(pi_roles), + 'active_pi_count': active_pi_count, + 'copi_count': len(copi_roles), + 'pis': pi_names, + 'issues': ', '.join(issues), + }) + + rows.sort(key=lambda r: r['name'].lower()) + return rows diff --git a/website/models/project.py b/website/models/project.py index abaefc33..58dd113d 100644 --- a/website/models/project.py +++ b/website/models/project.py @@ -335,18 +335,26 @@ def get_thumbnail_alt_text(self): return self.thumbnail_alt_text def get_pis(self): - """Returns the PIs for the project as a QuerySet of Person objects""" - pis_queryset = (self.projectrole_set - .filter(pi_member=LeadProjectRoleTypes.PI) - .values_list('person', flat=True)) - return pis_queryset + """Returns the PIs for the project as a QuerySet of Person objects. + + PI status is tracked on ``ProjectRole.lead_project_role`` (not on the + Project itself), so we filter the project's roles by that field. + """ + return Person.objects.filter( + projectrole__project=self, + projectrole__lead_project_role=LeadProjectRoleTypes.PI, + ).distinct() def get_co_pis(self): - """Returns the Co-PIs for this project as a QuerySet of Person objects""" - copis_queryset = (self.projectrole_set - .filter(pi_member=LeadProjectRoleTypes.CO_PI) - .values_list('person', flat=True)) - return copis_queryset + """Returns the Co-PIs for this project as a QuerySet of Person objects. + + Co-PI status is tracked on ``ProjectRole.lead_project_role`` (not on the + Project itself), so we filter the project's roles by that field. + """ + return Person.objects.filter( + projectrole__project=self, + projectrole__lead_project_role=LeadProjectRoleTypes.CO_PI, + ).distinct() def has_award(self): """ diff --git a/website/tests/test_data_health.py b/website/tests/test_data_health.py index afcece2d..fc1df993 100644 --- a/website/tests/test_data_health.py +++ b/website/tests/test_data_health.py @@ -173,6 +173,74 @@ def test_flags_incomplete_project(self): self.assertIn("no publication", rows["Lonely Project"]["issues"]) +class ProjectLeadershipCheckTests(DatabaseTestCase): + def _add_role(self, project, lead_role, start_date, end_date=None): + from website.models import ProjectRole + + person = self.make_person(first_name="Lead", last_name="Person") + return ProjectRole.objects.create( + person=person, + project=project, + lead_project_role=lead_role, + start_date=start_date, + end_date=end_date, + ) + + def test_flags_project_with_no_pi(self): + proj = self.make_project(name="PI-less Project") + rows = {r["name"]: r for r in get_check("project-leadership").get_rows()} + self.assertIn("PI-less Project", rows) + self.assertEqual(rows["PI-less Project"]["issues"], "no PI") + self.assertEqual(rows["PI-less Project"]["pi_count"], 0) + + def test_ongoing_project_with_only_an_ended_pi_flagged_no_active_pi(self): + from datetime import date, timedelta + from website.models.project_role import LeadProjectRoleTypes + + proj = self.make_project(name="Stale Lead Project") # no end_date => ongoing + self._add_role( + proj, + LeadProjectRoleTypes.PI, + start_date=date.today() - timedelta(days=400), + end_date=date.today() - timedelta(days=30), + ) + rows = {r["name"]: r for r in get_check("project-leadership").get_rows()} + self.assertIn("Stale Lead Project", rows) + self.assertEqual(rows["Stale Lead Project"]["issues"], "no active PI") + self.assertEqual(rows["Stale Lead Project"]["pi_count"], 1) + self.assertEqual(rows["Stale Lead Project"]["active_pi_count"], 0) + + def test_project_with_active_pi_not_flagged(self): + from datetime import date, timedelta + from website.models.project_role import LeadProjectRoleTypes + + proj = self.make_project(name="Well-Led Project") + self._add_role( + proj, + LeadProjectRoleTypes.PI, + start_date=date.today() - timedelta(days=30), + ) + names = {r["name"] for r in get_check("project-leadership").get_rows()} + self.assertNotIn("Well-Led Project", names) + + def test_ended_project_with_ended_pi_not_flagged(self): + from datetime import date, timedelta + from website.models.project_role import LeadProjectRoleTypes + + proj = self.make_project( + name="Wrapped-Up Project", + end_date=date.today() - timedelta(days=10), + ) + self._add_role( + proj, + LeadProjectRoleTypes.PI, + start_date=date.today() - timedelta(days=400), + end_date=date.today() - timedelta(days=20), + ) + names = {r["name"] for r in get_check("project-leadership").get_rows()} + self.assertNotIn("Wrapped-Up Project", names) + + class PositionIntegrityCheckTests(DatabaseTestCase): def test_no_position_and_self_advisor(self): from datetime import date as _date @@ -243,7 +311,10 @@ def test_get_rows_does_not_mutate_db(self): self.make_person(first_name="Jane", last_name="Doe") self.make_person(first_name="Jane", last_name="Doe") before = (Person.objects.count(), Publication.objects.count()) - for slug in ("duplicate-people", "url-name-collisions", "position-integrity"): + for slug in ( + "duplicate-people", "url-name-collisions", "position-integrity", + "project-leadership", + ): get_check(slug).get_rows() after = (Person.objects.count(), Publication.objects.count()) self.assertEqual(before, after) diff --git a/website/tests/test_project.py b/website/tests/test_project.py index 91e25c95..2bd2a634 100644 --- a/website/tests/test_project.py +++ b/website/tests/test_project.py @@ -101,3 +101,58 @@ def test_long_completed_role_outranks_short_ongoing_role(self): result = list(project.get_people()) self.assertEqual(result[0], long_done) self.assertEqual(result[1], short_ongoing) + + +# --- get_pis / get_co_pis regression -------------------------------------- + + +class ProjectGetPisCoPisTests(DatabaseTestCase): + """ + Regression for Project.get_pis / get_co_pis (models/project.py). + + Both methods filtered on a nonexistent ``pi_member`` field (the real field + is ``lead_project_role``), so any call raised FieldError. The fix filters + on ``lead_project_role`` and returns a QuerySet of Person objects as the + docstring promises (#1182). + """ + + def _add_role(self, person, project, lead_role): + from website.models import ProjectRole + return ProjectRole.objects.create( + person=person, project=project, + lead_project_role=lead_role, start_date=date.today(), + ) + + def test_get_pis_and_co_pis_return_correct_people(self): + from website.models import Project + from website.models.project_role import LeadProjectRoleTypes + + project = Project.objects.create(name="Lab Project", short_name="lab") + pi = self.make_person(first_name="Jon", last_name="Froehlich") + co_pi = self.make_person(first_name="Co", last_name="Investigator") + student = self.make_person(first_name="Grad", last_name="Student") + self._add_role(pi, project, LeadProjectRoleTypes.PI) + self._add_role(co_pi, project, LeadProjectRoleTypes.CO_PI) + self._add_role(student, project, LeadProjectRoleTypes.STUDENT_LEAD) + + self.assertEqual(list(project.get_pis()), [pi]) + self.assertEqual(list(project.get_co_pis()), [co_pi]) + + def test_no_pi_returns_empty_queryset(self): + from website.models import Project + + project = Project.objects.create(name="Empty Project", short_name="empty") + self.assertEqual(list(project.get_pis()), []) + self.assertEqual(list(project.get_co_pis()), []) + + def test_duplicate_pi_roles_deduplicated(self): + """A person with two PI roles on one project appears once.""" + from website.models import Project + from website.models.project_role import LeadProjectRoleTypes + + project = Project.objects.create(name="Dup Project", short_name="dup") + pi = self.make_person(first_name="Repeat", last_name="Lead") + self._add_role(pi, project, LeadProjectRoleTypes.PI) + self._add_role(pi, project, LeadProjectRoleTypes.PI) + + self.assertEqual(list(project.get_pis()), [pi])