diff --git a/cms/envs/common.py b/cms/envs/common.py index 44c4ee4e2f9b..b456579fb622 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -898,6 +898,9 @@ def make_lms_template_path(settings): 'openedx_events', + # Core models to represent courses + "openedx_catalog", + # Core apps that power libraries "openedx_content", *openedx_content_backcompat_apps_to_install(), diff --git a/lms/envs/common.py b/lms/envs/common.py index 917dd025e96f..27e0306711e1 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -2020,6 +2020,9 @@ 'openedx_events', + # Core models to represent courses + "openedx_catalog", + # Core apps that power libraries "openedx_content", *openedx_content_backcompat_apps_to_install(), diff --git a/openedx/core/djangoapps/content/course_overviews/migrations/0030_backfill_new_catalog_courseruns.py b/openedx/core/djangoapps/content/course_overviews/migrations/0030_backfill_new_catalog_courseruns.py new file mode 100644 index 000000000000..63641f3e95d4 --- /dev/null +++ b/openedx/core/djangoapps/content/course_overviews/migrations/0030_backfill_new_catalog_courseruns.py @@ -0,0 +1,150 @@ +""" +Data migration to populate the new CourseRun and CatalogCourse models. +""" + +# Generated by Django 5.2.11 on 2026-02-13 21:47 +import logging + +from django.conf import settings +from django.db import migrations +from organizations.api import ensure_organization, exceptions as org_exceptions + +log = logging.getLogger(__name__) + +# https://github.com/openedx/openedx-platform/issues/38036 +NORMALIZE_LANGUAGE_CODES = { + "zh-hans": "zh-cn", + "zh-hant": "zh-hk", + "ca@valencia": "ca-es-valencia", +} + + +def backfill_openedx_catalog(apps, schema_editor): + """ + Populate the new CourseRun and CatalogCourse models. + """ + # CourseOverview is a cache model derived from modulestore; modulestore is the source of truth for courses, so we'll + # use it to get the list of "all courses on the system" to populate the new CourseRun and CatalogCourse models. + CourseIndex = apps.get_model("split_modulestore_django", "SplitModulestoreCourseIndex") + CourseOverview = apps.get_model("course_overviews", "CourseOverview") + CatalogCourse = apps.get_model("openedx_catalog", "CatalogCourse") + CourseRun = apps.get_model("openedx_catalog", "CourseRun") + + created_catalog_course_ids: set[int] = set() + all_course_runs = CourseIndex.objects.filter(base_store="mongodb", library_version="").order_by("course_id") + for course_run in all_course_runs: + org_code: str = course_run.course_id.org + course_code: str = course_run.course_id.course + run_code: str = course_run.course_id.run + + # Ensure that the Organization exists. + try: + org_data = ensure_organization(org_code) + except org_exceptions.InvalidOrganizationException as exc: + # Note: IFF the org exists among the modulestore courses but not in the Organizations database table, + # and if auto-create is disabled (it's enabled by default), this will raise InvalidOrganizationException. It + # would be up to the operator to decide how they want to resolve that. + raise ValueError( + f'The organization short code "{org_code}" exists in modulestore ({course_run.course_id}) but ' + "not the Organizations table, and auto-creating organizations is disabled. You can resolve this by " + "creating the Organization manually (e.g. from the Django admin) or turning on auto-creation. " + "You can set active=False to prevent this Organization from being used other than for historical data. " + ) from exc + if org_data["short_name"] != org_code: + # On most installations, the 'short_name' database column is case insensitive (unfortunately) + log.warning( + 'The course with ID "%s" does not match its Organization.short_name "%s"', + course_run.course_id, + org_data["short_name"], + ) + + # Fetch the CourseOverview if it exists + try: + course_overview = CourseOverview.objects.get(id=course_run.course_id) + except CourseOverview.DoesNotExist: + course_overview = None # Course exists in modulestore but details aren't cached into CourseOverview yet + display_name: str = (course_overview.display_name if course_overview else None) or course_code + + # Determine the course language. + # Note that in Studio, the options for course language generally came from the ALL_LANGUAGES setting, which is + # mostly two-letter language codes with no locale, except it uses "zh_HANS" for Mandarin and "zh_HANT" for + # Cantonese. We normalize those to "zh-cn" and "zh-hk" for consistency with our platform UI languages / + # Transifex, but you can still access the "old" version using the CatalogCourse.language_short + # getter/setter for backwards compatbility. See https://github.com/openedx/openedx-platform/issues/38036 + language = settings.LANGUAGE_CODE + if course_overview and course_overview.language: + language = course_overview.language.lower() + language = language.replace("_", "-") # Ensure we use hyphens for consistency (`en-us` not `en_us`) + # Normalize this language code. The previous/non-normalized code will still be available via the + # "language_short" property for backwards compatibility. + language = NORMALIZE_LANGUAGE_CODES.get(language, language) + if len(language) > 2 and language[2] != "-": + # This seems like an invalid value; revert to the default: + log.warning( + 'The course with ID "%s" has invalid language "%s" - using default language "%s" instead.', + course_run.course_id, + language, + settings.LANGUAGE_CODE, + ) + language = settings.LANGUAGE_CODE + + # Ensure that the CatalogCourse exists. + cc, cc_created = CatalogCourse.objects.get_or_create( + org_id=org_data["id"], + course_code=course_code, + defaults={ + "display_name": display_name, + "language": language, + }, + ) + if cc_created: + created_catalog_course_ids.add(cc.pk) + elif cc.pk in created_catalog_course_ids: + # This CatalogCourse was previously created during this same migration + # Check if all the runs have the same display_name: + if ( + course_overview + and course_overview.display_name + and course_overview.display_name != cc.display_name + and cc.display_name != course_code + ): + # The runs have different names, so just use the course code as the common catalog course name. + cc.display_name = course_code + cc.save(update_fields=["display_name"]) + + if cc.course_code != course_code: + raise ValueError( + f"The course {course_run.course_id} exists in modulestore with a different capitalization of its " + f'course code compared to other instances of the same run ("{course_code}" vs "{cc.course_code}"). ' + "This really should not happen. To fix it, delete the inconsistent course runs (!). " + ) + + # Create the CourseRun + new_run, run_created = CourseRun.objects.get_or_create( + catalog_course=cc, + run=run_code, + course_id=course_run.course_id, + defaults={"display_name": display_name}, + ) + + # Correct the "created" timestamp. Since it has auto_now_add=True, we can't set its value except using update() + # The CourseOverview should have the "created" date unless it's missing or the course was created before + # the CourseOverview model existed. In any case, it should be good enough. Otherwise use the default (now). + if course_overview: + if course_overview.created < cc.created and cc.pk in created_catalog_course_ids: + # Use the 'created' date from the oldest course run that we process. + CatalogCourse.objects.filter(pk=cc.pk).update(created=course_overview.created) + if run_created: + CourseRun.objects.filter(pk=new_run.pk).update(created=course_overview.created) + + +class Migration(migrations.Migration): + dependencies = [ + ("openedx_catalog", "0001_initial"), + ("course_overviews", "0029_alter_historicalcourseoverview_options"), + ("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"), + ] + + operations = [ + migrations.RunPython(backfill_openedx_catalog, reverse_code=migrations.RunPython.noop), + ] diff --git a/openedx/core/djangoapps/content/course_overviews/signals.py b/openedx/core/djangoapps/content/course_overviews/signals.py index 4ae2f4bf0f8a..20c75da758cb 100644 --- a/openedx/core/djangoapps/content/course_overviews/signals.py +++ b/openedx/core/djangoapps/content/course_overviews/signals.py @@ -2,7 +2,6 @@ Signal handler for invalidating cached course overviews """ - import logging from django.db import transaction @@ -10,6 +9,8 @@ from django.dispatch import Signal from django.dispatch.dispatcher import receiver +from openedx_catalog import api as catalog_api +from openedx_catalog.models_api import CourseRun from openedx.core.djangoapps.signals.signals import COURSE_CERT_DATE_CHANGE from xmodule.data import CertificatesDisplayBehaviors from xmodule.modulestore.django import SignalHandler @@ -33,6 +34,8 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable """ Catches the signal that a course has been published in Studio and updates the corresponding CourseOverview cache entry. + + Also sync course data to the openedx_catalog CourseRun model. """ try: previous_course_overview = CourseOverview.objects.get(id=course_key) @@ -41,6 +44,51 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable updated_course_overview = CourseOverview.load_from_module_store(course_key) _check_for_course_changes(previous_course_overview, updated_course_overview) + # Currently, SplitModulestoreCourseIndex is the ultimate source of truth for + # which courses exist. When a course is published, we sync that data to + # CourseOverview, and from CourseOverview to CourseRun. + + # In the future, CourseRun will be the "source of truth" and each CourseRun + # may optionally point to content and get synced to CourseOverview. + + # Ensure a CourseRun exists for this course + try: + course_run = catalog_api.get_course_run(course_key) + except CourseRun.DoesNotExist: + # Presumably this is a newly-created course. Create the CourseRun. + course_run = catalog_api.create_course_run_for_modulestore_course_with( + course_id=course_key, + display_name=updated_course_overview.display_name, + language_short=updated_course_overview.language, + ) + + # Keep the CourseRun up to date as the course is edited: + if updated_course_overview.display_name != course_run.display_name: + catalog_api.sync_course_run_details(course_key, display_name=updated_course_overview.display_name) + # If this course is the only run in the CatalogCourse, should we update the display_name of + # the CatalogCourse to match the run's new name? Currently the only way to edit the name of + # a CatalogCourse is via the Django admin. But it's also not used anywhere yet. + + if ( + updated_course_overview.language + and updated_course_overview.language != course_run.catalog_course.language_short + ): + if course_run.catalog_course.runs.count() == 1: + # This is the only run in this CatalogCourse. Update the language of the CatalogCourse + catalog_api.update_catalog_course( + course_run.catalog_course, + language_short=updated_course_overview.language, + ) + else: + LOG.warning( + 'Course run "%s" language "%s" does not match its catalog course language, "%s"', + str(course_key), + updated_course_overview.language, + course_run.catalog_course.language_short, + ) + + # In the future, this will also sync schedule and other metadata to the CourseRun's related models + @receiver(SignalHandler.course_deleted) def _listen_for_course_delete(sender, course_key, **kwargs): # pylint: disable=unused-argument diff --git a/openedx/core/djangoapps/content/course_overviews/tests/test_sync_with_openedx_catalog.py b/openedx/core/djangoapps/content/course_overviews/tests/test_sync_with_openedx_catalog.py new file mode 100644 index 000000000000..5187e5bd1e3d --- /dev/null +++ b/openedx/core/djangoapps/content/course_overviews/tests/test_sync_with_openedx_catalog.py @@ -0,0 +1,146 @@ +""" +Test that changes to courses get synced into the new openedx_catalog models. +""" + +from openedx_catalog import api as catalog_api + +from cms.djangoapps.contentstore.views.course import rerun_course +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.tests.django_utils import ( + TEST_DATA_ONLY_SPLIT_MODULESTORE_DRAFT_PREFERRED, + ModuleStoreTestCase, + ImmediateOnCommitMixin, +) +from xmodule.modulestore.tests.factories import CourseFactory + + +class CourseOverviewSyncTestCase(ImmediateOnCommitMixin, ModuleStoreTestCase): + """ + Test that changes to courses get synced into the new openedx_catalog models. + """ + + MODULESTORE = TEST_DATA_ONLY_SPLIT_MODULESTORE_DRAFT_PREFERRED + ENABLED_SIGNALS = ["course_published"] + + def test_courserun_creation(self) -> None: + """ + Tests that when a course is created, the `CourseRun` record gets created. + + (Also the corresponding `CatalogCourse`.) + """ + course = CourseFactory.create(display_name="Intro to Testing", emit_signals=True) + course_id = course.location.context_key + + run = catalog_api.get_course_run(course_id) + assert run.display_name == "Intro to Testing" + assert run.course_id == course_id + assert run.catalog_course.course_code == course_id.course + assert run.catalog_course.org_code == course_id.org + + def test_courserun_sync(self) -> None: + """ + Tests that when a course is updated, the catalog records get updated. + + Because the "language" of a course cannot be set in Studio before you + create the course, when a Catalog Course has only a single run, we need + to keep the language of the catalog course in sync with any changes to + the language field of the course run. (Because authors necessarily + create a new course with the default language then edit it to have the + correct language that they actually intended to use for that [catalog] + course.) This is in contrast with display_name, which can actually be + set before creating a course. + """ + # Create a course + course = CourseFactory.create(display_name="Intro to Testing", emit_signals=True) + course_id = course.location.context_key + run = catalog_api.get_course_run(course_id) + assert run.display_name == "Intro to Testing" + assert run.catalog_course.language_short == "en" + + # Update the course's display_name and language: + course.language = "es" + course.display_name = "Introducción a las pruebas" + self.store.update_item(course, ModuleStoreEnum.UserID.test) + + # Check if the catalog data is updated: + run.refresh_from_db() + assert run.display_name == "Introducción a las pruebas" + assert run.catalog_course.language_short == "es" + # Note: for now we don't update the display_name of the catalog course after it has been created. + # We _could_ decide to sync the name from run -> catalog course if there is only one run. + assert run.catalog_course.display_name == "Intro to Testing" + + def test_courserun_sync(self) -> None: + """ + Tests that when a course is updated, the catalog records get updated. + + Because the "language" of a course cannot be set in Studio before you + create the course, when a Catalog Course has only a single run, we need + to keep the language of the catalog course in sync with any changes to + the language field of the course run. (Because authors necessarily + create a new course with the default language then edit it to have the + correct language that they actually intended to use for that [catalog] + course.) This is in contrast with display_name, which can actually be + set before creating a course. + """ + # Create a course + course = CourseFactory.create(display_name="Intro to Testing", emit_signals=True) + course_id = course.location.context_key + run = catalog_api.get_course_run(course_id) + assert run.display_name == "Intro to Testing" + assert run.catalog_course.language_short == "en" + + # Update the course's display_name and language: + course.language = "es" + course.display_name = "Introducción a las pruebas" + self.store.update_item(course, ModuleStoreEnum.UserID.test) + + # Check if the catalog data is updated: + run.refresh_from_db() + assert run.display_name == "Introducción a las pruebas" + assert run.catalog_course.language_short == "es" + # Note: for now we don't update the display_name of the catalog course after it has been created. + # We _could_ decide to sync the name from run -> catalog course if there is only one run. + assert run.catalog_course.display_name == "Intro to Testing" + + def test_courserun_of_many_sync(self) -> None: + """ + Tests that when a course is updated, the catalog records get updated, + but if there are several runs of the same course, the changes don't + propagate to the `CatalogCourse` and only affect the `CourseRun. + """ + # Create a course + course = CourseFactory.create(display_name="Intro to Testing", emit_signals=True) + course_id = course.location.context_key + run = catalog_api.get_course_run(course_id) + assert run.display_name == "Intro to Testing" + assert run.catalog_course.language_short == "en" + + # re-run the course: + new_run_course_id = rerun_course( + self.user, + source_course_key=course_id, + org=course_id.org, + number=course_id.course, + run="newRUN", + fields={"display_name": "Intro to Testing TEMPORARY NAME"}, + background=False, + ) + + # Update the re-run's display_name and language: + new_course = self.store.get_course(new_run_course_id) + new_course.language = "es" + new_course.display_name = "Introducción a las pruebas" + self.store.update_item(new_course, self.user.id) + + # Check if the catalog data is updated correctly. + # The original CourseRun object should be unchanged: + run.refresh_from_db() + assert run.display_name == "Intro to Testing" + assert run.catalog_course.language_short == "en" + # The new CourseRun object should be created: + new_run = catalog_api.get_course_run(new_run_course_id) + assert new_run.display_name == "Introducción a las pruebas" + # Changing the language of the second run doesn't affect the lanugage of the overall catalog course (since the + # first run is still in English) + assert new_run.catalog_course.language_short == "en" diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 05c9a594b7ec..9040d88e51cb 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -808,7 +808,7 @@ openedx-authz==0.22.0 # via -r requirements/edx/kernel.in openedx-calc==4.0.3 # via -r requirements/edx/kernel.in -openedx-core==0.35.0 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/catalog # via # -c requirements/constraints.txt # -r requirements/edx/kernel.in diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 8e3f7bb8799b..74eea0bf584d 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -1365,7 +1365,7 @@ openedx-calc==4.0.3 # via # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt -openedx-core==0.35.0 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/catalog # via # -c requirements/constraints.txt # -r requirements/edx/doc.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index e46f772e584c..8247101cb401 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -984,7 +984,7 @@ openedx-authz==0.22.0 # via -r requirements/edx/base.txt openedx-calc==4.0.3 # via -r requirements/edx/base.txt -openedx-core==0.35.0 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/catalog # via # -c requirements/constraints.txt # -r requirements/edx/base.txt diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 6cb004cc1798..6c93732f87c6 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -1034,7 +1034,7 @@ openedx-authz==0.22.0 # via -r requirements/edx/base.txt openedx-calc==4.0.3 # via -r requirements/edx/base.txt -openedx-core==0.35.0 +openedx-core @ git+https://github.com/openedx/openedx-core.git@braden/catalog # via # -c requirements/constraints.txt # -r requirements/edx/base.txt