diff --git a/geonode/metadata/handlers/abstract.py b/geonode/metadata/handlers/abstract.py index 19e4f9d55f8..74d0dce3f97 100644 --- a/geonode/metadata/handlers/abstract.py +++ b/geonode/metadata/handlers/abstract.py @@ -160,7 +160,7 @@ def _set_error(errors: dict, path: list, msg: str): @staticmethod def localize_message(context: dict, msg_code: str, msg_info: dict): - msg_loc: str = labelResolver.gettext(msg_code) + msg_loc: str = labelResolver.gettext(msg_code, lang=context.get("lang", None)) if msg_loc: tokens = defaultdict(lambda: "N/A", msg_info or {}) return msg_loc.format_map(tokens) diff --git a/geonode/metadata/handlers/meta.py b/geonode/metadata/handlers/meta.py new file mode 100644 index 00000000000..fd3d42120af --- /dev/null +++ b/geonode/metadata/handlers/meta.py @@ -0,0 +1,102 @@ +######################################################################### +# +# Copyright (C) 2026 OSGeo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
class CleanupHandler(MetadataHandler):
    """Metadata handler that strips HTML markup from incoming string values.

    The handler only acts in ``pre_deserialization``: it walks the submitted
    JSON instance in place, replaces every string that contains markup with
    its plain-text content, logs a warning and records a localized error on
    the top-level field the string belongs to. The remaining handler hooks
    are no-ops.
    """

    # Anything shaped like an opening or closing tag: "<", optional "/",
    # a letter, then everything up to the next ">".
    _HTML_LIKE_PATTERN = re.compile(r"<\s*/?\s*[a-zA-Z][^>]*>")
    # Elements that are dropped together with their content, instead of
    # being reduced to their inner text.
    _DANGEROUS_TAGS = ("script", "style", "noscript", "iframe", "object", "embed")

    @staticmethod
    def _preview(value, max_len=120):
        """Return ``repr(value)``, truncated with an ellipsis to ``max_len`` chars."""
        text = repr(value)
        return text if len(text) <= max_len else f"{text[: max_len - 1]}…"

    @classmethod
    def _sanitize_string(cls, value: str):
        """Strip HTML markup from ``value``.

        Entities are unescaped first, so markup hidden behind ``&lt;``/``&gt;``
        is detected as well.

        :param value: the string to inspect
        :return: a ``(text, changed)`` tuple; ``text`` is ``value`` itself
            when nothing had to be removed, otherwise the plain-text content
        """
        normalized = unescape(value)
        if not cls._HTML_LIKE_PATTERN.search(normalized):
            return value, False

        soup = BeautifulSoup(normalized, "html.parser")
        for tag in soup(cls._DANGEROUS_TAGS):
            tag.decompose()

        sanitized = soup.get_text()
        if sanitized == normalized:
            # The regex matched something tag-shaped, but the parser removed
            # nothing. Keep the caller's value untouched instead of storing
            # the entity-decoded text and reporting a bogus injection.
            return value, False
        return sanitized, sanitized != value

    def _sanitize_instance(self, value, context, errors, path=None):
        """Recursively sanitize ``value`` and return the sanitized value.

        Dicts and lists are updated in place; strings are returned sanitized;
        any other type is returned untouched.

        :param value: the (sub)instance to sanitize
        :param context: handler context, used to localize the error message
        :param errors: error structure that sanitization errors are added to
        :param path: list of path elements leading to ``value`` (dict keys as
            strings, list indexes as ``"[idx]"``); ``None`` for the root
        """
        path = path or []

        if isinstance(value, dict):
            # Iterate over a snapshot since entries are reassigned on the way.
            for key, nested_value in list(value.items()):
                nested_path = path + [str(key)]
                value[key] = self._sanitize_instance(nested_value, context, errors, nested_path)
            return value

        if isinstance(value, list):
            for idx, nested_value in enumerate(list(value)):
                nested_path = path + [f"[{idx}]"]
                value[idx] = self._sanitize_instance(nested_value, context, errors, nested_path)
            return value

        if isinstance(value, str):
            sanitized, changed = self._sanitize_string(value)
            if changed:
                logger.warning(
                    "Sanitized potentially unsafe metadata field '%s': %s -> %s",
                    ".".join(path) if path else "ROOT",
                    self._preview(value),
                    self._preview(sanitized),
                )
                self._set_error(
                    errors,
                    path[0:1],  # set error on root field
                    self.localize_message(context, "metadata_error_sanitized", {}),
                )
            return sanitized

        return value

    def update_schema(self, jsonschema: dict, context: dict, lang=None):
        """Return ``jsonschema`` unchanged: this handler adds no schema fields."""
        return jsonschema

    def get_jsonschema_instance(
        self, resource: ResourceBase, field_name: str, context: dict, errors: dict, lang: str = None
    ):
        """No-op: this handler does not serialize any field."""
        pass

    def update_resource(
        self, resource: ResourceBase, field_name: str, json_instance: dict, context: dict, errors: dict, **kwargs
    ):
        """No-op: this handler does not store any field."""
        pass

    def pre_deserialization(self, resource, jsonschema: dict, instance: dict, partial: set, context: dict):
        """Sanitize ``instance`` in place before the other handlers consume it.

        Errors are collected in ``context["errors"]``; the entry is created
        when the caller did not provide one, so the handler can also be used
        with a bare context.
        """
        errors = context.setdefault("errors", {})
        self._sanitize_instance(instance, context, errors)
class CleanupHandlerTests(TestCase):
    """Tests for the HTML sanitization performed by ``CleanupHandler``."""

    def setUp(self):
        self.handler = CleanupHandler()
        self.owner = get_user_model().objects.create_user(
            "cleanup_owner", "cleanup_owner@fakemail.com", "cleanup_owner_password", is_active=True
        )
        self.resource = ResourceBase.objects.create(title="Cleanup Test Resource", uuid=str(uuid4()), owner=self.owner)

    @override_settings(LANGUAGE_CODE="en")
    def test_pre_deserialization_sanitizes_nested_values_and_logs_warnings(self):
        # The fixtures must carry real markup, otherwise nothing is sanitized,
        # no warning is logged and assertLogs fails.
        instance = {
            "title": "<script>alert(1)</script>xss",
            "details": {
                "summary": "plain text",
                "body": "<b>safe</b>",
            },
            "items": ["ok", "<img src=x onerror=alert(1)>bad"],
            "count": 3,
        }

        with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as cm:
            context = {"errors": {}}
            self.handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context)

        # Markup is gone, plain values and non-string values are untouched
        self.assertEqual(instance["title"], "xss")
        self.assertEqual(instance["details"]["summary"], "plain text")
        self.assertEqual(instance["details"]["body"], "safe")
        self.assertEqual(instance["items"][0], "ok")
        self.assertEqual(instance["items"][1], "bad")
        self.assertEqual(instance["count"], 3)

        logs = "\n".join(cm.output)
        self.assertIn("Sanitized potentially unsafe metadata field 'title'", logs)
        self.assertIn("Sanitized potentially unsafe metadata field 'details.body'", logs)
        self.assertIn("Sanitized potentially unsafe metadata field 'items.[1]'", logs)

        self.assertIn("title", context["errors"])
        self.assertIn("__errors", context["errors"]["title"])
        self.assertIn("metadata_error_sanitized", context["errors"]["title"]["__errors"])

    @override_settings(LANGUAGE_CODE="en", MULTILANG_FIELDS=["title"])
    def test_pre_deserialization_copies_sanitized_default_lang_value(self):
        instance = {
            "title_multilang_en": '<img src="x" onerror="alert(1)">Hello',
        }
        context = {"errors": {}}

        ml_handler = MultiLangHandler()
        with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as cm:
            # The cleanup handler must run first, so that the multilang
            # handler copies the already sanitized value
            self.handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context)
            ml_handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context)

        self.assertEqual(instance["title_multilang_en"], "Hello")
        self.assertEqual(instance["title"], "Hello")
        self.assertIn("Sanitized potentially unsafe metadata field 'title_multilang_en'", "\n".join(cm.output))
exc:{exc} + Error while indexing metadata: {exc} + Errore nell'indicizzazione dei metadati: {exc} + + + + metadata_error_post_save handler:{handler} exc:{exc} + Error in post-save procedure: {exc} + Errore nella procedura di post-save: {exc} + + + + metadata_error_pre_save handler:{handler} exc:{exc} + Error in pre-save procedure: {exc} + Errore nella procedura di pre-save: {exc} + + + + metadata_error_sanitized + WARNING: possible injection attempt, this field has been sanitized. Reload this page. + ATTENZIONE: possibile injection, il campo è stato modificato. Ricaricare la pagina. + + + + metadata_error_save: {exc} + Error while saving metadata: {exc} + Errore nel salvataggio dei metadati: {exc} + + + + metadata_error_store fieldname:{fieldname} exc:{exc} + Error while saving metadata: {exc} + Errore nel salvataggio dei metadati: {exc} + + + + metadata_error_update fieldname:{fieldname} handler:{handler} exc:{exc} + Error while updating metadata: {exc} + Errore nell'aggiornamento dei metadati: {exc} + + + + metadata_sparse_error_parse fieldname:{fieldname} type:{type} value:{value} + Parsing error + Errore nel parsing + + + + metadata_sparse_error_type fieldname:{fieldname} type:{type} + Unexpected field type: {type} + Tipo inaspettato: {type} + +