-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Sanitize metadata input #14322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Sanitize metadata input #14322
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,102 @@ | ||
| ######################################################################### | ||
| # | ||
| # Copyright (C) 2026 OSGeo | ||
| # | ||
| # This program is free software: you can redistribute it and/or modify | ||
| # it under the terms of the GNU General Public License as published by | ||
| # the Free Software Foundation, either version 3 of the License, or | ||
| # (at your option) any later version. | ||
| # | ||
| # This program is distributed in the hope that it will be useful, | ||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| # GNU General Public License for more details. | ||
| # | ||
| # You should have received a copy of the GNU General Public License | ||
| # along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| # | ||
| ######################################################################### | ||
|
|
||
| import logging | ||
| from html import unescape | ||
| import re | ||
|
|
||
| from bs4 import BeautifulSoup | ||
|
|
||
| from geonode.base.models import ResourceBase | ||
| from geonode.metadata.handlers.abstract import MetadataHandler | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| class CleanupHandler(MetadataHandler): | ||
| _HTML_LIKE_PATTERN = re.compile(r"<\s*/?\s*[a-zA-Z][^>]*>") | ||
| _DANGEROUS_TAGS = ("script", "style", "noscript", "iframe", "object", "embed") | ||
|
|
||
| @staticmethod | ||
| def _preview(value, max_len=120): | ||
| text = repr(value) | ||
| return text if len(text) <= max_len else f"{text[: max_len - 1]}…" | ||
|
|
||
| @classmethod | ||
| def _sanitize_string(cls, value: str): | ||
| normalized = unescape(value) | ||
| if not cls._HTML_LIKE_PATTERN.search(normalized): | ||
| return value, False | ||
|
|
||
| soup = BeautifulSoup(normalized, "html.parser") | ||
| for tag in soup(cls._DANGEROUS_TAGS): | ||
| tag.decompose() | ||
|
|
||
| sanitized = soup.get_text() | ||
| return sanitized, sanitized != value | ||
|
|
||
| def _sanitize_instance(self, value, context, errors, path=None): | ||
| path = path or [] | ||
|
|
||
| if isinstance(value, dict): | ||
| for key, nested_value in list(value.items()): | ||
| nested_path = path + [str(key)] | ||
| value[key] = self._sanitize_instance(nested_value, context, errors, nested_path) | ||
| return value | ||
|
|
||
| if isinstance(value, list): | ||
| for idx, nested_value in enumerate(list(value)): | ||
| nested_path = path + [f"[{idx}]"] | ||
| value[idx] = self._sanitize_instance(nested_value, context, errors, nested_path) | ||
| return value | ||
|
|
||
| if isinstance(value, str): | ||
| sanitized, changed = self._sanitize_string(value) | ||
| if changed: | ||
| logger.warning( | ||
| "Sanitized potentially unsafe metadata field '%s': %s -> %s", | ||
| ".".join(path) if path else "ROOT", | ||
| self._preview(value), | ||
| self._preview(sanitized), | ||
| ) | ||
| self._set_error( | ||
| errors, | ||
| path[0:1], # set error on root field | ||
| self.localize_message(context, "metadata_error_sanitized", {}), | ||
| ) | ||
| return sanitized | ||
|
|
||
| return value | ||
|
|
||
| def update_schema(self, jsonschema: dict, context: dict, lang=None): | ||
| return jsonschema | ||
|
|
||
| def get_jsonschema_instance( | ||
| self, resource: ResourceBase, field_name: str, context: dict, errors: dict, lang: str = None | ||
| ): | ||
| pass | ||
|
|
||
| def update_resource( | ||
| self, resource: ResourceBase, field_name: str, json_instance: dict, context: dict, errors: dict, **kwargs | ||
| ): | ||
| pass | ||
|
|
||
| def pre_deserialization(self, resource, jsonschema: dict, instance: dict, partial: set, context: dict): | ||
| errors = context["errors"] | ||
| self._sanitize_instance(instance, context, errors) | ||
|
Comment on lines
+100
to
+102
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -25,13 +25,16 @@ | |||||||||||
|
|
||||||||||||
| from django.urls import reverse | ||||||||||||
| from django.contrib.auth import get_user_model | ||||||||||||
| from django.test import RequestFactory | ||||||||||||
| from django.test import RequestFactory, TestCase, override_settings | ||||||||||||
| from rest_framework import status | ||||||||||||
| from django.utils.translation import gettext as _ | ||||||||||||
|
|
||||||||||||
| from rest_framework.test import APITestCase | ||||||||||||
|
|
||||||||||||
| from geonode.metadata.handlers.multilang import MultiLangHandler | ||||||||||||
| from geonode.metadata.settings import MODEL_SCHEMA | ||||||||||||
| from geonode.metadata.manager import metadata_manager, CACHE_KEY_SCHEMA | ||||||||||||
| from geonode.metadata.handlers.meta import CleanupHandler | ||||||||||||
| from geonode.metadata.api.views import ( | ||||||||||||
| ProfileAutocomplete, | ||||||||||||
| MetadataLinkedResourcesAutocomplete, | ||||||||||||
|
|
@@ -919,7 +922,7 @@ def test_update_schema_instance_no_errors(self, mock_get_schema): | |||||||||||
| mock_request.data = {"field1": "new_value1", "new_field2": "new_value2"} | ||||||||||||
| mock_request.user = self.test_user_1 | ||||||||||||
|
|
||||||||||||
| expected_context = {"user": self.test_user_1} | ||||||||||||
| expected_context = {"user": self.test_user_1, "errors": {}} | ||||||||||||
|
|
||||||||||||
| mock_get_schema.return_value = self.fake_schema | ||||||||||||
|
|
||||||||||||
|
|
@@ -1151,3 +1154,58 @@ def test_delete_schema_conflict_returns_409(self, mock_get_schema): | |||||||||||
| url = self._url(self.resource.pk, "title") | ||||||||||||
| response = self.client.delete(url) | ||||||||||||
| self.assertEqual(response.status_code, status.HTTP_409_CONFLICT) | ||||||||||||
|
|
||||||||||||
|
|
||||||||||||
| class CleanupHandlerTests(TestCase): | ||||||||||||
| def setUp(self): | ||||||||||||
| self.handler = CleanupHandler() | ||||||||||||
| self.owner = get_user_model().objects.create_user( | ||||||||||||
| "cleanup_owner", "cleanup_owner@fakemail.com", "cleanup_owner_password", is_active=True | ||||||||||||
| ) | ||||||||||||
| self.resource = ResourceBase.objects.create(title="Cleanup Test Resource", uuid=str(uuid4()), owner=self.owner) | ||||||||||||
|
|
||||||||||||
| @override_settings(LANGUAGE_CODE="en") | ||||||||||||
| def test_pre_deserialization_sanitizes_nested_values_and_logs_warnings(self): | ||||||||||||
| instance = { | ||||||||||||
| "title": "<i>xss</i><img src=/ onerror=\"alert('XSS');\" />", | ||||||||||||
| "details": { | ||||||||||||
| "summary": "plain text", | ||||||||||||
| "body": "<script>alert(1)</script>safe", | ||||||||||||
| }, | ||||||||||||
| "items": ["ok", "<b>bad</b>"], | ||||||||||||
| "count": 3, | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as cm: | ||||||||||||
| context = {"errors": {}} | ||||||||||||
| self.handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context) | ||||||||||||
|
|
||||||||||||
| self.assertEqual(instance["title"], "xss") | ||||||||||||
| self.assertEqual(instance["details"]["body"], "safe") | ||||||||||||
| self.assertEqual(instance["items"][1], "bad") | ||||||||||||
| self.assertEqual(instance["count"], 3) | ||||||||||||
|
|
||||||||||||
| logs = "\n".join(cm.output) | ||||||||||||
| self.assertIn("Sanitized potentially unsafe metadata field 'title'", logs) | ||||||||||||
| self.assertIn("Sanitized potentially unsafe metadata field 'details.body'", logs) | ||||||||||||
| self.assertIn("Sanitized potentially unsafe metadata field 'items.[1]'", logs) | ||||||||||||
|
|
||||||||||||
| self.assertIn("title", context["errors"]) | ||||||||||||
| self.assertIn("__errors", context["errors"]["title"]) | ||||||||||||
| self.assertIn("metadata_error_sanitized", context["errors"]["title"]["__errors"]) | ||||||||||||
|
|
||||||||||||
| @override_settings(LANGUAGE_CODE="en", MULTILANG_FIELDS=["title"]) | ||||||||||||
| def test_pre_deserialization_copies_sanitized_default_lang_value(self): | ||||||||||||
| instance = { | ||||||||||||
| "title_multilang_en": '<span>Hello</span><img src=x onerror="alert(1)" />', | ||||||||||||
| } | ||||||||||||
| context = {"errors": {}} | ||||||||||||
|
|
||||||||||||
| ml_handler = MultiLangHandler() | ||||||||||||
| with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as cm: | ||||||||||||
| self.handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context) | ||||||||||||
| ml_handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context) | ||||||||||||
|
|
||||||||||||
| self.assertEqual(instance["title_multilang_en"], "Hello") | ||||||||||||
| self.assertEqual(instance["title"], "Hello") | ||||||||||||
| self.assertIn("Sanitized potentially unsafe metadata field 'title_multilang_en'", "\n".join(cm.output)) | ||||||||||||
|
Comment on lines
+1209
to
+1211
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The assertion
Suggested change
|
||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| <?xml version='1.0' encoding='UTF-8'?> | ||
| <rdf:RDF xmlns="http://www.w3.org/2004/02/skos/core#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/"> | ||
| <ConceptScheme rdf:about="https://i18n.geonode.org"> | ||
| <dc:title>Localizzazione labels</dc:title> | ||
| <dc:title xml:lang="en">Labels localization</dc:title> | ||
| <dc:title xml:lang="it">Localizzazione labels</dc:title> | ||
|
Comment on lines
+4
to
+6
|
||
| <dcterms:issued>2026-06-10T16:54:03</dcterms:issued> | ||
| <dcterms:modified>2026-06-10T16:54:03</dcterms:modified> | ||
| </ConceptScheme> | ||
| <Concept rdf:about="metadata_error_empty_field"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_empty_field fieldname:{fieldname}</altLabel> | ||
| <prefLabel xml:lang="en">Missing value</prefLabel> | ||
| <prefLabel xml:lang="it">Valore richiesto</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_error_indexing"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_indexing exc:{exc}</altLabel> | ||
| <prefLabel xml:lang="en">Error while indexing metadata: {exc}</prefLabel> | ||
| <prefLabel xml:lang="it">Errore nell'indicizzazione dei metadati: {exc}</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_error_post_save"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_post_save handler:{handler} exc:{exc}</altLabel> | ||
| <prefLabel xml:lang="en">Error in post-save procedure: {exc}</prefLabel> | ||
| <prefLabel xml:lang="it">Errore nella procedura di post-save: {exc}</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_error_pre_save"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_pre_save handler:{handler} exc:{exc}</altLabel> | ||
| <prefLabel xml:lang="en">Error in pre-save procedure: {exc}</prefLabel> | ||
| <prefLabel xml:lang="it">Errore nella procedura di pre-save: {exc}</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_error_sanitized"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_sanitized</altLabel> | ||
|
|
||
| <prefLabel xml:lang="en">WARNING: possible injection attempt, this field has been sanitized. Reload this page.</prefLabel> | ||
| <prefLabel xml:lang="it">ATTENZIONE: possibile injection, il campo è stato modificato. Ricaricare la pagina.</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_error_save"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_save: {exc}</altLabel> | ||
| <prefLabel xml:lang="en">Error while saving metadata: {exc}</prefLabel> | ||
| <prefLabel xml:lang="it">Errore nel salvataggio dei metadati: {exc}</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_error_store"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_store fieldname:{fieldname} exc:{exc}</altLabel> | ||
| <prefLabel xml:lang="en">Error while saving metadata: {exc}</prefLabel> | ||
| <prefLabel xml:lang="it">Errore nel salvataggio dei metadati: {exc}</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_error_update"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_error_update fieldname:{fieldname} handler:{handler} exc:{exc}</altLabel> | ||
| <prefLabel xml:lang="en">Error while updating metadata: {exc}</prefLabel> | ||
| <prefLabel xml:lang="it">Errore nell'aggiornamento dei metadati: {exc}</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_sparse_error_parse"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_sparse_error_parse fieldname:{fieldname} type:{type} value:{value}</altLabel> | ||
| <prefLabel xml:lang="en">Parsing error</prefLabel> | ||
| <prefLabel xml:lang="it">Errore nel parsing</prefLabel> | ||
| </Concept> | ||
| <Concept rdf:about="metadata_sparse_error_type"> | ||
| <inScheme rdf:resource="https://i18n.geonode.org"/> | ||
| <altLabel>metadata_sparse_error_type fieldname:{fieldname} type:{type}</altLabel> | ||
| <prefLabel xml:lang="en">Unexpected field type: {type}</prefLabel> | ||
| <prefLabel xml:lang="it">Tipo inaspettato: {type}</prefLabel> | ||
| </Concept> | ||
| </rdf:RDF> | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Accessing
context["errors"]directly will raise aKeyErrorif the"errors"key is not present in thecontextdictionary. This is particularly problematic because the newly added unit tests passcontext={}. Usingcontext.setdefault("errors", {})ensures that the dictionary is safely initialized if missing.