Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion geonode/metadata/handlers/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def _set_error(errors: dict, path: list, msg: str):

@staticmethod
def localize_message(context: dict, msg_code: str, msg_info: dict):
msg_loc: str = labelResolver.gettext(msg_code)
msg_loc: str = labelResolver.gettext(msg_code, lang=context.get("lang", None))
if msg_loc:
tokens = defaultdict(lambda: "N/A", msg_info or {})
return msg_loc.format_map(tokens)
Expand Down
102 changes: 102 additions & 0 deletions geonode/metadata/handlers/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#########################################################################
#
# Copyright (C) 2026 OSGeo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################

import logging
from html import unescape
import re

from bs4 import BeautifulSoup

from geonode.base.models import ResourceBase
from geonode.metadata.handlers.abstract import MetadataHandler

logger = logging.getLogger(__name__)


class CleanupHandler(MetadataHandler):
_HTML_LIKE_PATTERN = re.compile(r"<\s*/?\s*[a-zA-Z][^>]*>")
_DANGEROUS_TAGS = ("script", "style", "noscript", "iframe", "object", "embed")

@staticmethod
def _preview(value, max_len=120):
text = repr(value)
return text if len(text) <= max_len else f"{text[: max_len - 1]}…"

@classmethod
def _sanitize_string(cls, value: str):
normalized = unescape(value)
if not cls._HTML_LIKE_PATTERN.search(normalized):
return value, False

soup = BeautifulSoup(normalized, "html.parser")
for tag in soup(cls._DANGEROUS_TAGS):
tag.decompose()

sanitized = soup.get_text()
return sanitized, sanitized != value

def _sanitize_instance(self, value, context, errors, path=None):
path = path or []

if isinstance(value, dict):
for key, nested_value in list(value.items()):
nested_path = path + [str(key)]
value[key] = self._sanitize_instance(nested_value, context, errors, nested_path)
return value

if isinstance(value, list):
for idx, nested_value in enumerate(list(value)):
nested_path = path + [f"[{idx}]"]
value[idx] = self._sanitize_instance(nested_value, context, errors, nested_path)
return value

if isinstance(value, str):
sanitized, changed = self._sanitize_string(value)
if changed:
logger.warning(
"Sanitized potentially unsafe metadata field '%s': %s -> %s",
".".join(path) if path else "ROOT",
self._preview(value),
self._preview(sanitized),
)
self._set_error(
errors,
path[0:1], # set error on root field
self.localize_message(context, "metadata_error_sanitized", {}),
)
return sanitized

return value

def update_schema(self, jsonschema: dict, context: dict, lang=None):
return jsonschema

def get_jsonschema_instance(
self, resource: ResourceBase, field_name: str, context: dict, errors: dict, lang: str = None
):
pass

def update_resource(
self, resource: ResourceBase, field_name: str, json_instance: dict, context: dict, errors: dict, **kwargs
):
pass

def pre_deserialization(self, resource, jsonschema: dict, instance: dict, partial: set, context: dict):
errors = context["errors"]

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Accessing context["errors"] directly will raise a KeyError if the "errors" key is not present in the context dictionary. This is particularly problematic because the newly added unit tests pass context={}. Using context.setdefault("errors", {}) ensures that the dictionary is safely initialized if missing.

Suggested change
errors = context["errors"]
errors = context.setdefault("errors", {})

self._sanitize_instance(instance, context, errors)
Comment on lines +100 to +102
3 changes: 2 additions & 1 deletion geonode/metadata/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def post_init(self):
handler.post_init()

def _init_schema_context(self, lang):
return {}
return {"lang": lang} if lang else {}

def build_schema(self, lang=None):
logger.debug(f"build_schema {lang}")
Expand Down Expand Up @@ -144,6 +144,7 @@ def update_schema_instance(self, resource, request_obj, lang=None, partial=None)
handler.load_deserialization_context(resource, schema, context)

errors = {}
context["errors"] = errors

for handler in self.handlers.values():
handler.pre_deserialization(resource, schema, json_instance, partial, context)
Expand Down
1 change: 1 addition & 0 deletions geonode/metadata/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
JSONSCHEMA_BASE = os.path.join(PROJECT_ROOT, "metadata/schemas/base.json")

METADATA_HANDLERS = {
"metadata_cleaner": "geonode.metadata.handlers.meta.CleanupHandler",
"base": "geonode.metadata.handlers.base.BaseHandler",
"thesaurus": "geonode.metadata.handlers.thesaurus.TKeywordsHandler",
"hkeyword": "geonode.metadata.handlers.hkeyword.HKeywordHandler",
Expand Down
62 changes: 60 additions & 2 deletions geonode/metadata/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@

from django.urls import reverse
from django.contrib.auth import get_user_model
from django.test import RequestFactory
from django.test import RequestFactory, TestCase, override_settings
from rest_framework import status
from django.utils.translation import gettext as _

from rest_framework.test import APITestCase

from geonode.metadata.handlers.multilang import MultiLangHandler
from geonode.metadata.settings import MODEL_SCHEMA
from geonode.metadata.manager import metadata_manager, CACHE_KEY_SCHEMA
from geonode.metadata.handlers.meta import CleanupHandler
from geonode.metadata.api.views import (
ProfileAutocomplete,
MetadataLinkedResourcesAutocomplete,
Expand Down Expand Up @@ -919,7 +922,7 @@ def test_update_schema_instance_no_errors(self, mock_get_schema):
mock_request.data = {"field1": "new_value1", "new_field2": "new_value2"}
mock_request.user = self.test_user_1

expected_context = {"user": self.test_user_1}
expected_context = {"user": self.test_user_1, "errors": {}}

mock_get_schema.return_value = self.fake_schema

Expand Down Expand Up @@ -1151,3 +1154,58 @@ def test_delete_schema_conflict_returns_409(self, mock_get_schema):
url = self._url(self.resource.pk, "title")
response = self.client.delete(url)
self.assertEqual(response.status_code, status.HTTP_409_CONFLICT)


class CleanupHandlerTests(TestCase):
def setUp(self):
self.handler = CleanupHandler()
self.owner = get_user_model().objects.create_user(
"cleanup_owner", "cleanup_owner@fakemail.com", "cleanup_owner_password", is_active=True
)
self.resource = ResourceBase.objects.create(title="Cleanup Test Resource", uuid=str(uuid4()), owner=self.owner)

@override_settings(LANGUAGE_CODE="en")
def test_pre_deserialization_sanitizes_nested_values_and_logs_warnings(self):
instance = {
"title": "<i>xss</i><img src=/ onerror=\"alert('XSS');\" />",
"details": {
"summary": "plain text",
"body": "<script>alert(1)</script>safe",
},
"items": ["ok", "<b>bad</b>"],
"count": 3,
}

with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as cm:
context = {"errors": {}}
self.handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context)

self.assertEqual(instance["title"], "xss")
self.assertEqual(instance["details"]["body"], "safe")
self.assertEqual(instance["items"][1], "bad")
self.assertEqual(instance["count"], 3)

logs = "\n".join(cm.output)
self.assertIn("Sanitized potentially unsafe metadata field 'title'", logs)
self.assertIn("Sanitized potentially unsafe metadata field 'details.body'", logs)
self.assertIn("Sanitized potentially unsafe metadata field 'items.[1]'", logs)

self.assertIn("title", context["errors"])
self.assertIn("__errors", context["errors"]["title"])
self.assertIn("metadata_error_sanitized", context["errors"]["title"]["__errors"])

@override_settings(LANGUAGE_CODE="en", MULTILANG_FIELDS=["title"])
def test_pre_deserialization_copies_sanitized_default_lang_value(self):
instance = {
"title_multilang_en": '<span>Hello</span><img src=x onerror="alert(1)" />',
}
context = {"errors": {}}

ml_handler = MultiLangHandler()
with self.assertLogs("geonode.metadata.handlers.meta", level="WARNING") as cm:
self.handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context)
ml_handler.pre_deserialization(self.resource, {}, instance, partial=set(), context=context)

self.assertEqual(instance["title_multilang_en"], "Hello")
self.assertEqual(instance["title"], "Hello")
self.assertIn("Sanitized potentially unsafe metadata field 'title_multilang_en'", "\n".join(cm.output))
Comment on lines +1209 to +1211

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The assertion self.assertEqual(instance["title"], "Hello") will fail with a KeyError: 'title'. The CleanupHandler only sanitizes fields in-place and does not copy multi-language fields (like title_multilang_en) to their default fields (like title). Since this test runs CleanupHandler in isolation rather than through the full metadata_manager (which would run MultiLangHandler), the "title" key is never created in instance.

Suggested change
self.assertEqual(instance["title_multilang_en"], "Hello")
self.assertEqual(instance["title"], "Hello")
self.assertIn("Sanitized potentially unsafe metadata field 'title_multilang_en'", "\n".join(cm.output))
self.assertEqual(instance["title_multilang_en"], "Hello")
self.assertIn("Sanitized potentially unsafe metadata field 'title_multilang_en'", "\n".join(cm.output))

70 changes: 70 additions & 0 deletions geonode/metadata/thesauri/labels-i18n.metadata.rdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?xml version='1.0' encoding='UTF-8'?>
<rdf:RDF xmlns="http://www.w3.org/2004/02/skos/core#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/">
<ConceptScheme rdf:about="https://i18n.geonode.org">
<dc:title>Localizzazione labels</dc:title>
<dc:title xml:lang="en">Labels localization</dc:title>
<dc:title xml:lang="it">Localizzazione labels</dc:title>
Comment on lines +4 to +6
<dcterms:issued>2026-06-10T16:54:03</dcterms:issued>
<dcterms:modified>2026-06-10T16:54:03</dcterms:modified>
</ConceptScheme>
<Concept rdf:about="metadata_error_empty_field">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_empty_field fieldname:{fieldname}</altLabel>
<prefLabel xml:lang="en">Missing value</prefLabel>
<prefLabel xml:lang="it">Valore richiesto</prefLabel>
</Concept>
<Concept rdf:about="metadata_error_indexing">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_indexing exc:{exc}</altLabel>
<prefLabel xml:lang="en">Error while indexing metadata: {exc}</prefLabel>
<prefLabel xml:lang="it">Errore nell'indicizzazione dei metadati: {exc}</prefLabel>
</Concept>
<Concept rdf:about="metadata_error_post_save">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_post_save handler:{handler} exc:{exc}</altLabel>
<prefLabel xml:lang="en">Error in post-save procedure: {exc}</prefLabel>
<prefLabel xml:lang="it">Errore nella procedura di post-save: {exc}</prefLabel>
</Concept>
<Concept rdf:about="metadata_error_pre_save">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_pre_save handler:{handler} exc:{exc}</altLabel>
<prefLabel xml:lang="en">Error in pre-save procedure: {exc}</prefLabel>
<prefLabel xml:lang="it">Errore nella procedura di pre-save: {exc}</prefLabel>
</Concept>
<Concept rdf:about="metadata_error_sanitized">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_sanitized</altLabel>
<prefLabel xml:lang="en">WARNING: possible injection attempt, this field has been sanitized. Reload this page.</prefLabel>
<prefLabel xml:lang="it">ATTENZIONE: possibile injection, il campo è stato modificato. Ricaricare la pagina.</prefLabel>
</Concept>
<Concept rdf:about="metadata_error_save">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_save: {exc}</altLabel>
<prefLabel xml:lang="en">Error while saving metadata: {exc}</prefLabel>
<prefLabel xml:lang="it">Errore nel salvataggio dei metadati: {exc}</prefLabel>
</Concept>
<Concept rdf:about="metadata_error_store">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_store fieldname:{fieldname} exc:{exc}</altLabel>
<prefLabel xml:lang="en">Error while saving metadata: {exc}</prefLabel>
<prefLabel xml:lang="it">Errore nel salvataggio dei metadati: {exc}</prefLabel>
</Concept>
<Concept rdf:about="metadata_error_update">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_error_update fieldname:{fieldname} handler:{handler} exc:{exc}</altLabel>
<prefLabel xml:lang="en">Error while updating metadata: {exc}</prefLabel>
<prefLabel xml:lang="it">Errore nell'aggiornamento dei metadati: {exc}</prefLabel>
</Concept>
<Concept rdf:about="metadata_sparse_error_parse">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_sparse_error_parse fieldname:{fieldname} type:{type} value:{value}</altLabel>
<prefLabel xml:lang="en">Parsing error</prefLabel>
<prefLabel xml:lang="it">Errore nel parsing</prefLabel>
</Concept>
<Concept rdf:about="metadata_sparse_error_type">
<inScheme rdf:resource="https://i18n.geonode.org"/>
<altLabel>metadata_sparse_error_type fieldname:{fieldname} type:{type}</altLabel>
<prefLabel xml:lang="en">Unexpected field type: {type}</prefLabel>
<prefLabel xml:lang="it">Tipo inaspettato: {type}</prefLabel>
</Concept>
</rdf:RDF>
Loading