Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]
### Added
- JPEG params in `params.py`
- `clear_jpeg_metadata` function in `functions.py`
- `clear_png_metadata` function in `functions.py`
- `extract_metadata` function in `functions.py`
### Changed
Expand Down
66 changes: 65 additions & 1 deletion dmeta/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
from .errors import DMetaBaseError
from .util import get_microsoft_format, extract, read_json
from .params import CORE_XML_MAP, APP_XML_MAP, OVERVIEW, DMETA_VERSION, \
UPDATE_COMMAND_WITH_NO_CONFIG_FILE_ERROR, SUPPORTED_MICROSOFT_FORMATS
UPDATE_COMMAND_WITH_NO_CONFIG_FILE_ERROR, SUPPORTED_MICROSOFT_FORMATS, \
JPEG_MARKER_PREFIX, JPEG_SOI, JPEG_EOI, JPEG_SOS, JPEG_COM, \
JPEG_APP_FIRST, JPEG_APP_LAST, JPEG_STANDALONE_MARKERS


def overwrite_metadata(
Expand Down Expand Up @@ -270,6 +272,68 @@ def clear_png_metadata(png_file_name, in_place=False, verbose=False):
return output_path


def clear_jpeg_metadata(jpeg_file_name, in_place=False, verbose=False):
    """
    Remove all metadata from a JPEG file without re-encoding pixel data.

    The file is walked marker by marker. APPn and COM segments are dropped;
    every other segment is copied unchanged, and everything that follows the
    first SOS segment header is copied verbatim.

    Nothing is written when the input cannot be parsed, so an ``in_place`` run
    never replaces the original with a damaged stream.

    :param jpeg_file_name: path to original JPEG file
    :type jpeg_file_name: str
    :param in_place: if True, overwrite the original file with cleaned version
    :type in_place: bool
    :param verbose: if True, print detailed output
    :type verbose: bool
    :return: path to cleaned JPEG file, or None when the file does not exist,
        does not have a .jpg/.jpeg extension, does not start with SOI, or
        contains a malformed/truncated segment
    """
    if not os.path.exists(jpeg_file_name) or not jpeg_file_name.lower().endswith((".jpg", ".jpeg")):
        return None

    with open(jpeg_file_name, "rb") as f:
        data = f.read()
    soi = bytes([JPEG_MARKER_PREFIX, JPEG_SOI])
    if not data.startswith(soi):
        return None

    # Walk JPEG segments per ITU-T T.81 and drop APPn + COM (metadata holders).
    out = bytearray(soi)
    i, n = 2, len(data)
    while i < n:
        # Every marker must be introduced by the prefix byte; anything else
        # means we lost sync, so refuse rather than invent a marker.
        if data[i] != JPEG_MARKER_PREFIX:
            return None
        # Skip the prefix and any repeated prefix bytes used as fill.
        while i < n and data[i] == JPEG_MARKER_PREFIX:
            i += 1
        if i >= n:
            break
        marker = data[i]
        i += 1
        if marker in JPEG_STANDALONE_MARKERS:
            # Standalone markers have no length field and no payload.
            out += bytes([JPEG_MARKER_PREFIX, marker])
            if marker == JPEG_EOI:
                break
            continue
        # The two-byte big-endian length must be fully present.
        if i + 2 > n:
            return None
        length = (data[i] << 8) | data[i + 1]
        # The length counts its own two bytes, so it can never be below 2,
        # and the declared segment must fit inside the file.
        if length < 2 or i + length > n:
            return None
        payload = data[i:i + length]
        i += length
        if JPEG_APP_FIRST <= marker <= JPEG_APP_LAST or marker == JPEG_COM:
            continue
        out += bytes([JPEG_MARKER_PREFIX, marker]) + payload
        if marker == JPEG_SOS:
            # Entropy-coded data (and any later scans) follow; copy as-is.
            out += data[i:]
            break

    if in_place:
        output_path = jpeg_file_name
    else:
        base, ext = os.path.splitext(jpeg_file_name)
        output_path = base + "_cleaned" + ext

    with open(output_path, "wb") as f:
        f.write(bytes(out))

    if verbose:
        action = "overwritten" if in_place else f"saved to {output_path}"
        print(f"Metadata cleared for: {jpeg_file_name} ({action})")

    return output_path


def extract_metadata(microsoft_file_name):
"""
Extract all the editable metadata from the given Microsoft file.
Expand Down
8 changes: 8 additions & 0 deletions dmeta/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@
"pptx",
"xlsx"
]
# JPEG marker code bytes (ITU-T T.81); each is written after JPEG_MARKER_PREFIX.
JPEG_MARKER_PREFIX = 0xFF
JPEG_SOI = 0xD8  # SOI - Start Of Image
JPEG_EOI = 0xD9  # EOI - End Of Image
JPEG_SOS = 0xDA  # SOS - Start Of Scan
JPEG_COM = 0xFE  # COM - Comment
JPEG_APP_FIRST = 0xE0  # APP0, lowest application-segment code
JPEG_APP_LAST = 0xEF  # APP15, highest application-segment code
# Codes that are not followed by a length field: 0x00, 0x01, SOI, EOI and 0xD0..0xD7.
JPEG_STANDALONE_MARKERS = frozenset([0x00, 0x01, JPEG_SOI, JPEG_EOI, *range(0xD0, 0xD8)])
INVALID_CONFIG_FILE_NAME_ERROR = "Config file name is not a string."
CONFIG_FILE_DOES_NOT_EXIST_ERROR = "Given config file doesn't exist."
UPDATE_COMMAND_WITH_NO_CONFIG_FILE_ERROR = "No config file provided. Set the .json config file with --config command."
Expand Down
Binary file added tests/test.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 17 additions & 0 deletions tests/test_dmeta.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from PIL import Image
from dmeta.functions import update, update_all, clear, clear_all
from dmeta.functions import clear_jpeg_metadata
from dmeta.functions import clear_png_metadata
from dmeta.functions import extract_metadata

Expand Down Expand Up @@ -76,3 +77,19 @@ def test10():
output_path = clear_png_metadata(png_file, in_place=False, verbose=False)
with Image.open(output_path) as img:
assert img.info == {}


def test11():
    # strip the metadata of the .jpg fixture with in_place=False and check the returned file
    source_jpeg = os.path.join(TESTS_DIR_PATH, "test.jpg")
    cleaned_jpeg = clear_jpeg_metadata(source_jpeg, in_place=False, verbose=False)
    with Image.open(cleaned_jpeg) as cleaned_image:
        remaining_info = cleaned_image.info
        assert remaining_info == {}


def test12():
    # strip the metadata of the .jpg fixture with in_place=True and re-open the same path
    target_jpeg = os.path.join(TESTS_DIR_PATH, "test.jpg")
    clear_jpeg_metadata(target_jpeg, in_place=True, verbose=False)
    with Image.open(target_jpeg) as rewritten_image:
        remaining_info = rewritten_image.info
        assert remaining_info == {}
Loading