diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 0ae707a0..95b1ce15 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -52,7 +52,16 @@ def _geo_to_coords(geo_info, height: int, width: int) -> dict: origin_y)``. ``to_geotiff`` prefers that attr over recomputing the transform from the coord arrays, which avoids float drift on fractional-precision rasters. + + When the file carries no GeoTIFF tags (``has_georef=False``), fall back + to integer pixel coordinates 0..N-1 instead of inventing fractional + values from the default unit transform. """ + if not getattr(geo_info, 'has_georef', True): + return { + 'y': np.arange(height, dtype=np.int64), + 'x': np.arange(width, dtype=np.int64), + } t = geo_info.transform if geo_info.raster_type == RASTER_PIXEL_IS_POINT: # Tiepoint is pixel center -- no offset needed diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index 674e3354..fe662403 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -114,6 +114,11 @@ class GeoTransform: class GeoInfo: """Geographic metadata extracted from GeoTIFF tags.""" transform: GeoTransform = field(default_factory=GeoTransform) + # True when ModelTransformation, ModelPixelScale, or ModelTiepoint tags + # were present in the IFD. False for plain TIFFs with no GeoTIFF tags -- + # callers should fall back to integer pixel coordinates rather than + # using the default transform (which would produce negative y values). + has_georef: bool = False crs_epsg: int | None = None model_type: int = 0 raster_type: int = RASTER_PIXEL_IS_AREA @@ -288,9 +293,17 @@ def _parse_geokeys(ifd: IFD, data: bytes | memoryview, return geokeys -def _extract_transform(ifd: IFD) -> GeoTransform: +def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: """Extract affine transform from ModelTransformation, or - ModelTiepoint + ModelPixelScale tags.""" + ModelTiepoint + ModelPixelScale tags. + + Returns + ------- + (transform, has_georef) + ``has_georef`` is True when at least one of the geo-transform tags + was present. When False, ``transform`` is the default identity + and callers should fall back to pixel coordinates. + """ # Try ModelTransformationTag (4x4 row-major matrix, 16 doubles). # Per the GeoTIFF spec this tag wins over ModelPixelScale + ModelTiepoint @@ -326,7 +339,7 @@ def _extract_transform(ifd: IFD) -> GeoTransform: origin_y=m[7], pixel_width=m[0], pixel_height=m[5], - ) + ), True # Try ModelTiepoint + ModelPixelScale tiepoint = ifd.get_value(TAG_MODEL_TIEPOINT) @@ -356,11 +369,15 @@ def _extract_transform(ifd: IFD) -> GeoTransform: origin_y=origin_y, pixel_width=sx, pixel_height=-sy, # negative because y decreases - ) + ), True + + return GeoTransform(pixel_width=sx, pixel_height=-sy), True - return GeoTransform(pixel_width=sx, pixel_height=-sy) + # Tiepoint without scale: still flag as georeferenced (origin known) + if tiepoint is not None: + return GeoTransform(), True - return GeoTransform() + return GeoTransform(), False def extract_geo_info(ifd: IFD, data: bytes | memoryview, @@ -380,7 +397,7 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview, ------- GeoInfo """ - transform = _extract_transform(ifd) + transform, has_georef = _extract_transform(ifd) geokeys = _parse_geokeys(ifd, data, byte_order) # Extract EPSG @@ -546,6 +563,7 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview, return GeoInfo( transform=transform, + has_georef=has_georef, crs_epsg=epsg, model_type=int(model_type) if isinstance(model_type, (int, float)) else 0, raster_type=int(raster_type) if isinstance(raster_type, (int, float)) else RASTER_PIXEL_IS_AREA, diff --git a/xrspatial/geotiff/tests/test_georef_edges.py b/xrspatial/geotiff/tests/test_georef_edges.py new file mode 100644 index 00000000..327d1ffc --- /dev/null +++ b/xrspatial/geotiff/tests/test_georef_edges.py @@ -0,0 +1,157 @@ +"""Edge-case tests for georeferenced and non-georeferenced TIFFs. + +Issue #1482: +- T-3: descending-y (south-up) round-trip preserves data orientation. +- T-4: a TIFF with no GeoTIFF tags reads back with integer pixel coords + and no ``crs`` attr, instead of failing or inventing fractional coords. +""" +from __future__ import annotations + +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff +from xrspatial.geotiff._header import parse_all_ifds, parse_header +from xrspatial.geotiff._geotags import extract_geo_info + +from .conftest import make_minimal_tiff + + +class TestNonGeoreferencedRead: + """T-4: opening a TIFF with no GeoTIFF tags should not crash.""" + + def test_open_plain_tiff(self, tmp_path): + # make_minimal_tiff with no geo_transform / epsg = no GeoTIFF tags. + data = make_minimal_tiff(4, 5, np.dtype('float32')) + path = str(tmp_path / 'plain_1482.tif') + with open(path, 'wb') as f: + f.write(data) + + da = open_geotiff(path) + + assert da.shape == (5, 4) # height=5, width=4 + # No CRS attribute should be set. + assert 'crs' not in da.attrs + assert 'crs_wkt' not in da.attrs + + def test_integer_pixel_coords(self, tmp_path): + """Non-georef fallback: y=0..H-1, x=0..W-1.""" + data = make_minimal_tiff(6, 3, np.dtype('uint16')) + path = str(tmp_path / 'plain_coords_1482.tif') + with open(path, 'wb') as f: + f.write(data) + + da = open_geotiff(path) + np.testing.assert_array_equal(da.coords['y'].values, np.arange(3)) + np.testing.assert_array_equal(da.coords['x'].values, np.arange(6)) + + def test_has_georef_flag_false(self, tmp_path): + """The GeoInfo carries the explicit has_georef=False flag.""" + data = make_minimal_tiff(4, 4, np.dtype('float32')) + header = parse_header(data) + ifd = parse_all_ifds(data, header)[0] + info = extract_geo_info(ifd, data, header.byte_order) + assert info.has_georef is False + + def test_has_georef_flag_true_when_present(self, tmp_path): + data = make_minimal_tiff( + 4, 4, np.dtype('float32'), + geo_transform=(-120.0, 45.0, 0.001, -0.001), + epsg=4326, + ) + header = parse_header(data) + ifd = parse_all_ifds(data, header)[0] + info = extract_geo_info(ifd, data, header.byte_order) + assert info.has_georef is True + + def test_round_trip_preserves_pixels(self, tmp_path): + """Read a plain TIFF, write it back, read again -- pixels match.""" + pixels = np.arange(20, dtype=np.float32).reshape(4, 5) + data = make_minimal_tiff(5, 4, pixel_data=pixels) + in_path = str(tmp_path / 'plain_in_1482.tif') + out_path = str(tmp_path / 'plain_out_1482.tif') + with open(in_path, 'wb') as f: + f.write(data) + + da = open_geotiff(in_path) + np.testing.assert_array_equal(da.values, pixels) + + # Round-trip through to_geotiff -- should not crash even without CRS + to_geotiff(da, out_path) + da2 = open_geotiff(out_path) + np.testing.assert_array_equal(da2.values, pixels) + + +class TestDescendingYWrite: + """T-3: writing a south-up DataArray (positive pixel_height). + + GeoTIFF's ModelPixelScale tag stores absolute scale values; sign + information is lost when round-tripping through Tiepoint+Scale. The + pixel data orientation, however, is preserved exactly. This test + documents that contract. + """ + + def _make_south_up_da(self): + data = np.arange(16, dtype=np.float32).reshape(4, 4) + # Ascending y values: row 0 is the south edge, y increases northward. + y = np.array([0.5, 1.5, 2.5, 3.5]) + x = np.array([0.5, 1.5, 2.5, 3.5]) + return xr.DataArray( + data, dims=('y', 'x'), coords={'y': y, 'x': x}, + attrs={'crs': 4326}, + ) + + def test_data_orientation_preserved(self, tmp_path): + """Pixel ordering is identical on round-trip even for south-up input.""" + da = self._make_south_up_da() + path = str(tmp_path / 'south_up_1482.tif') + to_geotiff(da, path) + da2 = open_geotiff(path) + + # Pixel values land in the same row/col positions. + np.testing.assert_array_equal(da2.values, da.values) + + def test_x_coords_preserved(self, tmp_path): + da = self._make_south_up_da() + path = str(tmp_path / 'south_up_x_1482.tif') + to_geotiff(da, path) + da2 = open_geotiff(path) + np.testing.assert_allclose(da2.coords['x'].values, + da.coords['x'].values) + + def test_y_coords_known_limitation(self, tmp_path): + """Known limitation: y sign is not preserved through Scale+Tiepoint. + + ModelPixelScale stores |pixel_height|; on read we always restore + a negative pixel_height per GeoTIFF convention. Document the + magnitude is right and the sign is the GeoTIFF default. + """ + da = self._make_south_up_da() + path = str(tmp_path / 'south_up_y_1482.tif') + to_geotiff(da, path) + da2 = open_geotiff(path) + # Magnitudes match the original spacing. + spacing = np.abs(np.diff(da2.coords['y'].values)) + assert np.allclose(spacing, 1.0) + + def test_descending_y_round_trip(self, tmp_path): + """The standard north-up case (descending y) round-trips cleanly.""" + data = np.arange(16, dtype=np.float32).reshape(4, 4) + # Descending y: row 0 at the north edge. + y = np.array([3.5, 2.5, 1.5, 0.5]) + x = np.array([0.5, 1.5, 2.5, 3.5]) + da = xr.DataArray( + data, dims=('y', 'x'), coords={'y': y, 'x': x}, + attrs={'crs': 4326}, + ) + path = str(tmp_path / 'north_up_1482.tif') + to_geotiff(da, path) + da2 = open_geotiff(path) + np.testing.assert_array_equal(da2.values, da.values) + np.testing.assert_allclose(da2.coords['y'].values, + da.coords['y'].values) + np.testing.assert_allclose(da2.coords['x'].values, + da.coords['x'].values) diff --git a/xrspatial/geotiff/tests/test_header.py b/xrspatial/geotiff/tests/test_header.py index ff16116b..097eadf2 100644 --- a/xrspatial/geotiff/tests/test_header.py +++ b/xrspatial/geotiff/tests/test_header.py @@ -6,9 +6,11 @@ import numpy as np import pytest +from xrspatial.geotiff._dtypes import RATIONAL, SRATIONAL from xrspatial.geotiff._header import ( IFD, TIFFHeader, + _read_value, parse_all_ifds, parse_header, parse_ifd, @@ -121,3 +123,177 @@ def test_defaults(self): assert ifd.photometric == 1 assert ifd.planar_config == 1 assert not ifd.is_tiled + + +class TestIFDChainLoop: + """Verify parse_all_ifds bails out on a malicious IFD chain cycle. + + Issue #1482 (T-2): the parser keeps a ``seen`` set of offsets and + breaks the loop when an IFD's next-pointer points back to an offset + already visited. Without that guard, a crafted file with IFD2 + pointing back at IFD1 would loop forever. + """ + + def _build_two_ifd_loop(self) -> bytes: + """Build a TIFF where IFD #2's next-pointer points back at IFD #1.""" + bo = '<' + # Header: II, magic 42, first-IFD offset + out = bytearray() + out.extend(b'II') + out.extend(struct.pack(f'{bo}H', 42)) + + ifd1_offset = 8 + # IFD layout: 1 entry + next-IFD pointer = 2 + 12 + 4 = 18 bytes + ifd2_offset = ifd1_offset + 18 + out.extend(struct.pack(f'{bo}I', ifd1_offset)) + + # IFD #1: one entry (ImageWidth=1), next = ifd2_offset + out.extend(struct.pack(f'{bo}H', 1)) # num_entries + out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 3, 1)) + out.extend(struct.pack(f'{bo}I', 1)) # value (inline padded) + out.extend(struct.pack(f'{bo}I', ifd2_offset)) # next-IFD + + # IFD #2: one entry (ImageWidth=2), next = ifd1_offset (cycle!) + out.extend(struct.pack(f'{bo}H', 1)) + out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 3, 1)) + out.extend(struct.pack(f'{bo}I', 2)) + out.extend(struct.pack(f'{bo}I', ifd1_offset)) # cycle back + + return bytes(out) + + def test_cycle_does_not_infinite_loop(self): + data = self._build_two_ifd_loop() + header = parse_header(data) + ifds = parse_all_ifds(data, header) + + # Each unique offset is parsed exactly once. Cycle detection + # must stop us at IFD #2 even though its next-pointer is non-zero. + assert len(ifds) == 2 + # Values should match what we wrote, in order. + assert ifds[0].width == 1 + assert ifds[1].width == 2 + + def test_cycle_completes_quickly(self): + """Parsing must terminate without exhausting the buffer.""" + data = self._build_two_ifd_loop() + header = parse_header(data) + # If the seen-set guard regresses this would hang; pytest-timeout + # would catch it but the bounded len() check below is enough. + ifds = parse_all_ifds(data, header) + assert len(ifds) <= 4 # generous upper bound -- we expect 2 + + def test_self_loop(self): + """An IFD whose next-pointer is its own offset terminates.""" + bo = '<' + out = bytearray() + out.extend(b'II') + out.extend(struct.pack(f'{bo}H', 42)) + ifd_offset = 8 + out.extend(struct.pack(f'{bo}I', ifd_offset)) + out.extend(struct.pack(f'{bo}H', 1)) + out.extend(struct.pack(f'{bo}HHI', TAG_IMAGE_WIDTH, 3, 1)) + out.extend(struct.pack(f'{bo}I', 7)) + out.extend(struct.pack(f'{bo}I', ifd_offset)) # points to self + ifds = parse_all_ifds(bytes(out), parse_header(bytes(out))) + assert len(ifds) == 1 + assert ifds[0].width == 7 + + +class TestReadValueRationals: + """T-8 coverage for RATIONAL / SRATIONAL edge cases in _read_value.""" + + def test_rational_denominator_zero_returns_zero(self): + # numerator=5, denominator=0 -- by convention return 0.0 + buf = struct.pack(' 4 GB don't fit. + + There's no way to actually express an offset > 4 GB in a classic + TIFF (the field is uint32). What we want to verify is that pointing + a classic-TIFF first-IFD at an offset beyond the buffer terminates + cleanly via ``parse_all_ifds`` rather than reading garbage. + """ + + def test_first_ifd_offset_past_buffer(self): + bo = '<' + # Header points to offset 0xFFFFFF00 -- well past any plausible + # buffer. parse_all_ifds short-circuits when offset >= len(data). + out = bytearray() + out.extend(b'II') + out.extend(struct.pack(f'{bo}H', 42)) + out.extend(struct.pack(f'{bo}I', 0xFFFFFF00)) + header = parse_header(bytes(out)) + ifds = parse_all_ifds(bytes(out), header) + assert ifds == [] # no IFDs, no crash