diff --git a/HISTORY.rst b/HISTORY.rst index 5cea81d..7512e78 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -13,6 +13,10 @@ Unreleased Changes indicating that the file passed validation. Previously, no output was emitted unless there were validation errors. https://github.com/natcap/geometamaker/issues/105 +* Added read support for a GDAL Raster Attribute Table. If it exists, the + table will be included as band metadata under the 'raster_attribute_table' + key. It can be retrieved by the ``get_rat`` method of a ``RasterResource`` + instance. https://github.com/natcap/geometamaker/issues/25 * The Natural Capital Project changed its name to the Natural Capital Alliance. References to the old name and website URL have been updated to reflect this change. https://github.com/natcap/geometamaker/issues/115 diff --git a/src/geometamaker/geometamaker.py b/src/geometamaker/geometamaker.py index 10f2ffd..1edd67d 100644 --- a/src/geometamaker/geometamaker.py +++ b/src/geometamaker/geometamaker.py @@ -23,6 +23,8 @@ logging.getLogger('chardet').setLevel(logging.INFO) # DEBUG is just too noisy +GDAL_VERSION = tuple(int(_) for _ in gdal.__version__.split('.')) + LOGGER = logging.getLogger('geometamaker') _NOT_FOR_CLI = 'not_for_cli' _LOG_EXTRA_NOT_FOR_CLI = { @@ -411,10 +413,20 @@ def describe_raster(source_dataset_path, scheme, **kwargs): for i in range(info['n_bands']): b = i + 1 band = raster.GetRasterBand(b) + rat = None + gdal_rat = band.GetDefaultRAT() + if gdal_rat: + rat = models.RasterAttributeTable.from_gdal(gdal_rat) + elif GDAL_VERSION < (3, 11, 0): + # GetDefaultRAT did not support DBF prior to 3.11.0 + dbf_rat = f'{source_dataset_path}.vat.dbf' + if dbf_rat in info['file_list']: + rat = models.RasterAttributeTable.from_gdal_dbf(dbf_rat) + band_gdal_metadata = band.GetMetadata() if compute_stats: try: - if not 'STATISTICS_VALID_PERCENT' in band_gdal_metadata: + if 'STATISTICS_VALID_PERCENT' not in band_gdal_metadata: # Sometimes some stats exist, but not all. If this one doesn't, # it's important enough that we want to force computation. _ = band.ComputeStatistics(0, callback=_gdal_progress_callback) @@ -435,7 +447,8 @@ def describe_raster(source_dataset_path, scheme, **kwargs): gdal_type=gdal.GetDataTypeName(info['datatype']), numpy_type=numpy.dtype(info['numpy_type']).name, nodata=info['nodata'][i], - gdal_metadata=band_gdal_metadata)) + gdal_metadata=band_gdal_metadata, + raster_attribute_table=rat)) band = None raster = None diff --git a/src/geometamaker/models.py b/src/geometamaker/models.py index 97aa598..95deb5b 100644 --- a/src/geometamaker/models.py +++ b/src/geometamaker/models.py @@ -4,10 +4,11 @@ import numbers import os import warnings -from typing import Union +from typing import Literal, Union import fsspec import yaml +from osgeo import gdal from pydantic import BaseModel, ConfigDict, Field, ValidationError from pydantic.dataclasses import dataclass @@ -191,6 +192,100 @@ def get_field_description(self, name): return field +class RATColumnDefn(Parent): + """Class for metadata for a column in a raster attribute table.""" + model_config = ConfigDict(frozen=True) + + name: str + """The name used to uniquely identify the column.""" + type: str + """Datatype of the content of the column. + + String representation of one of ``GDALRATFieldType`` or ``OGRFieldType``. + """ + usage: str + """The intended use of the column. + + String representation of one of ``GDALRATFieldUsage``. + """ + + +class RasterAttributeTable(Parent): + """Class for metadata for a raster attribute table.""" + model_config = ConfigDict(frozen=True) + + table_type: Literal['Thematic', 'Athematic', 'Unknown'] + """One of ``GDALRATTableType`` or Unknown.""" + columns: list[RATColumnDefn] + """A list of column definitions in the raster attribute table.""" + rows: list[dict] + """A list of dicts representing rows in the raster attribute table.""" + + @classmethod + def from_gdal(cls, rat): + """Create a RasterAttributeTable from a GDAL RAT object.""" + table_type = utils._GRTT_INT_TO_STR[rat.GetTableType()] + columns = [] + n_cols = rat.GetColumnCount() + for i in range(n_cols): + columns.append(RATColumnDefn( + name=rat.GetNameOfCol(i), + type=utils._GFT_INT_TO_STR[rat.GetTypeOfCol(i)], + usage=utils._GFU_INT_TO_STR[rat.GetUsageOfCol(i)])) + rows = [] + for i in range(rat.GetRowCount()): + row = {} + for j in range(n_cols): + col = columns[j] + match col.type: + case 'Integer': + row[col.name] = rat.GetValueAsInt(i, j) + case 'String': + row[col.name] = rat.GetValueAsString(i, j) + case 'Real': + row[col.name] = rat.GetValueAsDouble(i, j) + case 'Boolean': + row[col.name] = rat.GetValueAsBoolean(i, j) + case 'DateTime': + row[col.name] = rat.GetValueAsDateTime(i, j) + case 'WKBGeometry': + row[col.name] = rat.GetValueAsWKBGeometry(i, j) + case _: + row[col.name] = rat.GetValueAsString(i, j) + rows.append(row) + return cls(table_type=table_type, columns=columns, rows=rows) + + @classmethod + def from_gdal_dbf(cls, dbf_path): + """Create a RasterAttributeTable from a DBF file.""" + vector = gdal.OpenEx(dbf_path) + layer = vector.GetLayer() + columns = [] + for field in layer.schema: + name = field.GetName() + if name == 'VALUE': + usage = utils._GFU_INT_TO_STR[gdal.GFU_MinMax] + elif name == 'COUNT': + usage = utils._GFU_INT_TO_STR[gdal.GFU_PixelCount] + # I'm not sure how standard any other fields are, so just calling + # them all 'Generic' may be good enough. + else: + usage = utils._GFU_INT_TO_STR[gdal.GFU_Generic] + columns.append( + RATColumnDefn( + name=name, + type=field.GetTypeName(), + usage=usage)) + rows = [] + for feature in layer: + row = {} + for col in columns: + row[col.name] = feature.GetField(col.name) + rows.append(row) + + return cls(table_type='Unknown', columns=columns, rows=rows) + + class BandSchema(Parent): """Class for metadata for a raster band.""" @@ -210,6 +305,8 @@ class BandSchema(Parent): """Unit of measurement for the pixel values.""" gdal_metadata: dict = {} """Metadata key:value pairs stored in the GDAL band object.""" + raster_attribute_table: RasterAttributeTable | None = None + """A representation of a GDAL-readable Raster Attribute Table.""" class RasterSchema(Parent): @@ -875,7 +972,7 @@ def set_band_description(self, band_number, title=None, self.data_model.bands[idx] = band def get_band_description(self, band_number): - """Get the attribute metadata for a band. + """Get the metadata for a band. Args: band_number (int): a raster band index, starting at 1 @@ -885,3 +982,15 @@ def get_band_description(self, band_number): """ return self.data_model.bands[band_number - 1] + + def get_rat(self, band_number): + """Get the raster attribute table for a band, if it exists. + + Args: + band_number (int): a raster band index, starting at 1 + + Returns: + RasterAttributeTable or None if no RAT exists for the band + + """ + return self.data_model.bands[band_number - 1].raster_attribute_table diff --git a/src/geometamaker/utils.py b/src/geometamaker/utils.py index 8cab2a5..5c9ef0b 100644 --- a/src/geometamaker/utils.py +++ b/src/geometamaker/utils.py @@ -1,3 +1,4 @@ +from osgeo import gdal import yaml @@ -28,3 +29,41 @@ def yaml_dump(data): allow_unicode=True, sort_keys=False, Dumper=_SafeDumper) + + +# GDALGetRATFieldUsageName() and GDALGetRATFieldTypeName() were only added to +# GDAL in 3.12, so we can maintain our own lookups. +_GFU_INT_TO_STR = { + gdal.GFU_Generic: 'Generic', + gdal.GFU_PixelCount: 'PixelCount', + gdal.GFU_Name: 'Name', + gdal.GFU_Min: 'Min', + gdal.GFU_Max: 'Max', + gdal.GFU_MinMax: 'MinMax', + gdal.GFU_Red: 'Red', + gdal.GFU_Green: 'Green', + gdal.GFU_Blue: 'Blue', + gdal.GFU_Alpha: 'Alpha', + gdal.GFU_RedMin: 'RedMin', + gdal.GFU_GreenMin: 'GreenMin', + gdal.GFU_BlueMin: 'BlueMin', + gdal.GFU_AlphaMin: 'AlphaMin', + gdal.GFU_RedMax: 'RedMax', + gdal.GFU_GreenMax: 'GreenMax', + gdal.GFU_BlueMax: 'BlueMax', + gdal.GFU_AlphaMax: 'AlphaMax', +} + +_GFT_INT_TO_STR = { + gdal.GFT_Integer: 'Integer', + gdal.GFT_Real: 'Real', + gdal.GFT_String: 'String', + gdal.GFT_Boolean: 'Boolean', + gdal.GFT_DateTime: 'DateTime', + gdal.GFT_WKBGeometry: 'WKBGeometry', +} + +_GRTT_INT_TO_STR = { + gdal.GRTT_THEMATIC: 'Thematic', + gdal.GRTT_ATHEMATIC: 'Athematic' +} diff --git a/tests/data/testrat.tif b/tests/data/testrat.tif new file mode 100644 index 0000000..6fddac1 Binary files /dev/null and b/tests/data/testrat.tif differ diff --git a/tests/data/testrat.tif.vat.dbf b/tests/data/testrat.tif.vat.dbf new file mode 100644 index 0000000..3447237 Binary files /dev/null and b/tests/data/testrat.tif.vat.dbf differ diff --git a/tests/test_geometamaker.py b/tests/test_geometamaker.py index 0cbeb45..12e436e 100644 --- a/tests/test_geometamaker.py +++ b/tests/test_geometamaker.py @@ -18,8 +18,6 @@ from pygeoprocessing.geoprocessing_core import DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS from pydantic import ValidationError -REGRESSION_DATA = os.path.join( - os.path.dirname(__file__), 'data') # A remote file we can use for testing REMOTE_FILEPATH = 'https://storage.googleapis.com/releases.naturalcapitalproject.org/invest/3.14.2/data/CoastalBlueCarbon.zip' @@ -378,6 +376,65 @@ def test_describe_raster_with_gdal_metadata(self): {'AREA_OR_POINT': 'Area', # This exists by default 'FOO': 'BAR'}) + def test_describe_raster_with_raster_attribute_table(self): + """Test raster attribute table will be included if it exists.""" + import geometamaker + + datasource_path = os.path.join(self.workspace_dir, 'raster.tif') + create_raster(numpy.int16, datasource_path, n_bands=1) + raster = gdal.OpenEx(datasource_path, gdal.OF_UPDATE) + band = raster.GetRasterBand(1) + rat = gdal.RasterAttributeTable() + value_name = 'Value' + count_name = 'Count' + rat.CreateColumn(value_name, gdal.GFT_Integer, gdal.GFU_MinMax) + rat.CreateColumn(count_name, gdal.GFT_Integer, gdal.GFU_PixelCount) + array = band.ReadAsArray() + values, counts = numpy.unique(array, return_counts=True) + for i in range(len(values)): + rat.SetValueAsInt(i, 0, int(values[i])) + rat.SetValueAsInt(i, 1, int(counts[i])) + band.SetDefaultRAT(rat) + band = raster = None + + resource = geometamaker.describe(datasource_path) + + table = resource.get_rat(1) + self.assertEqual(len(table.rows), len(values)) + self.assertEqual(table.columns[0].name, value_name) + self.assertEqual(table.columns[0].type, 'Integer') + self.assertEqual(table.columns[0].usage, 'MinMax') + self.assertEqual(table.columns[1].name, count_name) + self.assertEqual(table.columns[1].type, 'Integer') + self.assertEqual(table.columns[1].usage, 'PixelCount') + + def test_describe_raster_with_dbf_rat(self): + """Test raster attribute table can be constructed from vat.dbf.""" + from geometamaker import models + + datasource_path = os.path.join( + os.path.dirname(__file__), 'data/testrat.tif.vat.dbf') + + # Calling this method directly instead of `describe` + # because depending on GDAL version, this method may not be used + # and the resulting table would have minor differences. + table = models.RasterAttributeTable.from_gdal_dbf(datasource_path) + + self.assertEqual(len(table.rows), 2) + self.assertEqual(len(table.columns), 9) + self.assertEqual( + [col.name for col in table.columns], + ["VALUE", "COUNT", "CLASS", "Red", "Green", "Blue", "OtherInt", + "OtherReal", "OtherStr"]) + self.assertEqual( + [col.type for col in table.columns], + ["Integer", "Integer", "String", "Real", "Real", "Real", "Integer", + "Real", "String"]) + self.assertEqual( + [col.usage for col in table.columns], + ["MinMax", "PixelCount", "Generic", "Generic", "Generic", "Generic", "Generic", + "Generic", "Generic"]) + def test_describe_vector_with_gdal_metadata(self): """Test vector metadata will be included if they already exist.""" import geometamaker