Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ Unreleased Changes
indicating that the file passed validation. Previously, no output was emitted
unless there were validation errors.
https://github.com/natcap/geometamaker/issues/105
* Added read support for a GDAL Raster Attribute Table. If it exists, the
table will be included as band metadata under the 'raster_attribute_table'
key. It can be retrieved by the ``get_rat`` method of a ``RasterResource``
instance. https://github.com/natcap/geometamaker/issues/25
* The Natural Capital Project changed its name to the Natural Capital Alliance.
References to the old name and website URL have been updated to reflect
this change. https://github.com/natcap/geometamaker/issues/115
Expand Down
17 changes: 15 additions & 2 deletions src/geometamaker/geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

logging.getLogger('chardet').setLevel(logging.INFO) # DEBUG is just too noisy

GDAL_VERSION = tuple(int(_) for _ in gdal.__version__.split('.'))

LOGGER = logging.getLogger('geometamaker')
_NOT_FOR_CLI = 'not_for_cli'
_LOG_EXTRA_NOT_FOR_CLI = {
Expand Down Expand Up @@ -411,10 +413,20 @@ def describe_raster(source_dataset_path, scheme, **kwargs):
for i in range(info['n_bands']):
b = i + 1
band = raster.GetRasterBand(b)
rat = None
gdal_rat = band.GetDefaultRAT()
if gdal_rat:
rat = models.RasterAttributeTable.from_gdal(gdal_rat)
elif GDAL_VERSION < (3, 11, 0):
# GetDefaultRAT did not support DBF prior to 3.11.0
dbf_rat = f'{source_dataset_path}.vat.dbf'
if dbf_rat in info['file_list']:
rat = models.RasterAttributeTable.from_gdal_dbf(dbf_rat)

band_gdal_metadata = band.GetMetadata()
if compute_stats:
try:
if not 'STATISTICS_VALID_PERCENT' in band_gdal_metadata:
if 'STATISTICS_VALID_PERCENT' not in band_gdal_metadata:
# Sometimes some stats exist, but not all. If this one doesn't,
# it's important enough that we want to force computation.
_ = band.ComputeStatistics(0, callback=_gdal_progress_callback)
Expand All @@ -435,7 +447,8 @@ def describe_raster(source_dataset_path, scheme, **kwargs):
gdal_type=gdal.GetDataTypeName(info['datatype']),
numpy_type=numpy.dtype(info['numpy_type']).name,
nodata=info['nodata'][i],
gdal_metadata=band_gdal_metadata))
gdal_metadata=band_gdal_metadata,
raster_attribute_table=rat))
band = None
raster = None

Expand Down
113 changes: 111 additions & 2 deletions src/geometamaker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
import numbers
import os
import warnings
from typing import Union
from typing import Literal, Union

import fsspec
import yaml
from osgeo import gdal
from pydantic import BaseModel, ConfigDict, Field, ValidationError
from pydantic.dataclasses import dataclass

Expand Down Expand Up @@ -191,6 +192,100 @@ def get_field_description(self, name):
return field


class RATColumnDefn(Parent):
"""Class for metadata for a column in a raster attribute table."""
model_config = ConfigDict(frozen=True)

name: str
"""The name used to uniquely identify the column."""
type: str
"""Datatype of the content of the column.

String representation of one of ``GDALRATFieldType`` or ``OGRFieldType``.
"""
usage: str
"""The intended use of the column.

String representation of one of ``GDALRATFieldUsage``.
"""


class RasterAttributeTable(Parent):
"""Class for metadata for a raster attribute table."""
model_config = ConfigDict(frozen=True)

table_type: Literal['Thematic', 'Athematic', 'Unknown']
"""One of ``GDALRATTableType`` or Unknown."""
columns: list[RATColumnDefn]
"""A list of column definitions in the raster attribute table."""
rows: list[dict]
"""A list of dicts representing rows in the raster attribute table."""

@classmethod
def from_gdal(cls, rat):
"""Create a RasterAttributeTable from a GDAL RAT object."""
table_type = utils._GRTT_INT_TO_STR[rat.GetTableType()]
columns = []
n_cols = rat.GetColumnCount()
for i in range(n_cols):
columns.append(RATColumnDefn(
name=rat.GetNameOfCol(i),
type=utils._GFT_INT_TO_STR[rat.GetTypeOfCol(i)],
usage=utils._GFU_INT_TO_STR[rat.GetUsageOfCol(i)]))
rows = []
for i in range(rat.GetRowCount()):
row = {}
for j in range(n_cols):
col = columns[j]
match col.type:
case 'Integer':
row[col.name] = rat.GetValueAsInt(i, j)
case 'String':
row[col.name] = rat.GetValueAsString(i, j)
case 'Real':
row[col.name] = rat.GetValueAsDouble(i, j)
case 'Boolean':
row[col.name] = rat.GetValueAsBoolean(i, j)
case 'DateTime':
row[col.name] = rat.GetValueAsDateTime(i, j)
case 'WKBGeometry':
row[col.name] = rat.GetValueAsWKBGeometry(i, j)
case _:
row[col.name] = rat.GetValueAsString(i, j)
rows.append(row)
return cls(table_type=table_type, columns=columns, rows=rows)

@classmethod
def from_gdal_dbf(cls, dbf_path):
"""Create a RasterAttributeTable from a DBF file."""
vector = gdal.OpenEx(dbf_path)
layer = vector.GetLayer()
columns = []
for field in layer.schema:
name = field.GetName()
if name == 'VALUE':
usage = utils._GFU_INT_TO_STR[gdal.GFU_MinMax]
elif name == 'COUNT':
usage = utils._GFU_INT_TO_STR[gdal.GFU_PixelCount]
# I'm not sure how standard any other fields are, so just calling
# them all 'Generic' may be good enough.
else:
usage = utils._GFU_INT_TO_STR[gdal.GFU_Generic]
columns.append(
RATColumnDefn(
name=name,
type=field.GetTypeName(),
usage=usage))
rows = []
for feature in layer:
row = {}
for col in columns:
row[col.name] = feature.GetField(col.name)
rows.append(row)

return cls(table_type='Unknown', columns=columns, rows=rows)


class BandSchema(Parent):
"""Class for metadata for a raster band."""

Expand All @@ -210,6 +305,8 @@ class BandSchema(Parent):
"""Unit of measurement for the pixel values."""
gdal_metadata: dict = {}
"""Metadata key:value pairs stored in the GDAL band object."""
raster_attribute_table: RasterAttributeTable | None = None
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably worth adding a short docstring here, for consistency?

"""A representation of a GDAL-readable Raster Attribute Table."""


class RasterSchema(Parent):
Expand Down Expand Up @@ -875,7 +972,7 @@ def set_band_description(self, band_number, title=None,
self.data_model.bands[idx] = band

def get_band_description(self, band_number):
"""Get the attribute metadata for a band.
"""Get the metadata for a band.

Args:
band_number (int): a raster band index, starting at 1
Expand All @@ -885,3 +982,15 @@ def get_band_description(self, band_number):

"""
return self.data_model.bands[band_number - 1]

def get_rat(self, band_number):
"""Get the raster attribute table for a band, if it exists.

Args:
band_number (int): a raster band index, starting at 1

Returns:
RasterAttributeTable or None if no RAT exists for the band

"""
return self.data_model.bands[band_number - 1].raster_attribute_table
39 changes: 39 additions & 0 deletions src/geometamaker/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from osgeo import gdal
import yaml


Expand Down Expand Up @@ -28,3 +29,41 @@ def yaml_dump(data):
allow_unicode=True,
sort_keys=False,
Dumper=_SafeDumper)


# GDALGetRATFieldUsageName() and GDALGetRATFieldTypeName() were only added to
# GDAL in 3.12, so we can maintain our own lookups.
_GFU_INT_TO_STR = {
gdal.GFU_Generic: 'Generic',
gdal.GFU_PixelCount: 'PixelCount',
gdal.GFU_Name: 'Name',
gdal.GFU_Min: 'Min',
gdal.GFU_Max: 'Max',
gdal.GFU_MinMax: 'MinMax',
gdal.GFU_Red: 'Red',
gdal.GFU_Green: 'Green',
gdal.GFU_Blue: 'Blue',
gdal.GFU_Alpha: 'Alpha',
gdal.GFU_RedMin: 'RedMin',
gdal.GFU_GreenMin: 'GreenMin',
gdal.GFU_BlueMin: 'BlueMin',
gdal.GFU_AlphaMin: 'AlphaMin',
gdal.GFU_RedMax: 'RedMax',
gdal.GFU_GreenMax: 'GreenMax',
gdal.GFU_BlueMax: 'BlueMax',
gdal.GFU_AlphaMax: 'AlphaMax',
}

_GFT_INT_TO_STR = {
gdal.GFT_Integer: 'Integer',
gdal.GFT_Real: 'Real',
gdal.GFT_String: 'String',
gdal.GFT_Boolean: 'Boolean',
gdal.GFT_DateTime: 'DateTime',
gdal.GFT_WKBGeometry: 'WKBGeometry',
}

_GRTT_INT_TO_STR = {
gdal.GRTT_THEMATIC: 'Thematic',
gdal.GRTT_ATHEMATIC: 'Athematic'
}
Binary file added tests/data/testrat.tif
Binary file not shown.
Binary file added tests/data/testrat.tif.vat.dbf
Binary file not shown.
61 changes: 59 additions & 2 deletions tests/test_geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
from pygeoprocessing.geoprocessing_core import DEFAULT_GTIFF_CREATION_TUPLE_OPTIONS
from pydantic import ValidationError

REGRESSION_DATA = os.path.join(
os.path.dirname(__file__), 'data')

# A remote file we can use for testing
REMOTE_FILEPATH = 'https://storage.googleapis.com/releases.naturalcapitalproject.org/invest/3.14.2/data/CoastalBlueCarbon.zip'
Expand Down Expand Up @@ -378,6 +376,65 @@ def test_describe_raster_with_gdal_metadata(self):
{'AREA_OR_POINT': 'Area', # This exists by default
'FOO': 'BAR'})

def test_describe_raster_with_raster_attribute_table(self):
"""Test raster attribute table will be included if it exists."""
import geometamaker

datasource_path = os.path.join(self.workspace_dir, 'raster.tif')
create_raster(numpy.int16, datasource_path, n_bands=1)
raster = gdal.OpenEx(datasource_path, gdal.OF_UPDATE)
band = raster.GetRasterBand(1)
rat = gdal.RasterAttributeTable()
value_name = 'Value'
count_name = 'Count'
rat.CreateColumn(value_name, gdal.GFT_Integer, gdal.GFU_MinMax)
rat.CreateColumn(count_name, gdal.GFT_Integer, gdal.GFU_PixelCount)
array = band.ReadAsArray()
values, counts = numpy.unique(array, return_counts=True)
for i in range(len(values)):
rat.SetValueAsInt(i, 0, int(values[i]))
rat.SetValueAsInt(i, 1, int(counts[i]))
band.SetDefaultRAT(rat)
band = raster = None

resource = geometamaker.describe(datasource_path)

table = resource.get_rat(1)
self.assertEqual(len(table.rows), len(values))
self.assertEqual(table.columns[0].name, value_name)
self.assertEqual(table.columns[0].type, 'Integer')
self.assertEqual(table.columns[0].usage, 'MinMax')
self.assertEqual(table.columns[1].name, count_name)
self.assertEqual(table.columns[1].type, 'Integer')
self.assertEqual(table.columns[1].usage, 'PixelCount')

def test_describe_raster_with_dbf_rat(self):
"""Test raster attribute table can be constructed from vat.dbf."""
from geometamaker import models

datasource_path = os.path.join(
os.path.dirname(__file__), 'data/testrat.tif.vat.dbf')

# Calling this method directly instead of `describe`
# because depending on GDAL version, this method may not be used
# and the resulting table would have minor differences.
table = models.RasterAttributeTable.from_gdal_dbf(datasource_path)

self.assertEqual(len(table.rows), 2)
self.assertEqual(len(table.columns), 9)
self.assertEqual(
[col.name for col in table.columns],
["VALUE", "COUNT", "CLASS", "Red", "Green", "Blue", "OtherInt",
"OtherReal", "OtherStr"])
self.assertEqual(
[col.type for col in table.columns],
["Integer", "Integer", "String", "Real", "Real", "Real", "Integer",
"Real", "String"])
self.assertEqual(
[col.usage for col in table.columns],
["MinMax", "PixelCount", "Generic", "Generic", "Generic", "Generic", "Generic",
"Generic", "Generic"])

def test_describe_vector_with_gdal_metadata(self):
"""Test vector metadata will be included if they already exist."""
import geometamaker
Expand Down
Loading