Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ Release History

Unreleased Changes
------------------
* Fixed a bug where extra attributes returned from frictionless
would cause a ValidationError when instantiating a ``Resource``.
https://github.com/natcap/geometamaker/issues/128
* Removed the ``encoding`` value when describing raster or vector
datasets. These are generally binary files, and the value that was
given did not reflect a true encoding, even for the attribute table
of a GDAL vector. https://github.com/natcap/geometamaker/issues/121
* Added an optional ``spatial`` attribute for tables, archives, and
collections. The ``spatial`` attribute for rasters and vectors remains
required. Spatial information for Collections represents the union of the
Expand Down
11 changes: 10 additions & 1 deletion src/geometamaker/geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,16 @@ def describe_file(source_dataset_path, scheme):
{description["path"]}'.encode('utf-8'))
description['uid'] = f'sizetimestamp:{hash_func.hexdigest()}'

# We don't have a use for including these attributes in our metadata:
# These are other attributes sometimes returned by frictionless.
# We don't have a use for them in our metadata and we do not permit
# arbitrary extra attributes in our models.
description.pop('mediatype', None)
description.pop('name', None)
description.pop('profile', None)
description.pop('dialect', None)
description.pop('hash', None)
description.pop('sources', None)
description.pop('licenses', None)
Comment on lines +290 to +299
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case frictionless adds some additional attributes in future releases, maybe it'd be more practical to have a list of attributes we want to keep and then delete the rest?

return description


Expand Down Expand Up @@ -357,6 +364,7 @@ def describe_vector(source_dataset_path, scheme, **kwargs):

"""
description = describe_file(source_dataset_path, scheme)
description.pop('encoding', None) # does not make sense for binary data
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, maybe my comment above doesn't make sense if the attributes to keep depend on the datatype.


if 'http' in scheme:
source_dataset_path = f'/vsicurl/{source_dataset_path}'
Expand Down Expand Up @@ -404,6 +412,7 @@ def describe_raster(source_dataset_path, scheme, **kwargs):
"""
compute_stats = kwargs.get('compute_stats', False)
description = describe_file(source_dataset_path, scheme)
description.pop('encoding', None) # does not make sense for binary data
if 'http' in scheme:
source_dataset_path = f'/vsicurl/{source_dataset_path}'
info = pygeoprocessing.get_raster_info(source_dataset_path)
Expand Down
7 changes: 6 additions & 1 deletion src/geometamaker/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,12 @@ def _deep_update_dict(self_dict, other_dict):


class Parent(BaseModel):
"""Parent class on which to configure validation."""
"""Parent class on which to configure validation.

Extra attributes are forbidden because we anticipate
users editing YML docs manually and this can help catch
accidental edits like a typo in an attribute name.
"""

model_config = ConfigDict(validate_assignment=True,
extra='forbid',
Expand Down
13 changes: 13 additions & 0 deletions tests/test_geometamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,19 @@ def test_describe_csv(self):
self.assertEqual(field.units, units)
self.assertEqual(resource.spatial, spatial)

def test_describe_csv_semicolon_dialect(self):
"""Test a CSV that uses semicolon delimiter."""
import geometamaker

datasource_path = os.path.join(self.workspace_dir, 'data.csv')
with open(datasource_path, 'w') as file:
file.write('a;b;c\n')
file.write('1;2;3\n')

resource = geometamaker.describe(datasource_path)
field = resource.get_field_description('a')
self.assertEqual(field.type, 'integer')

def test_describe_bad_csv(self):
"""MetadataControl: CSV with extra item in row does not fail."""
import geometamaker
Expand Down
Loading