Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions fairgraph/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,14 @@ def update_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocum
extended_response_configuration=default_response_configuration,
)
error_context = f"update_instance(data={data}, instance_id={instance_id})"
return self._check_response(response, error_context=error_context).data
response_data = self._check_response(response, error_context=error_context).data
# The cached document for this URI is now stale. Drop it so the next
# fetch goes to the KG. We deliberately do not refresh the cache from
# the response, because depending on the server's `returnPayload`
# default the response may be a minimal document (e.g. just `@id`)
# rather than the full updated instance.
self.cache.pop(self.uri_from_uuid(instance_id), None)
return response_data

def replace_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocument:
"""
Expand All @@ -548,7 +555,10 @@ def replace_instance(self, instance_id: str, data: JsonLdDocument) -> JsonLdDocu
extended_response_configuration=default_response_configuration,
)
error_context = f"replace_instance(data={data}, instance_id={instance_id})"
return self._check_response(response, error_context=error_context).data
response_data = self._check_response(response, error_context=error_context).data
# See update_instance for rationale: invalidate, don't refresh.
self.cache.pop(self.uri_from_uuid(instance_id), None)
return response_data

def delete_instance(self, instance_id: str, ignore_not_found: bool = True, ignore_errors: bool = True):
"""
Expand All @@ -563,6 +573,8 @@ def delete_instance(self, instance_id: str, ignore_not_found: bool = True, ignor
if response: # error
if not ignore_errors:
raise Exception(response.message)
else:
self.cache.pop(self.uri_from_uuid(instance_id), None)
return response

def uri_from_uuid(self, uuid: str) -> str:
Expand Down
1 change: 0 additions & 1 deletion fairgraph/kgobject.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,6 @@ def save(
try:
assert self.uuid is not None
client.replace_instance(self.uuid, local_data)
# what does this return? Can we use it to update `remote_data`?
except AuthorizationError as err:
if ignore_auth_errors:
logger.error(str(err))
Expand Down
112 changes: 112 additions & 0 deletions test/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,115 @@ def test_delete_instance(kg_client, mocker):
fake_id = "00000000-0000-0000-0000-000000000000"
response = kg_client.delete_instance(fake_id)
kg_client._kg_client.instances.delete.assert_called_once_with(fake_id)


@pytest.fixture
def offline_kg_client(mocker):
"""A KGClient that can be constructed without network access, for testing
behaviour that doesn't require a real KG. The underlying kg-core SDK methods
must be patched per-test."""
from fairgraph.client import KGClient

client = KGClient(token="fake-token", allow_interactive=False)
# Skip the feature-detection fetch that the `migrated` property triggers.
client._migrated = True
# `instance_from_full_uri` uses this to build the cache key after writes
mocker.patch.object(
client._kg_client.instances._kg_config,
"id_namespace",
"https://kg.ebrains.eu/api/instances/",
create=True,
)
return client


class TestCacheInvalidationOnWrite:
"""Regression tests for the bug where writes left stale entries in
`client.cache`, causing subsequent `from_id(use_cache=True)` calls to
return out-of-date data and `save()` to no-op on what looked like a
legitimate modification. See issue #110."""

uuid = "00000000-0000-0000-0000-000000000000"
uri = "https://kg.ebrains.eu/api/instances/00000000-0000-0000-0000-000000000000"

def test_update_instance_invalidates_cache(self, offline_kg_client, mocker):
offline_kg_client.cache[self.uri] = {"@id": self.uri, "stale": True}
mocker.patch.object(
offline_kg_client._kg_client.instances,
"contribute_to_partial_replacement",
lambda **kw: MockKGResponse({"@id": self.uri}),
)
offline_kg_client.update_instance(self.uuid, {"some": "patch"})
assert self.uri not in offline_kg_client.cache

def test_replace_instance_invalidates_cache(self, offline_kg_client, mocker):
offline_kg_client.cache[self.uri] = {"@id": self.uri, "stale": True}
mocker.patch.object(
offline_kg_client._kg_client.instances,
"contribute_to_full_replacement",
lambda **kw: MockKGResponse({"@id": self.uri}),
)
offline_kg_client.replace_instance(self.uuid, {"some": "data"})
assert self.uri not in offline_kg_client.cache

def test_delete_instance_invalidates_cache(self, offline_kg_client, mocker):
offline_kg_client.cache[self.uri] = {"@id": self.uri}
mocker.patch.object(offline_kg_client._kg_client.instances, "delete", return_value=None)
offline_kg_client.delete_instance(self.uuid)
assert self.uri not in offline_kg_client.cache

def test_unlink_after_refetch_sends_patch(self, offline_kg_client, mocker):
"""End-to-end: this is the user-visible bug. Load a DatasetVersion,
link a subject, save; re-load it via `from_id`, set the link back to
`None`, save again — the second save must PATCH studiedSpecimen=None,
not be a silent no-op."""
from fairgraph.openminds.core import DatasetVersion, Subject

sub_uri = "https://kg.ebrains.eu/api/instances/00000000-0000-0000-0000-000000000abc"
studied_specimen_path = "https://openminds.om-i.org/props/studiedSpecimen"
# Server-side state of the DSV, mutated by each PATCH so subsequent
# `instance_from_full_uri` calls see fresh data.
server_state = {
"@id": self.uri,
"@type": ["https://openminds.om-i.org/types/DatasetVersion"],
"http://schema.org/identifier": [self.uri],
"https://core.kg.ebrains.eu/vocab/meta/space": "myspace",
}

def get_by_id(stage, instance_id, extended_response_configuration):
return MockKGResponse(dict(server_state))

def contribute_to_partial_replacement(instance_id, payload, extended_response_configuration):
for key, value in payload.items():
if value is None:
server_state.pop(key, None)
else:
server_state[key] = value
return MockKGResponse(dict(server_state))

mocker.patch.object(offline_kg_client._kg_client.instances, "get_by_id", get_by_id)
mocker.patch.object(
offline_kg_client._kg_client.instances,
"contribute_to_partial_replacement",
contribute_to_partial_replacement,
)

# 1. Load fresh, link a subject, save.
dsv = DatasetVersion.from_id(self.uuid, offline_kg_client, scope="any")
dsv.studied_specimens = [Subject(id=sub_uri)]
dsv.save(offline_kg_client, space="myspace", recursive=False)
assert studied_specimen_path in server_state, "first save should have linked the subject"

# 2. Re-fetch via from_id. Before the fix, this would have returned
# stale cached data with no studiedSpecimen.
dsv2 = DatasetVersion.from_id(self.uuid, offline_kg_client, scope="any")
assert dsv2.studied_specimens is not None, (
"re-fetched object must see the link added by the prior save"
)

# 3. Unlink and save. The PATCH must clear studiedSpecimen on the server.
dsv2.studied_specimens = None
dsv2.save(offline_kg_client, space="myspace", recursive=False)
assert studied_specimen_path not in server_state, (
"second save should have sent a PATCH that cleared studiedSpecimen"
)
4 changes: 4 additions & 0 deletions test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class MockKGClient:

def __init__(self):
self.instances = {}
self.cache = {}

def retrieve_query(self, query_label):
return {"@id": f"mock-query-{query_label}"}
Expand Down Expand Up @@ -175,6 +176,9 @@ def replace_instance(self, instance_id, data):
assert instance_id is not None
assert data is not None

def uri_from_uuid(self, uuid):
return f"https://kg.ebrains.eu/api/instances/{uuid}"


@pytest.fixture
def mock_client():
Expand Down