diff --git a/openfecli/tests/data/rbfe_results_parallel.tar.gz b/openfecli/tests/data/rbfe_results_parallel.tar.gz index fd033bc17..ea9dc5b04 100644 Binary files a/openfecli/tests/data/rbfe_results_parallel.tar.gz and b/openfecli/tests/data/rbfe_results_parallel.tar.gz differ diff --git a/openfecli/tests/data/restructure_results_data.ipynb b/openfecli/tests/data/restructure_results_data.ipynb deleted file mode 100644 index 92aa67d22..000000000 --- a/openfecli/tests/data/restructure_results_data.ipynb +++ /dev/null @@ -1,245 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "8d1899bc-337a-4024-9fa3-9cfbc452e091", - "metadata": {}, - "outputs": [], - "source": [ - "import json \n", - "from gufe.tokenization import JSON_HANDLER\n", - "import numpy as np\n", - "import os \n", - "import shutil\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "markdown", - "id": "a82b8123-521a-4ca3-a2cf-f73b6504fa14", - "metadata": {}, - "source": [ - "for this dataset, we know we have 3 replicates run in serial for each leg. We want to manipulate the data so that it is equivalent to the output if we re-ran this dataset with each leg run in parallel, with the following directory structure:\n", - "\n", - "```\n", - "results/\n", - " transformations_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", - " shared_[hashA]_attempt_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", - " transformations_1/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", - " shared_[hashB]_attempt_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", - " transformations_2/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", - " shared_[hashC]_attempt_0/\n", - " rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c6ed7fe-b42c-4781-b356-85799e25356f", - "metadata": {}, - "outputs": [], - "source": [ - "def load_json(fpath):\n", - " return json.load(open(fpath, 'r'), cls=JSON_HANDLER.decoder)\n", - "\n", - "def dump_json(data, fpath):\n", - " with open(fpath, \"w\") as f:\n", - " json.dump(data, f, cls=JSON_HANDLER.encoder)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8eba246a-6123-4d8e-8fd8-2de516fbf881", - "metadata": {}, - "outputs": [], - "source": [ - "orig_dir = Path(\"results/\")\n", - "new_dir = Path(\"results_parallel/\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab4f2587-9b15-422d-9faa-e11ff98fd491", - "metadata": {}, - "outputs": [], - "source": [ - "leg_names = []\n", - "for name in os.listdir(orig_dir):\n", - " if name.endswith(\".json\"):\n", - " continue\n", - " leg_names.append(name)\n", - "leg_names" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "311a7f0e-9c91-47ae-9e09-0e1bef03aca8", - "metadata": {}, - "outputs": [], - "source": [ - "! rm -rf $new_dir\n", - "for leg in leg_names:\n", - " json_data = load_json(orig_dir/f\"{leg}.json\")\n", - " srckey_to_protocol = {}\n", - " srckey_to_unit_results = {}\n", - " srckey_to_estimate = {}\n", - " ## collect results on a per-replicate basis\n", - " for k in json_data['protocol_result']['data']: \n", - " rep_source_key = json_data['protocol_result']['data'][k][0]['source_key']\n", - " \n", - " # keep only the data for this replicate\n", - " rep_result = json_data['protocol_result'].copy()\n", - " rep_result['data']={k:json_data['protocol_result']['data'][k]}\n", - " srckey_to_protocol[rep_source_key] = rep_result\n", - "\n", - " # pull just the estimate value so we can put it at the top of the output\n", - " srckey_to_estimate[rep_source_key] = rep_result['data'][k][0]['outputs']['unit_estimate']\n", - " \n", - " for k in json_data['unit_results']:\n", - " rep_source_key = json_data['unit_results'][k]['source_key']\n", - "\n", - " rep_unit_result = json_data['unit_results'].copy()\n", - " rep_unit_result = {k: json_data['unit_results'][k]}\n", - " srckey_to_unit_results[rep_source_key] = rep_unit_result\n", - " \n", - " assert srckey_to_protocol.keys() == srckey_to_unit_results.keys()\n", - " \n", - " ## write to the new directory\n", - " for n, sk in enumerate(sorted(srckey_to_protocol.keys())):\n", - " rep_dir = new_dir/f\"replicate_{n}\"\n", - " os.makedirs(rep_dir/leg)\n", - " \n", - " # build up the data for this replicate\n", - " replicate_data = {'estimate': srckey_to_estimate[sk],\n", - " 'uncertainty': np.std(srckey_to_estimate[sk]),\n", - " 'protocol_result': srckey_to_protocol[sk],\n", - " 'unit_results': srckey_to_unit_results[sk]}\n", - " \n", - " # write!\n", - " dump_json(replicate_data, rep_dir/f\"{leg}.json\")\n", - " working_dir_name = f\"shared_{sk}_attempt_0\"\n", - " ## TODO: make this work for arbitrary number of attempts \n", - " # os.symlink(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n", - " shutil.copytree(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n" - ] - }, - { - "cell_type": "markdown", - "id": "f864dcb3-bebf-425b-9154-bffc2b0e3f07", - "metadata": {}, - "source": [ - "## check that objects reload correctly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c20639c-8ba7-457a-bf8a-76c64aef4a38", - "metadata": {}, - "outputs": [], - "source": [ - "import openfe" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9cba8316-5500-4d5e-a84d-d72d09ba2a42", - "metadata": {}, - "outputs": [], - "source": [ - "json_reloaded = load_json(\"results_parallel/replicate_0/easy_rbfe_lig_ejm_31_solvent_lig_ejm_47_solvent.json\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0e90b45-ae83-41c1-8748-0a8c1466b378", - "metadata": {}, - "outputs": [], - "source": [ - "json_reloaded['estimate'], json_reloaded['uncertainty']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0ce2bc6-d960-4521-b71c-316be0557e9d", - "metadata": {}, - "outputs": [], - "source": [ - "pr_reloaded = openfe.ProtocolResult.from_dict(json_reloaded['protocol_result'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a2fbb695-d4ef-45bd-af53-2ef9d0bc8e0a", - "metadata": {}, - "outputs": [], - "source": [ - "pr_reloaded.data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19662eaa-46de-4eb0-8c78-ddd6c68b12db", - "metadata": {}, - "outputs": [], - "source": [ - "first_pur_key = list(json_reloaded['unit_results'].keys())[0]\n", - "pur_reloaded = openfe.ProtocolUnit.from_dict(json_reloaded['unit_results'][first_pur_key])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0154fda2-4c1a-4064-8bcc-03aeecf13365", - "metadata": {}, - "outputs": [], - "source": [ - "pur_reloaded" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3f2bbc84-f59c-40b9-a176-9a733ff275c1", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}