diff --git a/pyiron_vasp/vasp/2024_07_07_BenchmarkVaspParser_Cleaned.ipynb b/pyiron_vasp/vasp/2024_07_07_BenchmarkVaspParser_Cleaned.ipynb new file mode 100644 index 00000000..3b89bf49 --- /dev/null +++ b/pyiron_vasp/vasp/2024_07_07_BenchmarkVaspParser_Cleaned.ipynb @@ -0,0 +1,280 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from pymatgen.io.vasp.outputs import Vasprun, Outcar\n", + "from pymatgen.io.ase import AseAtomsAdaptor\n", + "from ase.io import read\n", + "from pymatgen.io.vasp import Chgcar, Potcar\n", + "\n", + "path = \"/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample\"\n", + "\n", + "def parse_vasp_output_pymatgen(path):\n", + " # Parse the vasprun.xml file\n", + " vr = Vasprun(filename=f\"{path}/vasprun.xml\",\n", + " parse_projected_eigen=True,\n", + " separate_spins=False)\n", + "\n", + " # Initialize the output dictionary\n", + " output = {}\n", + " output[\"generic\"] = {}\n", + " output[\"description\"] = 'This contains all the output static from this particular VASP run'\n", + "\n", + " # 1. Get ASE atoms object via pymatgen\n", + " output[\"structure\"] = AseAtomsAdaptor.get_atoms(vr.final_structure)\n", + "\n", + " # # 2. Get ASE atoms object from ASE\n", + " # output[\"structure\"] = read(f\"{path}/CONTCAR\")\n", + "\n", + " # 3. Get charge density from CHGCAR\n", + " # For charge density, I'm not too clear on the details of what exactly is happening,\n", + " # but it appears it is spawned from pymatgen's parser originally anyway (per their comments in vasp.volumetric_data)\n", + " output[\"charge_density\"] = Chgcar.from_file(f\"{path}/CHGCAR\")\n", + "\n", + " # 4. Extract generic information\n", + " output[\"generic\"][\"temperature\"] = vr.parameters['TEBEG'] # this is just wrong in pyiron's current parsing. defaults to 0, is not 0.\n", + " # this is NOT actually 0, vasp does calculations at finite temperatures, even in DFT. This is for faster convergence - 0.0001 K is the default\n", + "\n", + " # Here we use the convention that positive stress is compressive, as is done in ASE\n", + " # Units here are eV/A^3\n", + " output[\"generic\"][\"stresses\"] = [-np.array(step[\"stress\"])/1600.21766208 for step in vr.ionic_steps]\n", + " # Don't use pressures entry anymore, I have no idea what it even is - just for the love of god, just let it die!!!\n", + " output[\"generic\"][\"pressures\"] = output[\"generic\"][\"stresses\"]\n", + " output[\"generic\"][\"forces\"] = [step['forces'] for step in vr.ionic_steps]\n", + " # Similarly to pressures, the cell object in generic interface just needs to die - all useful information is in the structure\n", + " # The edge case is where structures are extremely large and might not fit into memory, but cells do,\n", + " # but it's so niche and pollutes the interface for no good reason. \n", + " # I feel like most users aren't doing calculations with structures that don't fit into memory\n", + " output[\"generic\"][\"cells\"] = [AseAtomsAdaptor.get_atoms(step[\"structure\"]).cell for step in vr.ionic_steps]\n", + " # See above - is this necessary?\n", + " output[\"generic\"][\"volume\"] = [step[\"structure\"].lattice.volume for step in vr.ionic_steps]\n", + " # output_pyiron[\"generic\"][\"energy_pot\"] This is e_fr_energy in pymatgen\n", + " # in pymatgen\n", + " # 'e_fr_energy': -17.73798679,\n", + " # 'e_wo_entrp': -17.72353582,\n", + " # 'e_0_energy': -17.7331698}\n", + " # pyiron: \n", + " # output_pyiron[\"generic\"][\"energy_pot\"] -> array([-17.73798679])\n", + " # output_pyiron[\"generic\"][\"energy_tot\"] -> array([-17.73798679])\n", + " output[\"generic\"][\"energy_pot\"] = [step['electronic_steps'][-1]['e_wo_entrp'] for step in vr.ionic_steps]\n", + " # output_pyiron[\"generic\"][\"energy_tot\"] This is also equivalent to e_fr_energy in pymatgen (e_tot = e_pot (!?))\n", + " output[\"generic\"][\"energy_tot\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n", + " # output[\"generic\"][\"energy_tot\"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vasprun.ionic_steps] # energy sigma-> 0 (\"real\" free energy at 0K)\n", + "\n", + " # For some reason steps is acquired by calling np.arange(len(steps)) in current pyiron parser. Surely returning a list when \n", + " # the expected value is an integer is nonsensical? arange also generates 0 in the event of len=1...\n", + " output[\"generic\"][\"steps\"] = len(vr.ionic_steps)\n", + " # Another grievous offender that pollutes the interface for no good reason...\n", + " # introducing \"positions\", when \"structures\" is present in output is clearly enough. Doubling storage in hdf for no new information is just nasty work\n", + " output[\"generic\"][\"positions\"] = [AseAtomsAdaptor.get_atoms(step[\"structure\"]).positions for step in vr.ionic_steps]\n", + "\n", + " # 5. Read elastic constants from OUTCAR\n", + " try:\n", + " elastic_constants = Outcar(filename=f\"{path}/OUTCAR\").read_elastic_tensor()\n", + " except Exception as e:\n", + " elastic_constants = None\n", + " output[\"generic\"][\"elastic_constants\"] = elastic_constants\n", + "\n", + " # 6. Extract DFT information\n", + " # Notes: here is the limitations of pymatgen parsing - they only parse the final step.\n", + " outcar = Outcar(filename=f\"{path}/OUTCAR\")\n", + " output[\"generic\"][\"dft\"] = {}\n", + " output[\"generic\"][\"dft\"][\"n_elect\"] = vr.parameters[\"NELECT\"]\n", + "\n", + " # NOTE: Ahmed look here (?) probably needs to just use our own parser\n", + " output[\"generic\"][\"dft\"][\"potentiostat_output\"] = [] \n", + "\n", + " output[\"generic\"][\"dft\"][\"magnetization\"] = [outcar.magnetization]\n", + " # pymatgen Outcar parser alternative only allows us to parse last step, but does not affect final_magmoms here\n", + " output[\"generic\"][\"dft\"][\"final_magmoms\"] = [ionic_entry[\"tot\"] for ionic_entry in outcar.magnetization]\n", + " # pymatgen only allows the calculation of the fermi energy on the last step\n", + " output[\"generic\"][\"dft\"][\"e_fermi_list\"] = np.array(vr.calculate_efermi())\n", + " # pymatgen only allows the calculation of the valence band maximum on the last step\n", + " # NOTE: SERIOUS PROBLEMS - VBM CALCULATION IS DIFFERENT IN PYIRON VS PYMATGEN - (I (Han) have no idea :))\n", + " output[\"generic\"][\"dft\"][\"vbm_list\"] = vr.eigenvalue_band_properties[1]\n", + " # pymatgen only allows the calculation of the conduction band minimum on the last step\n", + " # NOTE: SERIOUS PROBLEMS - CBM CALCULATION IS DIFFERENT IN PYIRON VS PYMATGEN - (I (Han) have no idea :))\n", + " output[\"generic\"][\"dft\"][\"cbm_list\"] = vr.eigenvalue_band_properties[2]\n", + " # pymatgen only allows parsing of the final total energy contribution...\n", + " output[\"generic\"][\"dft\"][\"ediel_sol\"] = outcar.final_energy_contribs[\"Ediel_sol\"]\n", + " \n", + "\n", + " output[\"generic\"][\"dft\"][\"ediel_sol\"] = outcar.final_energy_contribs[\"Ediel_sol\"]\n", + " output[\"generic\"][\"dft\"][\"potentiostat_output\"] = []\n", + "\n", + " # Read POTCAR file for valence charges - no support in pymatgen for extracting valence from vasprun (although possible)\n", + " # NOTE: Commented out because we don't ahve valid POTCAR for testing here\n", + " # potcar = Potcar.from_file(f\"{path}/POTCAR\")\n", + " #output[\"generic\"][\"dft\"][\"valence_charges\"] = [ps.nelectrons for ps in potcar]\n", + "\n", + " # SCF energies\n", + " output[\"generic\"][\"dft\"][\"scf_dipole_mom\"] = [] # TODO: Likely needs to default to OUTCAR parsing via pyiron OUTCAR\n", + " output[\"generic\"][\"dft\"][\"scf_energy_int\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n", + " output[\"generic\"][\"dft\"][\"scf_energy_free\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n", + " output[\"generic\"][\"dft\"][\"scf_energy_zero\"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vr.ionic_steps]\n", + " output[\"generic\"][\"dft\"][\"energy_int\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n", + " output[\"generic\"][\"dft\"][\"energy_free\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n", + " output[\"generic\"][\"dft\"][\"energy_zero\"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vr.ionic_steps]\n", + "\n", + " # 7. Extract band structure and DOS\n", + " output[\"generic\"][\"dft\"][\"bands\"] = {}\n", + " # NOTE: I'm in favour of just doing:\n", + " # output[\"generic\"][\"dft\"][\"bands\"] = vr.get_band_structure().as_dict()\n", + " # BUT if you want to keep the prior structured output\n", + " vr_bs_dict = vr.get_band_structure().as_dict()\n", + " output[\"generic\"][\"dft\"][\"bands\"][\"k_points\"] = vr.actual_kpoints\n", + " return output\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "path = \"/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/miniconda3/envs/pyiron/lib/python3.12/site-packages/pymatgen/io/vasp/outputs.py:1185: UserWarning: No POTCAR file with matching TITEL fields was found in\n", + "/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample/POTCAR\n", + " warnings.warn(\"No POTCAR file with matching TITEL fields was found in\\n\" + \"\\n \".join(potcar_paths))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "33.4 ms ± 665 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n100\n", + "output = parse_vasp_output_pymatgen(path)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/miniconda3/envs/pyiron/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from pyiron_atomistics.vasp.output import parse_vasp_output" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "21.3 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit -n100\n", + "output_pyiron = parse_vasp_output(path)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "path_real = \"/root/github_dev/dev_vasp_scraper/As-30-d-2.5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.34 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit -n1\n", + "output = parse_vasp_output_pymatgen(path_real)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.92 s ± 61.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" + ] + } + ], + "source": [ + "%%timeit -n1\n", + "output_pyiron = parse_vasp_output(path_real)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pyiron", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}