Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
280 changes: 280 additions & 0 deletions pyiron_vasp/vasp/2024_07_07_BenchmarkVaspParser_Cleaned.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
{
Copy link
Contributor

@ahmedabdelkawy ahmedabdelkawy Jul 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #63.        output["generic"]["steps"] = len(vr.ionic_steps)

The definition of steps here is not the total number of ionic steps it is a method (as I understand) that allows the user to do something like: plt.plot(job.output.steps, job.output.energy_tot). In other words it keeps an index for each ionic step. However, I agree that the parsing is wrong (there is the get_steps in the outcar.py which is never called) and then we end up using the arrange(energy) thing in the output.py.


Reply via ReviewNB

Copy link
Contributor

@ahmedabdelkawy ahmedabdelkawy Jul 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line #64.        # Another grievous offender that pollutes the interface for no good reason...

The structure object in the generic output is only for the first and final structures: job['input/structure/positions'] and job['output/structure/positions']. The positions for the entire trajectory can be obtained only via job['output/generic/positions'] So I don't think we double the storage or save the trajectory twice (as I understand how we currently parse things).


Reply via ReviewNB

"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from pymatgen.io.vasp.outputs import Vasprun, Outcar\n",
"from pymatgen.io.ase import AseAtomsAdaptor\n",
"from ase.io import read\n",
"from pymatgen.io.vasp import Chgcar, Potcar\n",
"\n",
"path = \"/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample\"\n",
"\n",
"def parse_vasp_output_pymatgen(path):\n",
" # Parse the vasprun.xml file\n",
" vr = Vasprun(filename=f\"{path}/vasprun.xml\",\n",
" parse_projected_eigen=True,\n",
" separate_spins=False)\n",
"\n",
" # Initialize the output dictionary\n",
" output = {}\n",
" output[\"generic\"] = {}\n",
" output[\"description\"] = 'This contains all the output static from this particular VASP run'\n",
"\n",
" # 1. Get ASE atoms object via pymatgen\n",
" output[\"structure\"] = AseAtomsAdaptor.get_atoms(vr.final_structure)\n",
"\n",
" # # 2. Get ASE atoms object from ASE\n",
" # output[\"structure\"] = read(f\"{path}/CONTCAR\")\n",
"\n",
" # 3. Get charge density from CHGCAR\n",
" # For charge density, I'm not too clear on the details of what exactly is happening,\n",
" # but it appears it is spawned from pymatgen's parser originally anyway (per their comments in vasp.volumetric_data)\n",
" output[\"charge_density\"] = Chgcar.from_file(f\"{path}/CHGCAR\")\n",
"\n",
" # 4. Extract generic information\n",
" output[\"generic\"][\"temperature\"] = vr.parameters['TEBEG'] # this is just wrong in pyiron's current parsing. defaults to 0, is not 0.\n",
" # this is NOT actually 0, vasp does calculations at finite temperatures, even in DFT. This is for faster convergence - 0.0001 K is the default\n",
"\n",
" # Here we use the convention that positive stress is compressive, as is done in ASE\n",
" # Units here are eV/A^3\n",
" output[\"generic\"][\"stresses\"] = [-np.array(step[\"stress\"])/1600.21766208 for step in vr.ionic_steps]\n",
" # Don't use pressures entry anymore, I have no idea what it even is - just for the love of god, just let it die!!!\n",
" output[\"generic\"][\"pressures\"] = output[\"generic\"][\"stresses\"]\n",
" output[\"generic\"][\"forces\"] = [step['forces'] for step in vr.ionic_steps]\n",
" # Similarly to pressures, the cell object in generic interface just needs to die - all useful information is in the structure\n",
" # The edge case is where structures are extremely large and might not fit into memory, but cells do,\n",
" # but it's so niche and pollutes the interface for no good reason. \n",
" # I feel like most users aren't doing calculations with structures that don't fit into memory\n",
" output[\"generic\"][\"cells\"] = [AseAtomsAdaptor.get_atoms(step[\"structure\"]).cell for step in vr.ionic_steps]\n",
" # See above - is this necessary?\n",
" output[\"generic\"][\"volume\"] = [step[\"structure\"].lattice.volume for step in vr.ionic_steps]\n",
" # output_pyiron[\"generic\"][\"energy_pot\"] This is e_fr_energy in pymatgen\n",
" # in pymatgen\n",
" # 'e_fr_energy': -17.73798679,\n",
" # 'e_wo_entrp': -17.72353582,\n",
" # 'e_0_energy': -17.7331698}\n",
" # pyiron: \n",
" # output_pyiron[\"generic\"][\"energy_pot\"] -> array([-17.73798679])\n",
" # output_pyiron[\"generic\"][\"energy_tot\"] -> array([-17.73798679])\n",
" output[\"generic\"][\"energy_pot\"] = [step['electronic_steps'][-1]['e_wo_entrp'] for step in vr.ionic_steps]\n",
" # output_pyiron[\"generic\"][\"energy_tot\"] This is also equivalent to e_fr_energy in pymatgen (e_tot = e_pot (!?))\n",
" output[\"generic\"][\"energy_tot\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n",
" # output[\"generic\"][\"energy_tot\"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vasprun.ionic_steps] # energy sigma-> 0 (\"real\" free energy at 0K)\n",
"\n",
" # For some reason steps is acquired by calling np.arange(len(steps)) in current pyiron parser. Surely returning a list when \n",
" # the expected value is an integer is nonsensical? arange also generates 0 in the event of len=1...\n",
" output[\"generic\"][\"steps\"] = len(vr.ionic_steps)\n",
" # Another grievous offender that pollutes the interface for no good reason...\n",
" # introducing \"positions\", when \"structures\" is present in output is clearly enough. Doubling storage in hdf for no new information is just nasty work\n",
" output[\"generic\"][\"positions\"] = [AseAtomsAdaptor.get_atoms(step[\"structure\"]).positions for step in vr.ionic_steps]\n",
"\n",
" # 5. Read elastic constants from OUTCAR\n",
" try:\n",
" elastic_constants = Outcar(filename=f\"{path}/OUTCAR\").read_elastic_tensor()\n",
" except Exception as e:\n",
" elastic_constants = None\n",
" output[\"generic\"][\"elastic_constants\"] = elastic_constants\n",
"\n",
" # 6. Extract DFT information\n",
" # Notes: here is the limitations of pymatgen parsing - they only parse the final step.\n",
" outcar = Outcar(filename=f\"{path}/OUTCAR\")\n",
" output[\"generic\"][\"dft\"] = {}\n",
" output[\"generic\"][\"dft\"][\"n_elect\"] = vr.parameters[\"NELECT\"]\n",
"\n",
" # NOTE: Ahmed look here (?) probably needs to just use our own parser\n",
" output[\"generic\"][\"dft\"][\"potentiostat_output\"] = [] \n",
"\n",
" output[\"generic\"][\"dft\"][\"magnetization\"] = [outcar.magnetization]\n",
" # pymatgen Outcar parser alternative only allows us to parse last step, but does not affect final_magmoms here\n",
" output[\"generic\"][\"dft\"][\"final_magmoms\"] = [ionic_entry[\"tot\"] for ionic_entry in outcar.magnetization]\n",
" # pymatgen only allows the calculation of the fermi energy on the last step\n",
" output[\"generic\"][\"dft\"][\"e_fermi_list\"] = np.array(vr.calculate_efermi())\n",
" # pymatgen only allows the calculation of the valence band maximum on the last step\n",
" # NOTE: SERIOUS PROBLEMS - VBM CALCULATION IS DIFFERENT IN PYIRON VS PYMATGEN - (I (Han) have no idea :))\n",
" output[\"generic\"][\"dft\"][\"vbm_list\"] = vr.eigenvalue_band_properties[1]\n",
" # pymatgen only allows the calculation of the conduction band minimum on the last step\n",
" # NOTE: SERIOUS PROBLEMS - CBM CALCULATION IS DIFFERENT IN PYIRON VS PYMATGEN - (I (Han) have no idea :))\n",
" output[\"generic\"][\"dft\"][\"cbm_list\"] = vr.eigenvalue_band_properties[2]\n",
" # pymatgen only allows parsing of the final total energy contribution...\n",
" output[\"generic\"][\"dft\"][\"ediel_sol\"] = outcar.final_energy_contribs[\"Ediel_sol\"]\n",
" \n",
"\n",
" output[\"generic\"][\"dft\"][\"ediel_sol\"] = outcar.final_energy_contribs[\"Ediel_sol\"]\n",
" output[\"generic\"][\"dft\"][\"potentiostat_output\"] = []\n",
"\n",
" # Read POTCAR file for valence charges - no support in pymatgen for extracting valence from vasprun (although possible)\n",
" # NOTE: Commented out because we don't ahve valid POTCAR for testing here\n",
" # potcar = Potcar.from_file(f\"{path}/POTCAR\")\n",
" #output[\"generic\"][\"dft\"][\"valence_charges\"] = [ps.nelectrons for ps in potcar]\n",
"\n",
" # SCF energies\n",
" output[\"generic\"][\"dft\"][\"scf_dipole_mom\"] = [] # TODO: Likely needs to default to OUTCAR parsing via pyiron OUTCAR\n",
" output[\"generic\"][\"dft\"][\"scf_energy_int\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n",
" output[\"generic\"][\"dft\"][\"scf_energy_free\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n",
" output[\"generic\"][\"dft\"][\"scf_energy_zero\"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vr.ionic_steps]\n",
" output[\"generic\"][\"dft\"][\"energy_int\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n",
" output[\"generic\"][\"dft\"][\"energy_free\"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]\n",
" output[\"generic\"][\"dft\"][\"energy_zero\"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vr.ionic_steps]\n",
"\n",
" # 7. Extract band structure and DOS\n",
" output[\"generic\"][\"dft\"][\"bands\"] = {}\n",
" # NOTE: I'm in favour of just doing:\n",
" # output[\"generic\"][\"dft\"][\"bands\"] = vr.get_band_structure().as_dict()\n",
" # BUT if you want to keep the prior structured output\n",
" vr_bs_dict = vr.get_band_structure().as_dict()\n",
" output[\"generic\"][\"dft\"][\"bands\"][\"k_points\"] = vr.actual_kpoints\n",
" return output\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"path = \"/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/root/miniconda3/envs/pyiron/lib/python3.12/site-packages/pymatgen/io/vasp/outputs.py:1185: UserWarning: No POTCAR file with matching TITEL fields was found in\n",
"/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample/POTCAR\n",
" warnings.warn(\"No POTCAR file with matching TITEL fields was found in\\n\" + \"\\n \".join(potcar_paths))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"33.4 ms ± 665 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit -n100\n",
"output = parse_vasp_output_pymatgen(path)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/root/miniconda3/envs/pyiron/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from pyiron_atomistics.vasp.output import parse_vasp_output"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"21.3 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit -n100\n",
"output_pyiron = parse_vasp_output(path)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"path_real = \"/root/github_dev/dev_vasp_scraper/As-30-d-2.5\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3.34 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit -n1\n",
"output = parse_vasp_output_pymatgen(path_real)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.92 s ± 61.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%%timeit -n1\n",
"output_pyiron = parse_vasp_output(path_real)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "pyiron",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}