Exercise 2: Working with pyiron tables to analyze data

The PyironTable class is used to analyze the data generated in the previous notebook. Make sure you have run the first notebook before going through this one.

import numpy as np
import matplotlib.pylab as plt
from pyiron import Project
from pyiron.table.datamining import PyironTable
# Open the project at "demo/potential_scan". Presumably this is the project
# filled by the previous notebook - confirm the path matches.
pr = Project("demo/potential_scan")

Creating a pyiron table

The idea behind pyiron tables is to analyze a dataset by applying a set of functions to it in a highly automated way.

# Create a PyironTable named "murn_table" attached to the project `pr`.
pt = PyironTable(pr, "murn_table")

Defining a ‘filter’ function

The dataset has to be filtered before the functions (some of which are expensive) are applied to it.

def filter_function(job):
    """Select the finished jobs whose name contains "murn".

    Args:
        job: Job-like object exposing ``status`` and ``job_name``.

    Returns:
        True when ``job.status`` equals "finished" and "murn" occurs in
        ``job.job_name``; a falsy value otherwise.
    """
    # Logical `and` (instead of bitwise `&`) short-circuits: the job name is
    # only looked at once the status check has passed.
    return job.status == "finished" and "murn" in job.job_name
# Register the filter on the table object.
pt.filter_function = filter_function

Defining the set of functions

The following functions are going to be used on the dataset

def get_lattice_parameter(job):
    """Return the cube root of the job's equilibrium volume.

    Args:
        job: Mapping-like job supporting ``job["output/equilibrium_volume"]``.

    Returns:
        The value stored under "output/equilibrium_volume" raised to the
        power 1/3.
    """
    equilibrium_volume = job["output/equilibrium_volume"]
    one_third = 1 / 3
    return equilibrium_volume ** one_third

def get_bm(job):
    """Return the equilibrium bulk modulus stored in the job output.

    Args:
        job: Mapping-like job supporting
            ``job["output/equilibrium_bulk_modulus"]``.

    Returns:
        The value stored under "output/equilibrium_bulk_modulus", unchanged.
    """
    bulk_modulus = job["output/equilibrium_bulk_modulus"]
    return bulk_modulus

def get_pot(job):
    """Return the potential name recorded in the job's first child job.

    Args:
        job: Job-like object that supports ``job["output/id"]`` and exposes
            ``job.project.inspect``.

    Returns:
        The value stored under "input/potential/Name" of the job obtained by
        inspecting the first entry of ``job["output/id"]``.
    """
    first_child_id = job["output/id"][0]
    child_job = job.project.inspect(first_child_id)
    return child_job["input/potential/Name"]

def get_bulk_energy_size(job, size=4):
    """Return the final total energy and size of a bulk reference supercell.

    A LAMMPS minimization named ``rj_<potential>_s_<size>`` (with "-"
    replaced by "_") is created and run in the project ``pr`` unless a job of
    that name is already listed as finished in the job table.

    Args:
        job: Job whose lattice parameter, first species and potential define
            the reference cell.
        size: Repetition passed to ``.repeat()`` when building the supercell.

    Returns:
        tuple: ``(e_ref, n_ref)`` - the last entry of
        "output/generic/energy_tot" and ``shape[1]`` of
        "output/generic/positions" (used by the callers as the atom count).
    """
    pot = get_pot(job)
    ref_job_name = "rj_{}_s_{}".format(pot, size).replace("-", "_")
    df = pr.job_table()
    if ref_job_name not in df[df.status == "finished"].job.to_list():
        # Only build the supercell when the calculation actually has to run.
        a = get_lattice_parameter(job)
        el = job["output/structure/species"][0]
        ref_job = pr.create_job(pr.job_type.Lammps, ref_job_name)
        ref_job.structure = pr.create_ase_bulk(el, a=a).repeat(size)
        ref_job.potential = pot
        ref_job.calc_minimize()
        ref_job.run()
    # Inspect the job once and read both quantities from the same handle.
    ref_output = pr.inspect(ref_job_name)
    e_ref = ref_output["output/generic/energy_tot"][-1]
    n_ref = ref_output["output/generic/positions"].shape[1]
    return e_ref, n_ref

def get_vac_formation_energy(job, size=4):
    """Return the vacancy formation energy for the potential used by ``job``.

    The defect-free reference is obtained from ``get_bulk_energy_size``. The
    defect cell is the same supercell with its last atom removed, minimized
    in a LAMMPS job named ``dj_<potential>_s_<size>`` (with "-" replaced by
    "_"). That job is only created and run when no job of this name is listed
    as finished in the job table of ``pr``.

    Args:
        job: Job whose lattice parameter, first species and potential define
            the supercells.
        size: Repetition passed to ``.repeat()`` when building the supercell.

    Returns:
        ``e_def - e_ref * (n_def / n_ref)``, where the energies are the last
        "output/generic/energy_tot" entries and the counts are ``shape[1]``
        of "output/generic/positions" of the defect and reference jobs.
    """
    # Shared helper: runs the reference job if needed and returns its result,
    # instead of duplicating that logic here.
    e_ref, n_ref = get_bulk_energy_size(job, size)

    pot = get_pot(job)
    def_job_name = "dj_{}_s_{}".format(pot, size).replace("-", "_")
    df = pr.job_table()
    if def_job_name not in df[df.status == "finished"].job.to_list():
        a = get_lattice_parameter(job)
        el = job["output/structure/species"][0]
        ref_supercell = pr.create_ase_bulk(el, a=a).repeat(size)
        def_job = pr.create_job(pr.job_type.Lammps, def_job_name)
        # Dropping the last atom of the reference cell creates the vacancy.
        def_job.structure = ref_supercell[0:-1]
        def_job.potential = pot
        def_job.calc_minimize()
        def_job.run()
    # Inspect the job once and read both quantities from the same handle.
    def_output = pr.inspect(def_job_name)
    e_def = def_output["output/generic/energy_tot"][-1]
    n_def = def_output["output/generic/positions"].shape[1]
    # Scale the reference energy to the size of the defect cell.
    return e_def - e_ref * (n_def / n_ref)

def get_cohesive_energy(job, size=4):
    """Return the bulk energy per atom minus the energy of a single atom.

    The bulk values come from ``get_bulk_energy_size``. The single-atom
    energy is read from a static LAMMPS job named ``aj_<potential>_s_<size>``
    (with "-" replaced by "_"), which is created and run in ``pr`` unless a
    job of that name is already listed as finished in the job table.

    Args:
        job: Job providing the potential and the first species.
        size: Supercell repetition forwarded to ``get_bulk_energy_size``; it
            is also part of the single-atom job name.

    Returns:
        ``e_bulk / n_bulk - e_atom``.
    """
    bulk_energy, bulk_atoms = get_bulk_energy_size(job, size)
    potential_name = get_pot(job)
    isolated_name = "aj_{}_s_{}".format(potential_name, size).replace("-", "_")

    table = pr.job_table()
    finished_names = table[table.status == "finished"].job.to_list()
    if isolated_name not in finished_names:
        isolated_job = pr.create_job(pr.job_type.Lammps, isolated_name)
        element = job["output/structure/species"][0]
        # One atom at the centre of a 20 x 20 x 20 cubic cell.
        box = np.eye(3) * 20
        isolated_job.structure = pr.create_atoms(
            element, cell=box, scaled_positions=[[0.5, 0.5, 0.5]]
        )
        isolated_job.potential = potential_name
        isolated_job.calc_static()
        isolated_job.run()

    atom_energy = pr.inspect(isolated_name)["output/generic/energy_tot"][-1]
    energy_per_bulk_atom = bulk_energy / bulk_atoms
    return energy_per_bulk_atom - atom_energy

The functions are appended to the table

# Register each analysis function under the key that names its column in the
# resulting table.
pt.add["a_eq"] = get_lattice_parameter
pt.add["bulk_modulus"] = get_bm
pt.add["potential"] = get_pot
pt.add["vac_formation"] = get_vac_formation_energy
pt.add["ecoh"] = get_cohesive_energy
# Decides if the individual jobs are loaded or not during the creation of the table
pt.convert_to_object = False

Creation of the table

All functions are now applied

# Build the table. As the log below shows, this step also creates the
# rj_*/dj_*/aj_* jobs requested by the registered functions.
pt.create_table()
  0%|          | 0/9 [00:00<?, ?it/s]
The job rj_Al_Mg_Mendelev_eam_s_4 was saved and received the ID: 1626
The job dj_Al_Mg_Mendelev_eam_s_4 was saved and received the ID: 1627
The job aj_Al_Mg_Mendelev_eam_s_4 was saved and received the ID: 1628
 11%|█         | 1/9 [00:11<01:30, 11.28s/it]
The job rj_Zope_Ti_Al_2003_eam_s_4 was saved and received the ID: 1629
The job dj_Zope_Ti_Al_2003_eam_s_4 was saved and received the ID: 1630
The job aj_Zope_Ti_Al_2003_eam_s_4 was saved and received the ID: 1631
 22%|██▏       | 2/9 [00:23<01:20, 11.49s/it]
The job rj_Al_H_Ni_Angelo_eam_s_4 was saved and received the ID: 1632
The job dj_Al_H_Ni_Angelo_eam_s_4 was saved and received the ID: 1633
The job aj_Al_H_Ni_Angelo_eam_s_4 was saved and received the ID: 1634
 33%|███▎      | 3/9 [00:31<01:03, 10.60s/it]
The job rj_2000__Landa_A__Al_Pb__LAMMPS__ipr1_s_4 was saved and received the ID: 1635
The job dj_2000__Landa_A__Al_Pb__LAMMPS__ipr1_s_4 was saved and received the ID: 1636
The job aj_2000__Landa_A__Al_Pb__LAMMPS__ipr1_s_4 was saved and received the ID: 1637
 44%|████▍     | 4/9 [00:39<00:48,  9.78s/it]
The job rj_2004__Zhou_X_W__Al__LAMMPS__ipr2_s_4 was saved and received the ID: 1638
The job dj_2004__Zhou_X_W__Al__LAMMPS__ipr2_s_4 was saved and received the ID: 1639
The job aj_2004__Zhou_X_W__Al__LAMMPS__ipr2_s_4 was saved and received the ID: 1640
 56%|█████▌    | 5/9 [00:47<00:36,  9.11s/it]
The job rj_2003__Zope_R_R__Al__LAMMPS__ipr1_s_4 was saved and received the ID: 1641
The job dj_2003__Zope_R_R__Al__LAMMPS__ipr1_s_4 was saved and received the ID: 1642
The job aj_2003__Zope_R_R__Al__LAMMPS__ipr1_s_4 was saved and received the ID: 1643
 67%|██████▋   | 6/9 [00:56<00:27,  9.04s/it]
The job rj_2015__Mendelev_M_I__Al_Sm__LAMMPS__ipr1_s_4 was saved and received the ID: 1644
The job dj_2015__Mendelev_M_I__Al_Sm__LAMMPS__ipr1_s_4 was saved and received the ID: 1645
The job aj_2015__Mendelev_M_I__Al_Sm__LAMMPS__ipr1_s_4 was saved and received the ID: 1646
 78%|███████▊  | 7/9 [01:06<00:18,  9.49s/it]
The job rj_1997__Liu_X_Y__Al_Mg__LAMMPS__ipr1_s_4 was saved and received the ID: 1647
The job dj_1997__Liu_X_Y__Al_Mg__LAMMPS__ipr1_s_4 was saved and received the ID: 1648
The job aj_1997__Liu_X_Y__Al_Mg__LAMMPS__ipr1_s_4 was saved and received the ID: 1649
 89%|████████▉ | 8/9 [01:14<00:08,  8.89s/it]
The job rj_2004__Zhou_X_W__Al__LAMMPS__ipr1_s_4 was saved and received the ID: 1650
The job dj_2004__Zhou_X_W__Al__LAMMPS__ipr1_s_4 was saved and received the ID: 1651
The job aj_2004__Zhou_X_W__Al__LAMMPS__ipr1_s_4 was saved and received the ID: 1652
100%|██████████| 9/9 [01:21<00:00,  9.09s/it]

Output the computed dataset as a pandas dataframe

# Fetch the table as a DataFrame and drop the columns "col_0".."col_2".
# NOTE(review): the content of these columns is not visible here - confirm
# they are unused before relying on this.
df = pt.get_dataframe().drop(["col_0", "col_1", "col_2"], axis=1)
# Index the rows by potential name.
df = df.set_index("potential")
df
# Optional: export the table to a CSV file.
#df.to_csv("potential_validation_Al.csv")
job_id a_eq bulk_modulus vac_formation ecoh
potential
Al_Mg_Mendelev_eam 1516 4.045415 89.015487 0.667786 -3.410657
Zope_Ti_Al_2003_eam 1528 4.049946 80.836779 0.720309 -3.298766
Al_H_Ni_Angelo_eam 1540 4.049954 81.040445 0.546216 -3.360000
2000--Landa-A--Al-Pb--LAMMPS--ipr1 1554 4.031246 78.213776 0.688258 -3.359280
2004--Zhou-X-W--Al--LAMMPS--ipr2 1566 4.050316 71.546634 0.668060 -3.579979
2003--Zope-R-R--Al--LAMMPS--ipr1 1578 4.049946 80.836777 0.720309 -3.353921
2015--Mendelev-M-I--Al-Sm--LAMMPS--ipr1 1590 4.041196 85.017411 0.763640 -3.905149
1997--Liu-X-Y--Al-Mg--LAMMPS--ipr1 1602 4.032659 83.498560 0.704110 -3.360052
2004--Zhou-X-W--Al--LAMMPS--ipr1 1614 4.050315 71.546946 0.668085 -3.579978