发布时间:2023-07-05 16:00
import re

import pandas as pd
from matminer.featurizers import composition as cf
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.conversions import StrToComposition
使用 pandas 的 DataFrame 储存材料数据,其中至少要有一列是材料的化学式,用于计算材料的 Magpie 特征。
下面提供一个函数,把化学式补全为带显式原子个数的 composition 字符串(例如 NbPtSi → Nb1Pt1Si1):
def get_magpie_composition(formula):
    """Return *formula* with an explicit count of 1 after every bare element.

    An element symbol is an ASCII uppercase letter followed by any number of
    ASCII lowercase letters.  A symbol that is not directly followed by a
    count (a digit or a decimal point) gets ``"1"`` appended, so the result
    always spells out every amount.

    Args:
        formula: Chemical formula string, e.g. ``"NbPtSi"`` or ``"Fe2O3"``.

    Returns:
        The formula with implicit amounts made explicit.  An empty string is
        returned unchanged.

    Examples:
        >>> get_magpie_composition("NbPtSi")
        'Nb1Pt1Si1'
        >>> get_magpie_composition("H2O")
        'H2O1'
        >>> get_magpie_composition("Ca(OH)2")
        'Ca1(O1H1)2'
    """
    # The negative lookahead excludes lowercase letters as well as digits and
    # "." so the regex engine cannot backtrack to a partial symbol (matching
    # only "F" of "Fe2") and insert a count in the middle of an element name.
    return re.sub(r"[A-Z][a-z]*(?![0-9.a-z])", r"\g<0>1", formula)
对 DataFrame 中的每个材料化学式执行 get_magpie_composition() 函数,并将结果储存到 DataFrame 的 composition 列中(例如,假设化学式所在的列名为 formula:df['composition'] = df['formula'].apply(get_magpie_composition)),得到如下 DataFrame。
\"\"\"
需要输入材料的化学式(例如Nb1Pt1Si1),可以得到材料的145维向量表示
\"\"\"
def magpie(df, composition):
    """Append Magpie-style composition features to a dataframe.

    The formula strings in column *composition* are first converted to
    composition objects (stored in a new ``composition_obj`` column), then
    featurized with stoichiometry, Magpie element-property, average
    valence-orbital and ion-property featurizers.  The feature labels are
    printed to stdout before featurization.

    Args:
        df: DataFrame with one material per row.
        composition: Name of the column holding formula strings such as
            ``"Nb1Pt1Si1"``.

    Returns:
        The featurized dataframe, containing ``composition_obj`` plus one
        column per feature label (145 columns according to the note that
        accompanies this function).
    """
    # Parse the formula strings into composition objects for the featurizers.
    df = StrToComposition(target_col_id="composition_obj").featurize_dataframe(
        df, composition
    )
    feature_calculators = MultipleFeaturizer(
        [
            cf.Stoichiometry(),
            cf.ElementProperty.from_preset("magpie"),
            cf.ValenceOrbital(props=["avg"]),
            cf.IonProperty(fast=True),
        ]
    )
    feature_labels = feature_calculators.feature_labels()
    print("feature_labels")
    print(feature_labels)
    df = feature_calculators.featurize_dataframe(df, col_id="composition_obj")
    return df
对 DataFrame 调用上述函数(例如 df = magpie(df, 'composition'))即可得到材料的 145 维特征,这些特征列包含在函数返回的 DataFrame 中。