Executing this notebook requires an active internet connection, as it queries the NCI/CADD Chemical Identifier Resolver web service, to help in suggesting and selecting names for the species
import cirpy # pip install this if you don't have it.
import sys
import os
import re
import numpy
import csv
import cPickle as pickle
import pandas as pd
import itertools
import logging
from collections import Counter, defaultdict, OrderedDict
import IPython
from IPython.display import display, Markdown, HTML
def mprint(s):
"A convenience to format things in Jupyter notebooks via MarkDown syntax"
display(Markdown(s))
# Add $RMGpy to front of your $PYTHONPATH in case you didn't already
sys.path.insert(1,os.getenv('RMGpy',os.path.expanduser('~/Code/RMG-Py')))
from cantera import ck2cti
import rmgpy
import rmgpy.molecule, rmgpy.species, rmgpy.data, rmgpy.data.rmg, rmgpy.thermo.thermoengine
model_name = 'heptane'
csv_filename = 'new-species-{}.csv'.format(model_name)
if not os.path.exists(csv_filename):
with open(csv_filename,'w') as f:
f.write('SMILES, Name\n')
smiles_names = pd.read_csv(csv_filename, index_col=0)
smiles_names
species_list = []
for smiles in smiles_names.index:
m = rmgpy.molecule.Molecule(SMILES=smiles)
species = rmgpy.species.Species(molecule=[m])
species.generateResonanceIsomers()
species.props['smiles'] = smiles.strip()
species_list.append(species)
species_list
rmgDatabase = rmgpy.data.rmg.RMGDatabase()
databasePath = os.path.abspath(os.path.join(os.getenv('RMGpy', '..'), '..', 'RMG-database', 'input'))
print("Loading database from "+databasePath)
rmgDatabase.load(databasePath,
kineticsFamilies='default',
transportLibraries=['PrimaryTransportLibrary', 'GRI-Mech'],
reactionLibraries=[],
seedMechanisms=[],
thermoLibraries=['primaryThermoLibrary',
'BurkeH2O2',
'thermo_DFT_CCSDTF12_BAC',
'CBS_QB3_1dHR',
'CHO',
'USC-Mech-ii',],
solvation=False,
)
for s in species_list:
display(s)
s.generateTransportData()
print s.transportData
# Generate Thermo data
for s in species_list:
display(s)
rmgpy.thermo.thermoengine.submit(s)
print s.thermo
for s in species_list:
display(s)
smiles = s.props['smiles']
names = cirpy.resolve(smiles, 'names')
iupac = cirpy.resolve(smiles, 'iupac_name')
s.props['names'] = names
s.props['iupac'] = iupac
round_trip_smiles = cirpy.resolve(iupac, 'smiles')
m = rmgpy.molecule.Molecule(SMILES=round_trip_smiles)
assert s.isIsomorphic(m)
print iupac
print names
# If you have a gigantic spreadsheet of imported model data,
# search it to see if these species have been seen before:
spreadsheet_path = os.path.expanduser('~/Code/RMG-models/modelComparer/output.xls')
if not os.path.exists(spreadsheet_path):
print "You don't have the spreadsheet, so not checking."
else:
df = pd.read_excel(spreadsheet_path, sheetname='names', skiprows=4)
print "Looking for {} names of {} species in {} models".format(df[df.columns[3:]].count().sum(), *df.shape)
for s in species_list:
display(s)
smiles = s.molecule[0].toSMILES()
formula = s.molecule[0].getFormula()
print smiles, formula
for testsmiles in df[df.Formula==formula].Species:
m = rmgpy.molecule.Molecule(SMILES=str(testsmiles))
if s.isIsomorphic(m):
print testsmiles
d = df[df.Species==testsmiles]
print Counter(d.values[0,2:])
break
else:
print "didn't find in table"
# Report possible suggested names, for you to choose from
possibles = {}
for s in species_list:
possibles[s.props['smiles']] = s.props['names']
possibles
# Choose and specify some names for each.
# Unforutnately they can't start with a digit, ruling out many IUPAC names
preferred = {
'C#CCO': 'prop2yn1ol',
'C=C=CC': 'C4H612', # buta12diene
'C=C=CCC': 'penta12diene', # Penta-1,2-diene
'C=C=CO': 'propadienol', # propa-1,2-dien-1-ol
'C=CCC(=O)CC': 'hex5en3one',
'C=CCC(C)=O': 'pent4en2one',
'C=CCCC(C)=O': 'hex5en2one',
'CC(=O)C=O': 'oxopropanal',
'CC(C)O': 'propan2ol',
'CCCC(=O)CCC': 'heptan4one',
'CCCCC(=O)CC': 'heptan3one',
'CCCCCC(C)=O': 'heptan2one',
'CCCCCCC=O': 'heptanal',
'O=COC=O': 'formicanhydride',
}
# Check if the chosen names can be correctly resolved using NCI/CADD Chemical Identifier Resolver
for s in species_list:
display(s)
smiles = s.molecule[0].toSMILES()
name = preferred[smiles]
s.label = name
round_trip_smiles = cirpy.resolve(name, 'smiles')
print name
if round_trip_smiles is None:
print "WARNING: Cannot interpret above name using NCI/CADD Chemical Identifier Resolver:"
continue
m = rmgpy.molecule.Molecule(SMILES=round_trip_smiles)
if not s.isIsomorphic(m):
print "WARNING: Cannot resolve above name correctly using NCI/CADD Chemical Identifier Resolver:"
for s in species_list:
smiles = s.props['smiles']
name = s.label
smiles_names.loc[smiles] = name
smiles_names.to_csv(csv_filename)
smiles_names
thermo_filename = 'new-species-{}-thermo.txt'.format(model_name)
rmgpy.chemkin.saveChemkinFile(thermo_filename, species=species_list, reactions=[])
print(open(thermo_filename).read())
transport_filename = 'new-species-{}-transport.txt'.format(model_name)
rmgpy.chemkin.saveTransportFile(transport_filename,species_list)
print(open(transport_filename).read())
# Check that we can convert it into cantera
outdir = 'CanteraFiles'
new_parser = ck2cti.Parser()
surfaces = new_parser.convertMech(inputFile=thermo_filename,
thermoFile=None,
transportFile=transport_filename,
surfaceFile=None,
phaseName='gas',
outName=os.path.join(outdir,model_name+'.new-species.cti'),
permissive=True)