Last active
March 9, 2022 10:07
-
-
Save chenyuxiang0425/b229a57b3ebf4f7deaa740d1559dcfed to your computer and use it in GitHub Desktop.
cas to property
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import pubchempy as pcp | |
| import argparse | |
def search_molecule_from_cas(CAS):
    """Search PubChem for the molecule that matches a CAS number.

    The CAS number is submitted through the "name" namespace of
    ``pcp.get_compounds`` and the first hit is returned.

    @param CAS: CAS registry number to search for, e.g. "64-17-5".
        ``None``, NaN (what pandas yields for an empty CSV cell) and blank
        strings are treated as "nothing to search for" and are not sent
        to PubChem.
    @return: the first compound in the result list, or ``None`` when the
        input is empty or the search returned no compounds.
    """
    # NaN is the only value that compares unequal to itself; this avoids
    # querying PubChem for the literal name "nan".
    if CAS is None or (isinstance(CAS, float) and CAS != CAS):
        return None

    # Surrounding whitespace is common in hand-edited CSV cells.
    query = str(CAS).strip()
    if not query:
        return None

    matches = pcp.get_compounds(query, "name")  # list of compounds
    return matches[0] if matches else None
def main():
    """Command-line entry point: look up every CAS number in a CSV file.

    Reads the CSV file named on the command line, calls
    ``search_molecule_from_cas`` for each entry of its "CAS" column, prints
    one progress line per entry, and writes the collected properties to
    the output CSV ("CAS2property.csv" unless ``-o/--output`` is given).
    Entries without a match keep their CAS value and get ``None`` for
    every property.
    """
    parser = argparse.ArgumentParser(
        description='Look up PubChem properties for the CAS numbers in a CSV file.')
    parser.add_argument('filename', help='input CSV file with a "CAS" column')
    parser.add_argument('-o', '--output', default='CAS2property.csv',
                        help='output CSV path (default: %(default)s)')
    args = parser.parse_args()

    data = pd.read_csv(args.filename)
    if "CAS" not in data.columns:
        # Fail with a usage message instead of a bare KeyError traceback.
        parser.error('input file %r has no "CAS" column' % args.filename)

    # Compound attributes to export; the order fixes the output column order.
    property_names = (
        'cid',
        'exact_mass',
        'isomeric_smiles',
        'iupac_name',
        'molecular_formula',
        'molecular_weight',
    )
    out_dict = {"CAS": []}
    for name in property_names:
        out_dict[name] = []

    for i, cas in enumerate(data["CAS"]):
        current_molecule = search_molecule_from_cas(cas)
        found = current_molecule is not None
        out_dict['CAS'].append(cas)
        # Append to every column on every row so all lists stay equally long.
        for name in property_names:
            out_dict[name].append(getattr(current_molecule, name) if found else None)
        print(i, cas, current_molecule.isomeric_smiles if found else "not found")

    pd.DataFrame(out_dict).to_csv(args.output)


if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
What is it?
This script uses the pubchempy library to look up some simple properties of molecules on the PubChem website by CAS number, and writes the results to a file in "CSV" format (CAS2property.csv).
What are its requirements?
python3
pubchempy
pandas
The input file must be in "CSV" format and must contain a column named "CAS" holding the CAS numbers to look up.
How to use it in the command line?
python cas2property.py yourfilename