Last active
April 16, 2020 05:30
-
-
Save aclyde11/73a0dd4737b365f6d26cc2ad066b595d to your computer and use it in GitHub Desktop.
combine new docking data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| from glob import glob | |
| from tqdm import tqdm | |
| import numpy as np | |
| import re | |
# Docking output files to ingest: every *.out file one directory level below
# results/. glob() makes no promise about the order of its matches, so the
# list is sorted to keep the processing order (and the listing printed below)
# identical from run to run.
gdrive = sorted(glob("/Users/austin/Google Drive/Workflow0COVID/results/*/*.out"))

# CSV that is read further down and merged with the newly collected results.
orig = "/Users/austin/Box/2019-nCoV/drug-screening/raw_data/V3_docking_data_april_9/docking_data_out_v3.1.csv"

# Echo the matched files so a wrong or empty glob is obvious immediately.
for path in gdrive:
    print(path)
def read_dfs(files=None):
    """Load docking output files into one long-format DataFrame per file.

    Each file is parsed as a header-less CSV. The columns at positions 2 and
    3 are kept and labelled ``smiles`` and ``dock``, and a ``receptor``
    column derived from the file path is added to every row.

    Args:
        files: Iterable of path strings to read. When omitted, the
            module-level ``gdrive`` list is used, so existing zero-argument
            calls behave as before.

    Returns:
        A list of DataFrames, one per file and in the order the files were
        given, each with columns ``smiles``, ``dock`` and ``receptor``.
    """
    if files is None:
        files = gdrive

    # Compiled once rather than on every iteration. The "." is an unescaped
    # regex wildcard (it matches any single character); the pattern is kept
    # as-is so receptor names come out exactly as they did before.
    suffix = re.compile("receptor.out")

    frames = []
    for file in tqdm(files):
        print(file)
        frame = pd.read_csv(file, header=None, engine='c', low_memory=False)
        frame = frame.iloc[:, [2, 3]]
        frame.columns = ['smiles', 'dock']
        # Receptor name: remove the suffix, drop the one character that is
        # then left at the end of the path, and keep the last "/"-separated
        # component.
        # NOTE(review): this assumes every path ends in "receptor.out"; for
        # any other *.out file the [:-1] would cut a real character - confirm
        # against the actual file names.
        frame['receptor'] = suffix.sub("", file)[:-1].split("/")[-1]
        frames.append(frame)
    return frames
# Stack the per-file frames into one long table and fix the column order.
df = pd.concat(read_dfs())
df = df.loc[:, ['smiles', 'receptor', 'dock']]

# Reshape to wide form: one row per SMILES string, one column per receptor.
# When a (smiles, receptor) pair occurs more than once, the first value wins.
df = pd.pivot_table(
    df,
    values='dock',
    index='smiles',
    columns='receptor',
    aggfunc='first',
    fill_value=np.nan,
).reset_index()

# Keep 'smiles' as the first column and tag every receptor column with "_dock".
cols = df.columns.tolist()
cols = ['smiles'] + [name + "_dock" for name in cols[1:]]
df.columns = cols

# Cap every score column at 0: values above zero become 0, the rest are kept.
df.iloc[:, 1:] = df.iloc[:, 1:].clip(upper=0)

# Persist the wide table to disk.
df.to_pickle("gdrive.pkl")
# Load the two tables to combine: the pickled wide table of new results and
# the CSV at the path held in `orig`.
gdrive_df = pd.read_pickle("gdrive.pkl")
old_df = pd.read_csv(orig)

# Preview both inputs before merging.
for frame in (gdrive_df, old_df):
    print(frame.head())

# Outer join keeps rows that appear in only one of the two tables. No `on=`
# is given, so pandas joins on every column name the two frames share.
# NOTE(review): presumably the only shared column is 'smiles' - confirm
# against the CSV header; shared score columns would also become join keys.
df = old_df.merge(gdrive_df, how='outer')
print(df.shape)
print(df.head())

# Write the combined table in both formats.
df.to_csv("combo.csv", index=False)
df.to_pickle('combo.pkl')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment