Skip to content

Instantly share code, notes, and snippets.

@aclyde11
Last active April 16, 2020 05:30
Show Gist options
  • Select an option

  • Save aclyde11/73a0dd4737b365f6d26cc2ad066b595d to your computer and use it in GitHub Desktop.

Select an option

Save aclyde11/73a0dd4737b365f6d26cc2ad066b595d to your computer and use it in GitHub Desktop.
combine new docking data
import pandas as pd
from glob import glob
from tqdm import tqdm
import numpy as np
import re
# Every "*.out" file found exactly one directory below the Google Drive
# results folder; read_dfs() below loads each of these.
gdrive = glob("/Users/austin/Google Drive/Workflow0COVID/results/*/*.out")

# CSV that the freshly collected data is merged into at the end of the script.
orig = "/Users/austin/Box/2019-nCoV/drug-screening/raw_data/V3_docking_data_april_9/docking_data_out_v3.1.csv"

# Echo the matched files so a wrong or empty glob is visible before any
# parsing starts.
for out_file in gdrive:
    print(out_file)
def read_dfs():
    """Load every file listed in the module-level ``gdrive`` into a DataFrame.

    Each file is parsed as a header-less CSV. Only the columns at positions
    2 and 3 are kept and are renamed to ``smiles`` and ``dock``. A constant
    ``receptor`` column is added whose value is derived from the file path.

    Returns:
        list: one DataFrame per file, in the order of ``gdrive``, each with
        the columns ``smiles``, ``dock`` and ``receptor``.
    """
    frames = []
    for path in tqdm(gdrive):
        print(path)
        frame = pd.read_csv(path, header=None, engine='c', low_memory=False)
        frame = frame.iloc[:, [2, 3]]
        frame.columns = ['smiles', 'dock']
        # Receptor label: remove "receptor.out" from the path, drop the one
        # character that then ends the string, and keep the last "/" component.
        # NOTE(review): the "." in the pattern is an unescaped regex wildcard,
        # and the [:-1] presumably removes a separator such as "_" in front of
        # "receptor.out" -- confirm against the real file names.
        trimmed = re.sub("receptor.out", "", path)[:-1]
        frame['receptor'] = trimmed.split("/")[-1]
        frames.append(frame)
    return frames
# Stack the per-file tables into one long frame with a fixed column order.
df = pd.concat(read_dfs())[['smiles', 'receptor', 'dock']]

# Long -> wide: one row per 'smiles', one column per 'receptor'. When the same
# (smiles, receptor) pair occurs more than once, the first 'dock' value wins.
df = df.pivot_table(
    values='dock',
    index='smiles',
    columns='receptor',
    aggfunc='first',
    fill_value=np.nan,
).reset_index()

# Keep 'smiles' as the first column name and tag every receptor column with a
# "_dock" suffix.
cols = df.columns.tolist()
cols = ['smiles'] + [receptor + "_dock" for receptor in cols[1:]]
df.columns = cols

# Cap every docking column at 0: values above 0 become 0, the rest are kept.
df.iloc[:, 1:] = df.iloc[:, 1:].clip(upper=0)

# Persist the wide table, then load it straight back from disk.
df.to_pickle("gdrive.pkl")
gdrive_df = pd.read_pickle("gdrive.pkl")

old_df = pd.read_csv(orig)
print(gdrive_df.head())
print(old_df.head())

# Outer join keeps rows that appear in either table.
# NOTE(review): no `on=` is given, so pandas joins on every column name the
# two frames share -- confirm that 'smiles' is the only shared column.
df = old_df.merge(gdrive_df, how='outer')
print(df.shape)
print(df.head())

# Write the combined table in both formats.
df.to_csv("combo.csv", index=False)
df.to_pickle('combo.pkl')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment