Skip to content

Instantly share code, notes, and snippets.

@jacepark12
Created August 31, 2018 02:18
Show Gist options
  • Select an option

  • Save jacepark12/0db46b08eef1262d2a2180f446be7150 to your computer and use it in GitHub Desktop.

Select an option

Save jacepark12/0db46b08eef1262d2a2180f446be7150 to your computer and use it in GitHub Desktop.
import pandas as pd
from scipy.stats.stats import pearsonr
import matplotlib.pyplot as plt
import numpy as np
# Script: compute the Pearson correlation for every pair of columns in
# 'data_2.xlsx' (except the excluded ones), keep the pairs whose |r| exceeds
# a threshold, and show them as a bar chart sorted by r.

# A pair is reported/plotted only if |r| is strictly greater than this.
CORRELATION_THRESHOLD = 0.1

# Read the workbook from the current working directory.
df = pd.read_excel('data_2.xlsx')
print(df)

# Full list of column labels, printed before any column is excluded.
c = df.columns.tolist()
print(c)

# Columns left out of the analysis ('설립연도' is Korean for
# "year of establishment"). Filtering instead of list.remove() means a
# column missing from the sheet is simply ignored rather than raising
# ValueError.
remove_list = ['Year', 'Company', '설립연도']
c = [name for name in c if name not in remove_list]

x_data = []  # "<col a> / <col b> " labels for the retained pairs
y_data = []  # matching Pearson r values

# Visit every unordered pair of remaining columns exactly once.
for i, first in enumerate(c):
    for second in c[i + 1:]:
        # pearsonr returns (r, p-value); only r is used here.
        r, _ = pearsonr(df[first], df[second])
        # A NaN r compares False here and is therefore skipped as well.
        if abs(r) > CORRELATION_THRESHOLD:
            x_data.append("{} / {} ".format(first, second))
            y_data.append(r)
            # NOTE(review): the source lost its indentation; this print is
            # assumed to belong inside the threshold check - confirm.
            print("{}, {} : {}".format(first, second, r))

# Order the retained pairs by ascending r so the bars run from the most
# negative to the most positive correlation.
z = sorted(zip(x_data, y_data), key=lambda pair: pair[1])
print(z)
x_sorted = [label for label, _ in z]
y_sorted = [value for _, value in z]

# One bar per retained pair, labelled with the pair's column names.
x_t = np.arange(len(z))
plt.bar(x_t, y_sorted)
plt.xticks(x_t, x_sorted)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment