Skip to content

Instantly share code, notes, and snippets.

@prs-watch
Created March 2, 2019 07:16
Show Gist options
  • Select an option

  • Save prs-watch/039b39427e8747fe0d5566138b60bd10 to your computer and use it in GitHub Desktop.

Select an option

Save prs-watch/039b39427e8747fe0d5566138b60bd10 to your computer and use it in GitHub Desktop.
import pandas as pd
import umap
from pyclustering.cluster import xmeans
class FGClusterExecuter:
    """
    FanGraphs stats clustering class.

    Loads a CSV of stats, splits off the label column(s), normalizes the
    remaining columns, and then runs both a UMAP embedding and an X-means
    clustering on the normalized values.
    """

    @classmethod
    def cluster(cls, csv_file, labels, sep=','):
        """
        Run the whole load -> normalize -> embed -> cluster pipeline.

        Args:
            csv_file: Path or file-like object handed to ``pandas.read_csv``.
            labels: Column name (or list of column names) identifying each
                row; these columns are excluded from the numeric data.
            sep: Field delimiter of the CSV file.

        Returns:
            A 4-tuple ``(labels, data, umap_emb, clusters)``:
            the selected label column(s), the remaining (un-normalized)
            columns, the UMAP embedding of the normalized data, and the
            clusters reported by X-means.
        """
        label_data, data = cls.__load_data(csv_file, labels, sep)
        ndata = cls.__normalize(data)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same ndarray and is available on old and new pandas alike.
        arrdata = ndata.values
        umap_emb = cls.__exec_umap(arrdata)
        # Clustering is done on the full normalized data, not on the embedding.
        clusters = cls.__exec_xmeans(arrdata)
        return label_data, data, umap_emb, clusters

    @staticmethod
    def __load_data(csv_file, labels, sep):
        """
        Read the CSV and split it into label column(s) and data columns.

        Returns:
            ``(label_data, data)`` where ``label_data`` is ``df[labels]`` and
            ``data`` is the frame with those columns removed.
        """
        df = pd.read_csv(csv_file, sep=sep)
        # Keep the column *names* separate from the extracted column *data*:
        # drop() must receive the names, not the cell values.
        label_data = df[labels]
        data = df.drop(labels, axis=1)
        return label_data, data

    @staticmethod
    def __normalize(df):
        """
        Mean-center every column and scale it by its range (max - min).

        Missing values are treated as 0 before scaling. Columns with zero
        range produce 0/0 = NaN, which is replaced by 0 afterwards.
        """
        df = df.fillna(0)
        ndata = (df - df.mean()) / (df.max() - df.min())
        return ndata.fillna(0)

    @staticmethod
    def __exec_umap(array):
        """Return the UMAP embedding of ``array`` using default parameters."""
        return umap.UMAP().fit_transform(array)

    @staticmethod
    def __exec_xmeans(array):
        """
        Cluster ``array`` with X-means and return the resulting clusters.

        The search starts from 2 centers chosen by k-means++ and runs with
        ``ccore=False``.
        """
        init_center = xmeans.kmeans_plusplus_initializer(array, 2).initialize()
        xm = xmeans.xmeans(array, init_center, ccore=False)
        xm.process()
        return xm.get_clusters()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment