This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Colour palette for the plot: thirteen named colours.
cmap = [
    'white', 'red', 'orange', 'yellow', 'green', 'blue',
    'indigo', 'violet', 'purple', 'grey', 'pink',
    'brown', 'black',
]

# Tall, narrow canvas (figsize 6 x 18).
f, ax = plt.subplots(figsize=(6, 18))

# The end date of one book is also the start date of the next, so the same
# date can appear twice in bookDf. Work on a de-duplicated copy (first row
# per date is kept) and leave bookDf itself untouched.
df = bookDf.drop_duplicates(['Date'])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Derive the calendar columns needed for plotting from bookDf['Date'].
date_column = bookDf['Date']

# Keep the untouched date alongside the derived fields.
bookDf['DateOrig'] = date_column

# Column name -> function applied to every date. Insertion order matches the
# order in which the columns are added to the frame.
derived_columns = {
    'Day': lambda x: x.day,
    # Abbreviated month name, e.g. 'Jan'.
    'Month': lambda x: dt.datetime.strftime(x, '%b'),
    # Abbreviated weekday name, e.g. 'Mon'.
    'DOW': lambda x: dt.datetime.strftime(x, '%a'),
    'Month_num': lambda x: x.month,
    # weekday(): Monday is 0, Sunday is 6.
    'DOW_num': lambda x: x.weekday(),
    # %W: week of the year with Monday as the first day of the week.
    'Week_num': lambda x: int(dt.datetime.strftime(x, '%W')),
}
for column_name, extract in derived_columns.items():
    bookDf[column_name] = date_column.apply(extract)
#add proxy for different colours
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a one-column frame holding every day of `year`, from 1 January to
# 31 December inclusive (both at midnight).
year_start = dt.datetime(year, 1, 1, 0, 0, 0)
year_end = dt.datetime(year, 12, 31, 0, 0, 0)
all_days = list(date_generator(year_start, year_end))
dateList = pd.DataFrame(all_days, columns=['Date'])

# Store the dates as generic Python objects (dtype 'O').
dateList['Date'] = dateList['Date'].astype('O')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def date_generator(from_date, to_date, step=None):
    """Yield successive dates from ``from_date`` through ``to_date``, inclusive.

    Args:
        from_date: First value to yield (``datetime.date`` or
            ``datetime.datetime``).
        to_date: Last value that may be yielded; must be comparable with
            ``from_date``.
        step: Positive ``datetime.timedelta`` between consecutive values.
            Defaults to one day.

    Yields:
        ``from_date``, ``from_date + step``, ... for as long as the value does
        not exceed ``to_date``. Nothing is yielded when ``from_date`` is
        already later than ``to_date``.

    Raises:
        ValueError: If ``step`` is zero or negative. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if step is None:
        step = dt.timedelta(days=1)
    # A zero or negative step would never get past to_date: infinite loop.
    if step <= dt.timedelta(0):
        raise ValueError("step must be a positive timedelta")

    current = from_date
    while current <= to_date:
        yield current
        current = current + step
| # create a new df with 2 columns | |
| # col1 : Title, col2: DateRead | |
| Title = [] | |
| Date = [] | |
| for index,row in booksv2.iterrows(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import datetime as dt | |
# Load the reading log from disk.
booksv2 = pd.read_csv('book1.csv')

# 'Start' and 'End' are parsed as day/month/year text into datetime values.
date_format = '%d/%m/%Y'
for date_field in ('Start', 'End'):
    booksv2[date_field] = booksv2[date_field].apply(
        lambda x: dt.datetime.strptime(str(x), date_format)
    )

# Earliest and latest date across both columns.
all_dates = list(booksv2['Start']) + list(booksv2['End'])
min_date = min(all_dates)
max_date = max(all_dates)
#year = min_date.year
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import plotly | |
| import plotly.plotly as py | |
# Columns of df that form the successive levels of the Sankey diagram.
sankey_levels = ['lvl1', 'lvl2', 'lvl3', 'lvl4']

# Build the figure from df, using the 'count' column as the value column.
fig = genSankey(
    df,
    cat_cols=sankey_levels,
    value_cols='count',
    title='Word Etymology',
)

# Render the figure offline, with figure validation switched off.
plotly.offline.plot(fig, validate=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def genSankey(df,cat_cols=[],value_cols='',title='Sankey Diagram'): | |
| # maximum of 6 value cols -> 6 colors | |
| colorPalette = ['#4B8BBE','#306998','#FFE873','#FFD43B','#646464'] | |
| labelList = [] | |
| colorNumList = [] | |
| for catCol in cat_cols: | |
| labelListTemp = list(set(df[catCol].values)) | |
| colorNumList.append(len(labelListTemp)) | |
| labelList = labelList + labelListTemp | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| app.layout = html.Div([ | |
| html.Div(dcc.Graph(id='Graph',figure=fig)), | |
| html.Div(className='row', children=[ | |
| html.Div([html.H2('Overall Data'), | |
| html.P('Num of nodes: ' + str(len(G.nodes))), | |
| html.P('Num of edges: ' + str(len(G.edges)))], | |
| className='three columns'), | |
| html.Div([ | |
| html.H2('Selected Data'), | |
| html.Div(id='selected-data'), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fig = go.Figure(data=[edge_trace, node_trace], | |
| layout=go.Layout( | |
| title='<br>Network Graph of '+str(num_nodes)+' rules', | |
| titlefont=dict(size=16), | |
| showlegend=False, | |
| hovermode='closest', | |
| margin=dict(b=20,l=5,r=5,t=40), | |
| annotations=[ dict( | |
| showarrow=False, | |
| xref="paper", yref="paper", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect the top 5 words for every class (each entry of vect_data.index),
# stored as [class, top-words] pairs.
top_words = [
    [code, find_top_words(code, 5)]
    for code in vect_data.index
]
# Show the list of top words.
top_words
Newer Older