mmatkinson

## github_resources.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                mmatkinson
                / github_resources.md
            
            
              Last active
              November 16, 2018 22:00
            
              
                github resources
              
          
    Interactive Tutorials

https://learngitbranching.js.org/

End-to-End Tutorial

(Pycon 2016 - 3 hrs) : https://www.youtube.com/watch?v=RrdECLvHW6g

Command line tutorial

https://www.youtube.com/watch?v=HVsySz-h9r4


## postgres_queries_and_commands.sql
-- Show running queries (PostgreSQL < 9.2: columns are procpid / current_query).
-- age(a, b) computes a - b, so the current time goes first to report a
-- positive elapsed time for each query.
-- The NOT ILIKE filter drops any statement that mentions pg_stat_activity,
-- which includes this monitoring query itself.
SELECT procpid, age(clock_timestamp(), query_start), usename, current_query
FROM pg_stat_activity
WHERE current_query != '<IDLE>' AND current_query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start desc;

-- Show running queries (PostgreSQL >= 9.2: columns are pid / query).
-- From 9.2 on, idle sessions are marked by state = 'idle'; the query column
-- keeps the text of the last statement instead of the '<IDLE>' placeholder,
-- so idleness must be filtered on state.
-- age(a, b) computes a - b, so the current time goes first to report a
-- positive elapsed time for each query.
-- The NOT ILIKE filter drops any statement that mentions pg_stat_activity,
-- which includes this monitoring query itself.
SELECT pid, age(clock_timestamp(), query_start), usename, query
FROM pg_stat_activity
WHERE state != 'idle' AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start desc;

## converting-jupyter-notebooks-to-various-formats
**Convert .ipynb to Slides**
cd "test"
ipython nbconvert "test.ipynb" --to slides --reveal-prefix "http://cdn.jsdelivr.net/reveal.js/2.6.2" --post serve --config slides_config.py

* To print slides add ?print-pdf at the end of the URL and print

**Convert .ipynb to LaTeX/PDF**
ipython nbconvert MyFirstNotebook.ipynb --to latex --post PDF

**Convert .ipynb to HTML**

## geopandas_tour.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                mmatkinson
                / geopandas_tour.ipynb
            
            
              Created
              January 16, 2018 17:41
                — forked from ocefpaf/geopandas_tour.ipynb
            
              
                explore shapefile
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## dplyr_to_pandas.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                mmatkinson
                / dplyr_to_pandas.ipynb
            
            
              Created
              October 31, 2017 19:22
                — forked from wlattner/dplyr_to_pandas.ipynb
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## google_spreadsheets_create_update_example.py
"""Google spreadsheet related.
Packages required: oauth2client, google-api-python-client
* https://gist.github.com/miohtama/f988a5a83a301dd27469
"""

from oauth2client.service_account import ServiceAccountCredentials
from apiclient import discovery


def get_credentials(scopes: list) -> ServiceAccountCredentials:

## df_to_ddl.py
def df_to_ddl(df, tablename='test.mytable'):
    """Build a per-column table of pandas dtypes and matching SQL type names.

    The code shown here only assembles that lookup frame: it does not
    reference ``tablename`` and has no ``return`` statement, so a call
    evaluates to ``None``.

    Args:
        df: DataFrame whose column dtypes are inspected.
        tablename: Target table name; unused by the visible code.
    """
    # pandas dtype name -> SQL column type. Dtypes missing from this mapping
    # come out of ``Series.map`` as NaN.
    sql_type_by_dtype = {
        'object': 'varchar(24)',
        'float64': 'float',
        'int64': 'int',
        'bool': 'boolean',
    }

    # One row per column of ``df``: the column name and its pandas dtype.
    renamed_columns = {'index': 'colname', 0: 'datatype'}
    data_dtypes = df.dtypes.reset_index()
    data_dtypes = data_dtypes.rename(columns=renamed_columns)

    dtype_names = data_dtypes['datatype'].astype(str)
    data_dtypes['sql_dtype'] = dtype_names.map(sql_type_by_dtype)

## table_comparison.py
import pandas as pd

def df_diff(index_cols, data1, data2, lsuffix='_1'):
  """
  usage:
  comparisondf=  df_diff( ['unique_id','date'],  current_df, new_df, lsuffix='_curr')

  returns:
  single dataframe with index_cols on the index, as well as all other variables stacked on the index, and the
  values in each dataframe along the columns.

## lda_vec.py
# For gensim
from itertools import groupby
import gensim


class VectorizedCorpus(object):
    """
    Helper Class for using Sklearn Vectorizers with gensim's LDA model
    handles transformations between gensim corpus / bow representations and sklearn matrix


## useful_pandas_snippets.py
# List unique values in a DataFrame column
pd.unique(df.column_name.ravel())

# Convert Series datatype to numeric; values that cannot be parsed become NaN.
# Series.convert_objects was removed in pandas 1.0, so use pd.to_numeric
# with errors='coerce' instead.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Grab DataFrame rows where column has certain values
# (the list name must match the one passed to isin(), otherwise NameError)
value_list = ['value1', 'value2', 'value3']
df = df[df.column.isin(value_list)]
	-- Show running queries (PostgreSQL < 9.2: columns are procpid / current_query).
	-- age(a, b) computes a - b, so the current time goes first to report a
	-- positive elapsed time for each query.
	-- The NOT ILIKE filter drops any statement that mentions pg_stat_activity,
	-- which includes this monitoring query itself.
	SELECT procpid, age(clock_timestamp(), query_start), usename, current_query
	FROM pg_stat_activity
	WHERE current_query != '<IDLE>' AND current_query NOT ILIKE '%pg_stat_activity%'
	ORDER BY query_start desc;

	-- Show running queries (PostgreSQL >= 9.2: columns are pid / query).
	-- From 9.2 on, idle sessions are marked by state = 'idle'; the query column
	-- keeps the text of the last statement instead of the '<IDLE>' placeholder,
	-- so idleness must be filtered on state.
	-- age(a, b) computes a - b, so the current time goes first to report a
	-- positive elapsed time for each query.
	-- The NOT ILIKE filter drops any statement that mentions pg_stat_activity,
	-- which includes this monitoring query itself.
	SELECT pid, age(clock_timestamp(), query_start), usename, query
	FROM pg_stat_activity
	WHERE state != 'idle' AND query NOT ILIKE '%pg_stat_activity%'
	ORDER BY query_start desc;
	Convert .ipynb to Slides
	cd "test"
	ipython nbconvert "test.ipynb" --to slides --reveal-prefix "http://cdn.jsdelivr.net/reveal.js/2.6.2" --post serve --config slides_config.py

	* To print slides add ?print-pdf at the end of the URL and print

	Convert .ipynb to LaTeX/PDF
	ipython nbconvert MyFirstNotebook.ipynb --to latex --post PDF

	Convert .ipynb to HTML
	"""Google spreadsheet related.
	Packages required: oauth2client, google-api-python-client
	* https://gist.github.com/miohtama/f988a5a83a301dd27469
	"""

	from oauth2client.service_account import ServiceAccountCredentials
	from apiclient import discovery


	def get_credentials(scopes: list) -> ServiceAccountCredentials:
	def df_to_ddl(df, tablename='test.mytable'):
	data_dtypes = df.dtypes.reset_index().rename(columns = {'index':'colname',0:'datatype'})

	# Map pandas datatypes into SQL
	data_dtypes['sql_dtype'] = data_dtypes.datatype.astype(str).map(
	{'object':'varchar(24)',
	'float64':'float',
	'int64':'int',
	'bool':'boolean'} )
	import pandas as pd

	def df_diff(index_cols, data1, data2, lsuffix='_1'):
	"""
	usage:
	comparisondf= df_diff( ['unique_id','date'], current_df, new_df, lsuffix='_curr')

	returns:
	single dataframe with index_cols on the index, as well as all other variables stacked on the index, and the
	values in each dataframe along the columns.
	# For gensim
	from itertools import groupby
	import gensim


	class VectorizedCorpus(object):
	"""
	Helper Class for using Sklearn Vectorizers with gensim's LDA model
	handles transformations between gensim corpus / bow representations and sklearn matrix
	# List unique values in a DataFrame column
	pd.unique(df.column_name.ravel())

	# Convert Series datatype to numeric; values that cannot be parsed become NaN.
	# Series.convert_objects was removed in pandas 1.0, so use pd.to_numeric
	# with errors='coerce' instead.
	df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

	# Grab DataFrame rows where column has certain values
	# (the list name must match the one passed to isin(), otherwise NameError)
	value_list = ['value1', 'value2', 'value3']
	df = df[df.column.isin(value_list)]