These functions are exactly equivalent:

| Function | Alias | Reference |
|---|---|---|
| filter | where | pyspark.sql.DataFrame.filter |
| drop_duplicates | dropDuplicates | pyspark.sql.DataFrame.drop_duplicates |
| avg | mean | pyspark.sql.GroupedData.avg |
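For example, the following pairs of calls produce the same results. This is a minimal sketch assuming an existing `SparkSession` named `spark`; the sample data is made up for illustration:

```python
# Minimal sketch assuming an existing SparkSession named `spark`.
df = spark.createDataFrame([(1, "a"), (2, "b"), (2, "b")], ["id", "label"])

# filter / where are aliases of each other
df.filter(df.id > 1).show()
df.where(df.id > 1).show()

# drop_duplicates / dropDuplicates are aliases of each other
df.drop_duplicates().show()
df.dropDuplicates().show()

# avg / mean are aliases on grouped data
df.groupBy("label").avg("id").show()
df.groupBy("label").mean("id").show()
```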
| """ | |
| Python script for batch geocoding of addresses using the Google Geocoding API. | |
| This script allows for massive lists of addresses to be geocoded for free by pausing when the | |
| geocoder hits the free rate limit set by Google (2500 per day). If you have an API key for paid | |
| geocoding from Google, set it in the API key section. | |
| Addresses for geocoding can be specified in a list of strings "addresses". In this script, addresses | |
| come from a csv file with a column "Address". Adjust the code to your own requirements as needed. | |
| After every 500 successul geocode operations, a temporary file with results is recorded in case of | |
| script failure / loss of connection later. | |
| Addresses and data are held in memory, so this script may need to be adjusted to process files line |
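A minimal sketch of the main loop this docstring describes might look like the following. The `geocode_address` helper, the back-off duration, and the file names are assumptions for illustration, not the original script:

```python
import csv
import json
import time

import requests  # assumed HTTP client for calling the Geocoding API

API_KEY = ""  # optional paid key; leave empty for the free tier
GEOCODE_URL = "https://maps.googleapis.com/maps/api/geocode/json"

def geocode_address(address):
    """Hypothetical helper: return the raw Geocoding API response for one address."""
    params = {"address": address}
    if API_KEY:
        params["key"] = API_KEY
    return requests.get(GEOCODE_URL, params=params).json()

# Addresses come from a CSV file with a column "Address", as described above.
with open("addresses.csv", newline="") as f:
    addresses = [row["Address"] for row in csv.DictReader(f)]

results = []
for i, address in enumerate(addresses, start=1):
    response = geocode_address(address)
    # Pause when the free daily quota is exhausted, then retry the same address.
    while response.get("status") == "OVER_QUERY_LIMIT":
        time.sleep(30 * 60)  # assumed back-off: wait 30 minutes before retrying
        response = geocode_address(address)
    results.append(response)
    # Periodically persist partial results in case the script dies later.
    if i % 500 == 0:
        with open("temp_results_{}.json".format(i), "w") as out:
            json.dump(results, out)
```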
```python
import numpy as np

EPSILON = 1e-10

def _error(actual: np.ndarray, predicted: np.ndarray):
    """ Simple error """
    return actual - predicted
```
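Other metrics can be built on top of this helper. A hedged sketch, where the metric functions below are illustrative additions rather than part of the original module:

```python
import numpy as np

def mse(actual: np.ndarray, predicted: np.ndarray):
    """Mean squared error, built on the _error helper defined above."""
    return np.mean(np.square(_error(actual, predicted)))

def rmse(actual: np.ndarray, predicted: np.ndarray):
    """Root mean squared error."""
    return np.sqrt(mse(actual, predicted))

# Example usage with small arrays
actual = np.array([1.0, 2.0, 3.0])
predicted = np.array([1.1, 1.9, 3.2])
print(rmse(actual, predicted))
```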
```python
import boto3

# Allow an EventBridge (CloudWatch Events) rule to invoke the `create_lab` Lambda function.
lambda_client = boto3.client('lambda', region_name='REGION_NAME')

lambda_client.add_permission(
    FunctionName='create_lab',
    StatementId='AWSEventsRule',
    Action='lambda:InvokeFunction',
    Principal='events.amazonaws.com',
    SourceArn='arn:aws:events:REGION_NAME:ACCOUNT_NUMBER:rule/*',
)
```
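The permission above only lets EventBridge invoke the function; the rule and its target still have to exist. A hedged sketch of that side, with the rule name and schedule chosen purely for illustration:

```python
import boto3

events_client = boto3.client('events', region_name='REGION_NAME')

# Hypothetical rule name and schedule; adjust to your own setup.
events_client.put_rule(
    Name='create_lab_schedule',
    ScheduleExpression='rate(1 hour)',
    State='ENABLED',
)

# Point the rule at the Lambda function that was granted permission above.
events_client.put_targets(
    Rule='create_lab_schedule',
    Targets=[{
        'Id': 'create-lab-target',
        'Arn': 'arn:aws:lambda:REGION_NAME:ACCOUNT_NUMBER:function:create_lab',
    }],
)
```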
```python
# Import all functions
from pyspark.sql import SQLContext
from pyspark.sql import functions as F
from pyspark.sql import SparkSession
from pyspark.sql.functions import unix_timestamp, to_date, date_format, month, year, dayofyear, dayofweek, col
from pyspark.sql.types import TimestampType
```
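A short sketch of how these imports are typically used together, building on the imports above. The sample data and column names are assumptions for illustration:

```python
spark = SparkSession.builder.appName("date-features").getOrCreate()

# Hypothetical input: a string column "event_time" in "yyyy-MM-dd HH:mm:ss" format.
df = spark.createDataFrame(
    [("2021-03-15 10:30:00",), ("2021-12-01 08:00:00",)],
    ["event_time"],
)

df = (
    df.withColumn("ts", unix_timestamp(col("event_time")).cast(TimestampType()))
      .withColumn("date", to_date(col("ts")))
      .withColumn("month", month(col("ts")))
      .withColumn("year", year(col("ts")))
      .withColumn("day_of_year", dayofyear(col("ts")))
      .withColumn("day_of_week", dayofweek(col("ts")))
      .withColumn("weekday_name", date_format(col("ts"), "EEEE"))
)

df.show()
```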
```bash
#!/bin/bash
########################################################################################
# START, STOP or STATUS                                                                #
# ----------------------------                                                         #
# This script is intended to help you start, stop or get the IP address of            #
# the currently running EC2 instance.                                                 #
# It requires you to first configure your AWS CLI manually, to ensure safety.         #
#                                                                                      #
# Please read the code to ensure that it does not cause any security issues.          #
#                                                                                      #
```
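The same start / stop / status flow can also be sketched in Python with boto3 rather than the AWS CLI. This is not the original bash script; the region and instance ID below are placeholders:

```python
import boto3

ec2 = boto3.client('ec2', region_name='REGION_NAME')
INSTANCE_ID = 'i-0123456789abcdef0'  # placeholder instance ID

def start():
    """Start the instance."""
    ec2.start_instances(InstanceIds=[INSTANCE_ID])

def stop():
    """Stop the instance."""
    ec2.stop_instances(InstanceIds=[INSTANCE_ID])

def status():
    """Print the instance state and its public IP address, if any."""
    reservations = ec2.describe_instances(InstanceIds=[INSTANCE_ID])['Reservations']
    instance = reservations[0]['Instances'][0]
    state = instance['State']['Name']
    ip = instance.get('PublicIpAddress', 'no public IP')
    print(f"{INSTANCE_ID}: {state} ({ip})")
```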
```python
# A simple cheat sheet of Spark DataFrame syntax
# Current for Spark 1.6.1

# Import statements
from pyspark.sql import SQLContext
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Creating DataFrames
df = sqlContext.createDataFrame([(1, 4), (2, 5), (3, 6)], ["A", "B"])  # from manual data
```
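A few basic operations on the DataFrame created above, added here as a hedged continuation of the cheat sheet rather than part of the original:

```python
# Selecting and filtering
df.select("A").show()
df.filter(df.B > 4).show()

# Adding a derived column and aggregating
df.withColumn("A_plus_B", df.A + df.B).show()
df.groupBy().avg("B").show()
```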
```bash
#!/bin/bash
##################################################################################
# ----------------------------------------------------------------
# THIS SCRIPT WILL HELP YOU AUTOMATE THE DOCKER INSTALLATION STEPS
# ----------------------------------------------------------------
# Tested on an AWS EC2 instance.
#
# AUTHOR:
```