- Unscheduled: the Deep Web harvest will execute immediately (no delay parameter) and run once (scheduleType="ONCE", no interval parameter)
{
  "id": "string",
  "harvestEventType": "DEEP",
  "scheduleType": "ONCE"
}
| """ | |
| This script returns documents from the BrightPlanet REST API. Input is a text file with a list of queries. | |
| Output is a CSV file with your desired fields for each document. Default time period is everything until present. | |
| Requires 'requests' module. To install via cmd, enter: python -m pip install requests | |
| """ | |
| import requests | |
| import csv |
| import requests | |
| import csv | |
| input_file = r'YOUR_FULL_FILEPATH_HERE' | |
| var_scheduled = "RECURRING" | |
| var_initial_delay = 1.0 # float | |
| var_time_between_scheduled_events = 12.0 # float | |
| var_max_depth = 1 | |
| var_depth_external = 0 | |
| var_max_docsize = -1 |
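
# A minimal sketch of the rest of the flow: read the queries, submit a Deep Web
# harvest, and write the returned documents to CSV. The endpoint URL, payload
# field names, and response shape below are assumptions for illustration;
# check the Harvest API schema for the real contract.
BASE_URL = "https://example.brightplanet.com/rest"  # hypothetical URL

with open(input_file) as f:
    queries = [line.strip() for line in f if line.strip()]

payload = {
    "harvestEventType": "DEEP",
    "scheduleType": var_scheduled,
    "delay": var_initial_delay,                     # assumed unit: hours
    "interval": var_time_between_scheduled_events,  # assumed unit: hours
    "maxDepth": var_max_depth,
    "depthExternal": var_depth_external,
    "maxDocSize": var_max_docsize,                  # -1 = unlimited (assumed)
    "queries": queries,
}
response = requests.post(BASE_URL + "/harvests", json=payload)
response.raise_for_status()

# Hypothetical response layout; adjust to what your deployment returns.
documents = response.json().get("documents", [])
with open('output.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["title", "url", "harvestDate"])  # example fields
    for doc in documents:
        writer.writerow([doc.get("title"), doc.get("url"), doc.get("harvestDate")])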
#!/usr/bin/env python
import pyperclip
import re

from list_clipboard_manipulations import list_to_clipboard

delete_counter = 0
good_list = list()
sort_alpha = False
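
# A minimal sketch of the filtering pass these variables set up. The pattern
# below is a placeholder; swap in the regex for the lines you want to drop.
drop_pattern = re.compile(r'^\s*$')  # placeholder: drop blank lines

for line in pyperclip.paste().splitlines():
    if drop_pattern.search(line):
        delete_counter += 1
    else:
        good_list.append(line)

if sort_alpha:
    good_list.sort()

list_to_clipboard(good_list)
print("Deleted {} line(s); kept {}.".format(delete_counter, len(good_list)))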
#!/usr/bin/env python
import requests

infile = r'C:\Users\Account\PythonFiles\generic_infile.txt'  # full path to any file inside quotes

# Harvest Event Variables
api_key = "123abc"                  # STRING - 1 API key per Harvest API schema
searchable_items_per_event = 100    # INT - max queries OR max screenNames
name_of_event = "NewYork_Politics"  # STRING - the program will prepend "TW_" and append "_#"
filterQuery = None                  # STRING - ex: "nuclear AND (war OR energy)"
event_tags = ["source_Politics", "New York"]  # LIST
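
# A minimal sketch of how these variables might drive event creation: read the
# input file, split it into chunks of searchable_items_per_event, and name each
# event "TW_<name_of_event>_<n>". The endpoint URL, auth mechanism, and payload
# field names are assumptions; check the Harvest API schema for the real ones.
with open(infile) as f:
    items = [line.strip() for line in f if line.strip()]

chunks = [items[i:i + searchable_items_per_event]
          for i in range(0, len(items), searchable_items_per_event)]

for n, chunk in enumerate(chunks, start=1):
    payload = {
        "name": "TW_{}_{}".format(name_of_event, n),
        "filterQuery": filterQuery,
        "tags": event_tags,
        "searchableItems": chunk,  # assumed field name
    }
    requests.post("https://example.brightplanet.com/rest/harvests",  # hypothetical URL
                  params={"apikey": api_key},  # assumed auth mechanism
                  json=payload).raise_for_status()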
EXAMPLE JSON PAYLOADS FOR BRIGHTPLANET HARVEST API
=================================================
1. Website harvest - scraping search results pages
2. Website harvest - harvesting a list of URLs, includes XPath overwrite and date-finding XPath
3. Website harvest - scheduled harvest to monitor new documents
4. Deep Web harvest - query search engines (USE SPARINGLY - rate limits)
5. Deep Web harvest - query sources from multiple source groups
6. RSS harvest - monitor new documents daily using RSS feeds, includes XPath overwrite and date-finding XPath
7. XPath expressions - use these XPaths to manipulate which text is harvested from a web page
=================================================
#!/usr/bin/env python
import pyperclip

example_list = ["Line 1", "Line 2", "Line 3", "forever and ever"]


def list_to_clipboard(output_list):
    """Check if len(output_list) > 0, then copy the list to the clipboard, one item per line."""
    if len(output_list) > 0:
        pyperclip.copy('\n'.join(output_list))
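
# Example usage: places the items on the clipboard joined by newlines.
list_to_clipboard(example_list)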
#!/usr/bin/env python
import re

input_file = 'infile.txt'    # enter the full file path; precede the string with 'r' (r'PATH') if using Windows
output_file = 'outfile.txt'  # enter the full file path; precede the string with 'r' (r'PATH') if using Windows
delete_counter = 0

# List of individual regexes, which will be combined into a single regex in the next step
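# A minimal sketch of that next step: join the individual patterns with '|',
# compile once, and drop matching lines. The patterns are placeholders.
regex_list = [
    r'^\s*$',  # placeholder: blank lines
    r'^#',     # placeholder: comment lines
]
combined_regex = re.compile('|'.join(regex_list))

with open(input_file) as fin, open(output_file, 'w') as fout:
    for line in fin:
        if combined_regex.search(line):
            delete_counter += 1
        else:
            fout.write(line)

print("Deleted {} line(s).".format(delete_counter))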