Created
March 24, 2019 21:54
Get goo.gl analytics for all your shortened URLs as JSON. Won't work after Mar 30, 2019, as Google will shut down goo.gl.
'''
How to:
This won't work after Mar 30, 2019, as Google will shut down goo.gl.
Download your list of shortened URLs at https://goo.gl/# by clicking "Download CSV".
Save/rename that file as file.csv and place this script in the same folder.
Set API_KEY in this script. You can get a key at https://developers.google.com/url-shortener/v1/getting_started
Then just run it. It writes the analytics data as JSON files to the output folder.
Tested with ~100 URLs. Google may block you if you try to download too much data at once;
if so, change the script to pause after a number of queries (e.g. pause after 200 downloads and resume later).
'''
import urllib.request
import urllib.parse
import re
import json
import os

apikey = "API_KEY"  # place your Google URL Shortener API key here
requestUrl = "https://www.googleapis.com/urlshortener/v1/url?shortUrl=http://goo.gl/{}&projection=FULL&key={}"

if not os.path.exists('output'):
    os.makedirs('output')

def urlCharReplacer(url):
    # Turn the long URL (second CSV column) into a safe file name.
    url = url.split(',')[1].strip()
    url = urllib.parse.unquote(urllib.parse.unquote(url))  # decode double-encoded URLs
    for let in url:
        if not (let.isalpha() or let.isnumeric()):
            url = url.replace(let, '_')  # replace every non-alphanumeric character
    url = re.sub(r'_{2,}', '_', url)  # collapse runs of underscores
    url = re.sub(r'_$', '', url)      # drop a trailing underscore
    return url

with open('file.csv', 'r') as inputfile:
    content = inputfile.readlines()[1:]  # skip the header row

for line in content:
    url = line.split(',')[0].split('/')[-1]  # short-URL token from the first column
    fileName = urlCharReplacer(line)
    print('Requesting ...', url, fileName)
    with urllib.request.urlopen(requestUrl.format(url, apikey)) as response:
        html = response.read()
    with open('output/{}_{}.json'.format(url, fileName), 'w+') as outputfile:
        outputfile.write(json.dumps(json.loads(html.decode("utf-8")), indent=4))
    print('Saved {}_{}.json'.format(url, fileName))
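The docstring suggests pausing after a batch of queries to avoid being rate-limited. A minimal sketch of that idea, assuming a hypothetical `throttled` helper with guessed defaults (200 requests per batch, 60-second pause; the API's real limits are not documented here):

```python
import time

def throttled(items, batch_size=200, pause_s=60):
    """Yield items, sleeping pause_s seconds after every batch_size items.

    batch_size and pause_s are assumptions; tune them to whatever
    the URL Shortener API actually tolerates.
    """
    for i, item in enumerate(items, start=1):
        yield item
        if i % batch_size == 0:
            time.sleep(pause_s)
```

In the script above, the download loop could then read `for line in throttled(content):` instead of `for line in content:` without any other changes.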