|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
|
|
################### |
|
import scraperwiki |
|
import requests |
|
import xmltodict |
|
import json |
|
import time |
|
|
|
graph_query_root = "https://graph.facebook.com/fql" |
|
graph_attr = ['share_count', 'like_count', 'comment_count'] |
|
|
|
|
|
################### |
|
def query_graph_api(url): # query the Graph API, return data. |
|
result = {} |
|
graph_query = graph_query_root + '?q=SELECT ' + ','.join(graph_attr) + ' FROM link_stat WHERE url = "' + url + '"' |
|
print graph_query # debug console |
|
query_data = requests.get(graph_query) |
|
query_json = json.loads(query_data.text) |
|
for item in graph_attr: |
|
result[item] = query_json['data'][0][item] |
|
time.sleep(2) |
|
return result |
|
|
|
def query_twitter(url): |
|
twitter_query = "http://urls.api.twitter.com/1/urls/count.json?url=%s" % url |
|
print twitter_query |
|
query_data = requests.get(twitter_query) |
|
query_json = json.loads(query_data.text) |
|
return query_json['count'] |
|
|
|
def query_pinterest(url): |
|
pinterest_query = "http://widgets.pinterest.com/v1/urls/count.json?url=%s" % url |
|
print pinterest_query |
|
query_data = requests.get(pinterest_query) |
|
query_data_trim = query_data.text[13:-1] |
|
query_json = json.loads(query_data_trim) |
|
return query_json['count'] |
|
|
|
|
|
|
|
################### CREATE DICTIONARY FROM SITEMAP |
|
# Download the site's XML sitemap and parse it into nested dicts/lists.
# `sitemap_dict['urlset']['url']` is consumed by the scraping loop below.
sitemapURL = 'http://www.recipegirl.com/sitemap.xml'

sitemap_raw = requests.get(sitemapURL)

# xmltodict mirrors the XML structure: <urlset><url><loc>...</loc></url>...
sitemap_dict = xmltodict.parse(sitemap_raw.text)
|
|
|
|
|
|
|
################### COLLECT DATA FROM SITEMAP |
|
for page in sitemap_dict['urlset']['url']: |
|
pages = {} |
|
pages['url'] = page['loc'] |
|
|
|
# Get Facebook Graph data |
|
graph_data = query_graph_api(pages['url']) |
|
for item in graph_attr: |
|
pages[item] = graph_data[item] |
|
|
|
# Get Twitter data |
|
pages['tweets'] = query_twitter(pages['url']) |
|
# Get Pinterest data |
|
pages['pins'] = query_pinterest(pages['url']) |
|
# Get LinkedIn data |
|
# Get Delicious data |
|
# Get StumbleUpon data |
|
# Get Reddit data |
|
|
|
# Commit data |
|
scraperwiki.sqlite.save(unique_keys=['url'], data=pages) |