Skip to content

Instantly share code, notes, and snippets.

@christinac
Created January 1, 2016 02:51
Show Gist options
  • Select an option

  • Save christinac/ee6e14f256956db07dcb to your computer and use it in GitHub Desktop.

Select an option

Save christinac/ee6e14f256956db07dcb to your computer and use it in GitHub Desktop.
How the book-extraction magic happens before all those books get dropped
#!/usr/bin/python
from datetime import datetime
import json
import re
from xml.sax.saxutils import unescape
import models
from bookkit import Book, BookNotFoundException
from utils.gmailservice import GmailService # modified version of https://developers.google.com/gmail/api/quickstart/python
class ResultSet(object):
    """Collect Gmail threads for a user and turn them into potential books.

    Takes in a Gmail service (already constructed in the request), wraps it
    in a ``GmailService``, accumulates the threads returned by the search in
    ``data`` and the titles extracted from their messages in
    ``potential_books``.
    """

    service = None
    user = None

    def __init__(self, service, user):
        """Wrap *service* in a ``GmailService`` and remember *user*."""
        self.service = GmailService(service=service)
        self.user = user
        # Created per instance: as class-level lists these were shared by
        # every ResultSet, so one instance's results showed up in the next.
        self.data = []
        self.potential_books = []
        super(ResultSet, self).__init__()

    def extend(self, data):
        """Append every item of *data* to the collected threads."""
        self.data.extend(data)

    def search_and_extend(self, query, page_token=None):
        """Fetch one page of results for *query* and store its threads.

        Returns the token for the following page, or a falsy value when the
        response carries none.
        """
        threads = self.service.search(query, page_token=page_token)
        # A page without a 'threads' entry is treated as empty instead of
        # raising KeyError (presumably what a no-match response looks like --
        # confirm against GmailService.search).
        self.extend(threads.get('threads') or [])
        return threads.get('nextPageToken') or threads.get('pageToken')

    def capture_and_extract(self):
        """Run the search, then request the messages for what was found."""
        self.capture()
        self.extract()

    def capture(self):
        """Page through the Amazon order/shipping search and return ``data``."""
        query = 'from:amazon.com ((-kindle subject:shipped) OR (kindle order))'
        next_token = self.search_and_extend(query)
        while next_token:
            next_token = self.search_and_extend(query, page_token=next_token)
        return self.data

    def extract(self):
        """Request every collected item by id; results go to ``list_callback``."""
        self.service.get_messages([x['id'] for x in self.data], self.list_callback)

    def list_callback(self, request_id, response, exception):
        """Handle one message response handed back by ``get_messages``.

        Responses that arrive with an exception are skipped; otherwise the
        extracted title and date are stored as a ``PotentialBook``.
        """
        if exception:
            # NOTE(review): a skipped response means the length check below
            # can never become true for this run -- confirm that is intended.
            return
        message = MessageExtractor(response)
        self.potential_books.append(PotentialBook(message.title(), message.date()))
        if len(self.potential_books) == len(self.data):
            ## try to make books for the user in an async task
            pass  # body elided in the published source; placeholder keeps the block valid
class MessageExtractor(object):
    """Takes in a gmail message, finds a message date, and looks for a book title"""

    message = None

    def __init__(self, message):
        """Keep *message*, a dict-like Gmail message, for later extraction."""
        self.message = message
        super(MessageExtractor, self).__init__()

    def date(self):
        """Return ``internalDate`` (divided by 1000 to get seconds) as a datetime.

        The result is a naive datetime in the machine's local timezone.
        """
        return datetime.fromtimestamp(float(self.message['internalDate']) / 1000)

    def title(self):
        """Return the extracted title with XML entities unescaped ('' if none)."""
        return unescape(self._title())

    def _title(self):
        """Return the raw title from the Subject header or snippet, else ''.

        Only the first header whose name contains 'Subject' is examined.
        Patterns are tried in order: a quoted string in the subject, the text
        after 'Amazon.com order of' in the subject, then a quoted string
        terminated by '&quot' in the message snippet.
        """
        payload = self.message.get('payload')
        if not payload:
            # Always hand title() a string; unescape() fails on None.
            return ''
        for header in payload.get('headers') or ():
            if 'Subject' not in header['name']:
                continue
            subject = header['value']
            attempts = (
                (r'"(.*)"', subject),
                (r'Amazon.com order of (.*)', subject),
                (r'"(.*)&quot', self.message.get('snippet') or ''),
            )
            for pattern, text in attempts:
                match = re.search(pattern, text)
                if match:
                    return match.group(1)
            return ''  # Empty string so unescape doesn't break, is caught elsewhere
        return ''
class PotentialBook(object):
    """A title that's been extracted and now needs to be looked up on Amazon and maybe saved with a user"""

    title = None
    date = None

    def __init__(self, title, date):
        """Store the extracted *title* and the *date* of the source message."""
        self.title = title
        self.date = date
        super(PotentialBook, self).__init__()

    def create(self, user):
        """Try to create a book for *user* from this title.

        Does nothing when the title is empty. A ``BookNotFoundException``
        raised while creating the book is ignored.
        """
        if not self.title:
            return
        try:
            ## make the book and save it with the user
            pass  # body elided in the published source; placeholder keeps the block valid
        except BookNotFoundException:
            # Best effort: a title with no matching book is simply skipped.
            pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment