Skip to content

Instantly share code, notes, and snippets.

@z4r
Created April 18, 2012 10:47
Show Gist options
  • Select an option

  • Save z4r/2412762 to your computer and use it in GitHub Desktop.

Select an option

Save z4r/2412762 to your computer and use it in GitHub Desktop.
Cache handler for urllib2 with Redis and filesystem backends
import errno
import functools
import hashlib
import httplib
import os
import StringIO
import time
import urllib2
from threading import RLock
# Module-wide re-entrant lock shared by every function decorated with
# ``locked``; re-entrant so one locked function may call another.
cache_lock = RLock()


def locked(origfunc):
    """Return a wrapper that runs *origfunc* while holding ``cache_lock``.

    The lock is released when *origfunc* returns or raises. The wrapper
    keeps the name and docstring of *origfunc*.

    :param origfunc: callable to serialize.
    :returns: a callable taking the same arguments as *origfunc*.
    """
    @functools.wraps(origfunc)
    def wrapped(*args, **kwargs):
        with cache_lock:
            return origfunc(*args, **kwargs)
    return wrapped
# Marker header appended to the headers stored in the cache. A response that
# already carries it is not stored again by CacheHandler.http_response.
C_HEADER = 'X-Local-Cache'
# Names of the four stored parts of a response; used as the dict keys handed
# to a cache client's set() and as the order in which get() returns them.
EXTENSIONS = ('body', 'headers', 'code', 'msg')
class CacheHandler(urllib2.BaseHandler):
    """urllib2 handler that answers and stores GET responses via a cache client.

    ``default_open`` serves a GET request from the cache when the client
    yields an entry; ``http_response`` stores a fresh GET response and hands
    the caller a replayable copy of it.
    """

    def __init__(self, client, ttl=21600):
        """Create the handler.

        :param client: cache backend providing ``key(url)``,
            ``get(key, ttl)`` and ``set(key, ttl, value)``.
        :param ttl: entry lifetime handed to the client on every call
            (the bundled clients treat it as seconds).
        """
        self.client = client
        self.ttl = ttl

    def default_open(self, request):
        """Return a ``CachedResponse`` for a cached GET, otherwise ``None``.

        Returning ``None`` lets the remaining handlers perform the request.
        """
        # Compare by value: identity (`is`) on strings only works by accident.
        if request.get_method() != 'GET':
            return None
        try:
            url = request.get_full_url()
            body, headers, code, msg = self.client.get(
                self.client.key(url), self.ttl
            )
            return CachedResponse(
                url=url,
                body=body,
                headers=headers,
                code=code,
                msg=msg,
            )
        except Exception:
            # Deliberately broad: a miss shows up as an exception (unpacking
            # None, int(None), a missing file) and a failing backend must
            # never break the actual request.
            return None

    def http_response(self, request, response):
        """Store a fresh GET response in the cache and return a replayable copy.

        Responses to other methods, and responses that already carry
        ``C_HEADER``, are returned untouched.
        """
        if request.get_method() != 'GET' or C_HEADER in response.info():
            return response

        try:
            url = request.get_full_url()
            body = response.read()
            headers = str(response.info())
            code = response.code
            msg = response.msg
            # Built before touching the cache: the body has been consumed, so
            # the caller must get this copy even if storing fails below.
            replay = CachedResponse(
                url=url,
                body=body,
                headers=headers,
                code=code,
                msg=msg,
            )
        except Exception:
            return response

        try:
            key = self.client.key(url)
            stored_headers = headers + "%s: %s\r\n" % (C_HEADER, key)
            self.client.set(key, self.ttl, dict(zip(
                EXTENSIONS, (body, stored_headers, code, msg)
            )))
        except Exception:
            # Caching is best-effort; a backend failure only costs the entry.
            pass
        return replay
class CachedResponse(StringIO.StringIO):
def __init__(self, url, body, headers, code, msg):
StringIO.StringIO.__init__(self, body)
self.url = url
self.code = int(code)
self.msg = msg
self.headers = httplib.HTTPMessage(StringIO.StringIO(headers))
def info(self):
return self.headers
def geturl(self):
return self.url
class BaseCacheClient(object):
    """Interface of a cache backend; subclasses supply get, set and key."""

    def get(self, key, ttl):
        """Fetch the entry stored under *key*; must be overridden."""
        raise NotImplementedError()

    def set(self, key, ttl, value):
        """Store *value* under *key*; must be overridden."""
        raise NotImplementedError()

    def key(self, url):
        """Map *url* to a backend key; must be overridden."""
        raise NotImplementedError()

    @staticmethod
    def thumb(url):
        """Return the hexadecimal MD5 digest of *url*."""
        digest = hashlib.md5()
        digest.update(url)
        return digest.hexdigest()
class RedisCacheClient(BaseCacheClient):
    """Cache backend that keeps each response in a Redis hash."""

    def __init__(self, red):
        """Remember *red*, the Redis connection used for all operations."""
        self.red = red

    def get(self, key, ttl):
        """Return the hash fields named by ``EXTENSIONS``, in that order.

        *ttl* is not used here; expiry is set when the entry is stored.
        """
        fields = self.red.hmget(key, EXTENSIONS)
        return fields

    def set(self, key, ttl, value):
        """Write the mapping *value* to the hash *key* and expire it after *ttl*."""
        connection = self.red
        connection.hmset(key, value)
        connection.expire(key, ttl)

    def key(self, url):
        """Return the Redis key for *url*: ``redcache:`` plus its digest."""
        return 'redcache' + ':' + self.thumb(url)
class FileCacheClient(BaseCacheClient):
    """Cache backend that keeps each response in its own directory.

    An entry is a directory containing one file per name in ``EXTENSIONS``;
    the directory's modification time is the entry's timestamp.
    """

    @locked
    def __init__(self, location):
        """Use *location* as the cache root, creating the directory if needed.

        :raises OSError: if the directory cannot be created.
        """
        self.location = location
        self._ensure_dir(self.location)

    @staticmethod
    def _ensure_dir(path):
        """Create directory *path*, tolerating only "already a directory"."""
        try:
            os.mkdir(path)
        except OSError as e:
            # Another thread or process may have created it first; anything
            # else (permissions, missing parent, a file in the way) is real.
            if e.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    @locked
    def get(self, key, ttl):
        """Return the stored parts in ``EXTENSIONS`` order, or ``None`` if expired.

        :param key: entry directory, as produced by ``key()``.
        :param ttl: maximum entry age in seconds.
        :raises OSError, IOError: if the entry or one of its files is missing.
        """
        if os.stat(key).st_mtime + ttl <= time.time():
            return None
        ret = []
        for filename in EXTENSIONS:
            with open(os.path.join(key, filename), 'r') as fp:
                ret.append(fp.read())
        return ret

    @locked
    def set(self, key, ttl, value):
        """Write every item of the mapping *value* to a file in directory *key*.

        *ttl* is not used here; freshness is checked against it in ``get``.
        """
        self._ensure_dir(key)
        for filename, content in value.iteritems():
            with open(os.path.join(key, filename), 'w') as fp:
                fp.write(str(content))
        # Overwriting existing files does not change the directory's mtime,
        # so without this an expired entry would stay expired after a refresh.
        os.utime(key, None)

    def key(self, url):
        """Return the entry directory for *url* below the cache root."""
        return os.path.join(self.location, 'redcache:'+self.thumb(url))
if __name__ == '__main__':
    # Demo: fetch the same page twice through the file-backed cache and print
    # the headers of each response.
    #
    # To try the Redis backend instead, `import redis` and build the opener
    # with CacheHandler(RedisCacheClient(red=redis.StrictRedis())).
    import tempfile

    # A directory under the system temp dir works on any machine, unlike a
    # path inside one particular user's home.
    cache_dir = os.path.join(tempfile.gettempdir(), 'redcache')
    opener = urllib2.build_opener(CacheHandler(FileCacheClient(cache_dir)))
    resp = opener.open('http://redis.io/')
    print(resp.info())
    resp = opener.open('http://redis.io/')
    print(resp.info())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment