Created
April 18, 2012 10:47
-
-
Save z4r/2412762 to your computer and use it in GitHub Desktop.
Cache for urllib2 [Redis, FileSystem]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import hashlib | |
| import StringIO | |
| import httplib | |
| import urllib2 | |
| import os | |
| import errno | |
| import time | |
| from threading import RLock | |
# One module-wide re-entrant lock shared by every function wrapped with
# ``locked``. Being an RLock, a locked function may call another locked
# function from the same thread without deadlocking.
cache_lock = RLock()


def locked(origfunc):
    """Decorate ``origfunc`` so that it runs while holding ``cache_lock``.

    The lock is acquired before the call and released when the call
    returns or raises. Positional and keyword arguments, the return value
    and any exception are passed through unchanged. The wrapper keeps the
    wrapped function's ``__name__`` and ``__doc__``.
    """
    # Imported here so the decorator does not depend on the file header.
    from functools import wraps

    @wraps(origfunc)
    def wrapped(*args, **kwargs):
        with cache_lock:
            return origfunc(*args, **kwargs)

    return wrapped
# Name of the header appended to the header text that gets stored in the
# cache; its presence on a response marks it as having come from the cache.
C_HEADER = 'X-Local-Cache'

# Field names of one cache entry, in the order in which cache clients
# return them from ``get``.
EXTENSIONS = (
    'body',
    'headers',
    'code',
    'msg',
)
class CacheHandler(urllib2.BaseHandler):
    """urllib2 handler that answers GET requests from a cache client.

    ``client`` must provide ``key(url)``, ``get(key, ttl)`` and
    ``set(key, ttl, value)``. ``ttl`` is the entry lifetime handed to the
    client on every call (default 21600).
    """

    def __init__(self, client, ttl=21600):
        self.client = client
        self.ttl = ttl

    def default_open(self, request):
        """Return a ``CachedResponse`` for a cached GET, otherwise ``None``.

        Any error raised while looking the entry up or rebuilding the
        response is treated as a cache miss.
        """
        # Compare by value: ``is`` on strings only works by interning luck.
        if request.get_method() != 'GET':
            return None
        try:
            url = request.get_full_url()
            body, headers, code, msg = self.client.get(
                self.client.key(url), self.ttl
            )
            return CachedResponse(
                url=url,
                body=body,
                headers=headers,
                code=code,
                msg=msg,
            )
        except Exception:
            # Deliberate best effort: a broken or missing entry is a miss.
            return None

    def http_response(self, request, response):
        """Store a fresh GET response in the cache and return a replayable copy.

        Responses that already carry ``C_HEADER`` are returned untouched;
        the header text stored in the cache includes that marker. Non-GET
        requests are returned untouched as well. Errors while reading or
        storing are swallowed so caching never breaks the request.
        """
        if request.get_method() != 'GET' or C_HEADER in response.info():
            return response
        try:
            url = request.get_full_url()
            body = response.read()
            headers = str(response.info())
            code = response.code
            msg = response.msg
            # read() drained the original response, so rewrap it *before*
            # storing: if the store fails the caller still gets the body.
            response = CachedResponse(
                url=url,
                body=body,
                headers=headers,
                code=code,
                msg=msg,
            )
            key = self.client.key(url)
            # Only the stored copy gets the marker header appended.
            stored_headers = headers + "%s: %s\r\n" % (C_HEADER, key)
            self.client.set(key, self.ttl, dict(zip(
                EXTENSIONS, (body, stored_headers, code, msg)
            )))
        except Exception:
            # Deliberate best effort: a failed store must not fail the request.
            pass
        return response
class CachedResponse(StringIO.StringIO):
    """In-memory HTTP response built from stored body, headers, code and msg.

    The file-like interface comes from ``StringIO.StringIO`` initialised
    with ``body``. ``headers`` is raw header text, which is parsed into an
    ``httplib.HTTPMessage``.
    """

    def __init__(self, url, body, headers, code, msg):
        StringIO.StringIO.__init__(self, body)
        self.url = url
        # int() because a cache client may hand the code back as text.
        self.code = int(code)
        self.msg = msg
        self.headers = httplib.HTTPMessage(StringIO.StringIO(headers))

    def info(self):
        """Return the parsed headers."""
        return self.headers

    def geturl(self):
        """Return the URL this response was built for."""
        return self.url

    def getcode(self):
        """Return the integer status code.

        Added so callers that use ``getcode()`` on urllib2 responses keep
        working when they receive this class instead.
        """
        return self.code
class BaseCacheClient(object):
    """Interface for cache backends; subclasses implement get, set and key."""

    def get(self, key, ttl):
        """Return the cached fields stored under ``key``. Must be overridden."""
        raise NotImplementedError

    def set(self, key, ttl, value):
        """Store the mapping ``value`` under ``key``. Must be overridden."""
        raise NotImplementedError

    def key(self, url):
        """Return the backend-specific cache key for ``url``. Must be overridden."""
        raise NotImplementedError

    @staticmethod
    def thumb(url):
        """Return the hexadecimal MD5 digest of ``url``.

        Byte strings are hashed as given. Text is encoded as UTF-8 first,
        so URLs with non-ASCII characters hash instead of raising; ASCII
        URLs produce the same digest as before.
        """
        if not isinstance(url, bytes):
            url = url.encode('utf-8')
        return hashlib.md5(url).hexdigest()
class RedisCacheClient(BaseCacheClient):
    """Cache backend that keeps each entry in a hash on the ``red`` client."""

    def __init__(self, red):
        self.red = red

    def get(self, key, ttl):
        """Return the ``EXTENSIONS`` fields of the hash at ``key``.

        ``ttl`` is not used here; the lifetime is applied in ``set``.
        """
        fields = self.red.hmget(key, EXTENSIONS)
        return fields

    def set(self, key, ttl, value):
        """Write ``value`` into the hash at ``key``, then set its expiry to ``ttl``."""
        connection = self.red
        connection.hmset(key, value)
        connection.expire(key, ttl)

    def key(self, url):
        """Return ``redcache:`` followed by the digest of ``url``."""
        digest = self.thumb(url)
        return 'redcache' + ':' + digest
class FileCacheClient(BaseCacheClient):
    """Cache backend that stores each entry as a directory of small files.

    An entry lives in one directory under ``location`` and holds one file
    per field. The directory's modification time is the entry's timestamp.
    """

    @locked
    def __init__(self, location):
        """Remember ``location`` and create it, with parents, if missing."""
        self.location = location
        self._ensure_dir(self.location)

    @staticmethod
    def _ensure_dir(path):
        """Create directory ``path``; only 'already a directory' is ignored."""
        try:
            os.makedirs(path)
        except OSError as e:
            # Permission problems, or a plain file in the way, must surface
            # instead of being swallowed.
            if e.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    @locked
    def get(self, key, ttl):
        """Return the entry's fields in ``EXTENSIONS`` order, or ``None`` if expired.

        Raises ``OSError``/``IOError`` when the entry directory or one of
        its files does not exist.
        """
        if os.stat(key).st_mtime + ttl <= time.time():
            return None
        ret = []
        for filename in EXTENSIONS:
            with open(os.path.join(key, filename), 'r') as fp:
                ret.append(fp.read())
        return ret

    @locked
    def set(self, key, ttl, value):
        """Write every item of ``value`` to a file named after its key.

        ``ttl`` is not used here; freshness is checked in ``get``.
        """
        self._ensure_dir(key)
        for filename, content in value.items():
            with open(os.path.join(key, filename), 'w') as fp:
                fp.write(str(content))
        # Rewriting existing files does not change the directory's mtime,
        # so touch it; otherwise a refreshed entry would stay expired.
        os.utime(key, None)

    def key(self, url):
        """Return the entry directory path for ``url`` under ``location``."""
        return os.path.join(self.location, 'redcache:'+self.thumb(url))
if __name__ == '__main__':
    # Demo: fetch the same page twice through a file-backed cache and print
    # the headers of each response.
    import tempfile

    # To try the Redis backend instead, ``import redis`` and build the
    # opener with: CacheHandler(RedisCacheClient(red=redis.StrictRedis()))
    # It is not imported unconditionally so that the file-backed demo runs
    # without the redis package installed.
    cache_dir = os.path.join(tempfile.gettempdir(), 'redcache')
    opener = urllib2.build_opener(CacheHandler(FileCacheClient(cache_dir)))
    for _ in range(2):
        resp = opener.open('http://redis.io/')
        print(resp.info())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment