-
-
Save akhundMurad/47c66402c43005e9dec461050665a68b to your computer and use it in GitHub Desktop.
Redis-backed LRU cache decorator in Python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| __author__ = 'Kevin Warrick' | |
| __email__ = 'kwarrick@uga.edu, abulka@gmail.com' | |
| __version__ = '1.6.3' | |
| import pickle | |
| from collections import namedtuple | |
| from functools import wraps | |
| from icecream import ic | |
# When True, a decorated function whose cache has no Redis connection silently
# runs UNCACHED (legacy behaviour); when False it raises RuntimeWarning instead.
ALLOW_NON_REDIS_CACHING = False
# Result type of cache_info(), mirroring functools.lru_cache's CacheInfo.
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
# Result type of cache_info(verbose=True): same fields plus the list of
# call-parameter signatures currently cached for the function.
_CacheInfoVerbose = namedtuple("CacheInfoVerbose", ["hits", "misses", "maxsize", "currsize", "paramsignatures"])
def redis_lru(maxsize=None, slice=slice(None), conn=None):
    """
    Redis-based LRU cache decorator.

    *maxsize*  maximum number of entries in the LRU cache (None -> 5000)
    *slice*    slice object restricting which items of the call signature
               (positional args + sorted kwargs) form the cache key
    *conn*     Redis connection; may instead be supplied later via
               ``func.init(conn)``

    Usage is as simple as prepending the decorator to a function and giving
    the cache a Redis connection, either via the decorator's ``conn``
    parameter or with a later ``func.init(redis.StrictRedis())`` call:

        @redis_lru(maxsize=10000)
        def func(foo, bar):
            # some expensive operation
            return baz
        func.init(redis.StrictRedis())

    Uses 4 Redis keys, all suffixed with the function name:
        lru:keys: - sorted set, stores hash keys (score = recency/use count)
        lru:vals: - hash, stores pickled function return values
        lru:hits: - string, stores hit counter
        lru:miss: - string, stores miss counter

    * Function arguments selected by *slice* must be picklable.

    Granular invalidation: ``func.cache_clear(*args, **kwargs)`` clears only
    the entry for that exact call signature; with no arguments it clears the
    whole cache.  Use granular clearing with care: ``f(1)`` and ``f(param=1)``
    are cached under different keys even though they are semantically the
    same call, so invalidate every call style you actually used, e.g.
    ``f.cache_clear(1); f.cache_clear(param=1)``.

    ``func.cache_info(verbose=True)`` returns an extended namedtuple with an
    extra ``paramsignatures`` member listing all currently cached signatures.

    Fixes relative to the original recipe:
    - cached falsy results (False, 0, None, '') are now served from the
      cache instead of being recomputed (and double-counted) on every call
    - eviction no longer removes one entry more than intended
      (ZRANGE/ZREMRANGEBYRANK stop indexes are inclusive)
    """
    if maxsize is None:
        maxsize = 5000

    def decorator(func):
        # Per-function Redis key names.
        cache_keys = "lru:keys:%s" % (func.__name__,)
        cache_vals = "lru:vals:%s" % (func.__name__,)
        cache_hits = "lru:hits:%s" % (func.__name__,)
        cache_miss = "lru:miss:%s" % (func.__name__,)

        lvars = [None]  # closure-mutable cell; lvars[0] holds the connection
        _MISS = object()  # sentinel: distinguishes "not cached" from cached falsy values

        def _signature_key(args, kwargs):
            # Build the cache key from the (sliced) call signature.  kwargs
            # are sorted so f(a=1, b=2) and f(b=2, a=1) share one key.
            items = args + tuple(sorted(kwargs.items()))
            return pickle.dumps(items[slice])

        def add(key, value):
            """Record a miss: store the value and give it an initial score."""
            eject()
            redis_conn = lvars[0]
            redis_conn.incr(cache_miss)
            redis_conn.hset(cache_vals, key, pickle.dumps(value))
            # redis-py >= 3.0: zadd takes a single {member: score} mapping.
            redis_conn.zadd(cache_keys, {key: 0.0})
            return value

        def get(key):
            """Return the cached value for *key*, or _MISS when absent."""
            redis_conn = lvars[0]
            raw = redis_conn.hget(cache_vals, key)
            if raw is None:
                return _MISS
            redis_conn.incr(cache_hits)
            # redis-py >= 3.0: zincrby(name, amount, value) argument order.
            redis_conn.zincrby(cache_keys, 1.0, key)
            # NOTE: values come back via pickle.loads; the Redis store is
            # assumed trusted (pickle is unsafe on untrusted data).
            return pickle.loads(raw)

        def eject():
            """Evict the least-used entries once the cache is at capacity."""
            redis_conn = lvars[0]
            # Evict in batches of ~10% of capacity (at least 1, at most 1000).
            count = min((maxsize // 10) or 1, 1000)
            if redis_conn.zcard(cache_keys) >= maxsize:
                # ZRANGE / ZREMRANGEBYRANK stop indexes are inclusive, so a
                # batch of `count` members spans ranks [0, count - 1].
                stale = redis_conn.zrange(cache_keys, 0, count - 1)
                redis_conn.zremrangebyrank(cache_keys, 0, count - 1)
                redis_conn.hdel(cache_vals, *stale)

        @wraps(func)
        def wrapper(*args, **kwargs):
            redis_conn = lvars[0]
            if not redis_conn:
                if ALLOW_NON_REDIS_CACHING:
                    return func(*args, **kwargs)  # original behaviour (deprecated)
                raise RuntimeWarning(f"redis_lru - no redis connection has been supplied "
                                     f"for caching calls to '{func.__name__}'")
            key = _signature_key(args, kwargs)
            value = get(key)
            if value is _MISS:
                # Sentinel check (not truthiness) so cached falsy results are
                # genuine hits instead of being recomputed every call.
                value = add(key, func(*args, **kwargs))
            return value

        def cache_info(verbose=False):
            """Return a CacheInfo namedtuple (hits, misses, maxsize, currsize).

            With verbose=True, return CacheInfoVerbose which additionally
            carries `paramsignatures`, the active cached call signatures.
            """
            redis_conn = lvars[0]
            size = int(redis_conn.zcard(cache_keys) or 0)
            hits = int(redis_conn.get(cache_hits) or 0)
            misses = int(redis_conn.get(cache_miss) or 0)
            if verbose:
                signatures = redis_conn.zrange(cache_keys, 0, 9999)
                return _CacheInfoVerbose(hits, misses, maxsize, size,
                                         [pickle.loads(sig) for sig in signatures])
            return _CacheInfo(hits, misses, maxsize, size)

        def cache_clear(*args, **kwargs):
            """Invalidate the whole cache, or - when call arguments are
            given - only the entry for that exact call signature.

            Warning: f(1) and f(param=1) cache under different keys, so
            granular clearing must be repeated for every call style used.
            """
            redis_conn = lvars[0]
            if not args and not kwargs:
                # Traditional behaviour: drop everything for this function.
                redis_conn.delete(cache_keys, cache_vals)
                redis_conn.delete(cache_hits, cache_miss)
            else:
                # Granular invalidation of one parameter signature.
                key = _signature_key(args, kwargs)
                redis_conn.hdel(cache_vals, key)   # cached return value
                redis_conn.zrem(cache_keys, key)   # LRU bookkeeping entry

        def init(connection):
            """Attach (or replace) the Redis connection used by this cache."""
            lvars[0] = connection

        if conn:
            init(conn)
        wrapper.init = init
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return wrapper
    return decorator
if __name__ == "__main__":
    import redis

    # Single shared connection for every test below (the original code
    # redundantly constructed a second StrictRedis instance).
    conn = redis.StrictRedis()
    num_actual_calls_made = 0

    @redis_lru(maxsize=10)
    def test(foo, bar, baz=None):
        print('called test.')
        global num_actual_calls_made
        num_actual_calls_made += 1
        return True

    # Basic hit/miss accounting with a connection supplied via .init().
    test.init(conn)
    test.cache_clear()
    assert test.cache_info() == (0, 0, 10, 0)
    assert num_actual_calls_made == 0
    test(1, 2, baz='A')
    assert test.cache_info() == (0, 1, 10, 1)
    assert num_actual_calls_made == 1
    test(3, 4, baz='B')
    assert test.cache_info() == (0, 2, 10, 2)
    assert num_actual_calls_made == 2
    test(1, 2, baz='A')
    assert test.cache_info() == (1, 2, 10, 2)
    assert num_actual_calls_made == 2
    print("hits %d, misses %d, capacity %d, size %d" % test.cache_info())
    test(1, 2, baz='A')
    assert test.cache_info() == (2, 2, 10, 2)
    assert num_actual_calls_made == 2
    test(3, 4, baz='B')
    assert test.cache_info() == (3, 2, 10, 2)
    assert num_actual_calls_made == 2
    test(1, 2, baz='A')
    assert test.cache_info() == (4, 2, 10, 2)
    assert num_actual_calls_made == 2
    print("hits %d, misses %d, capacity %d, size %d" % test.cache_info())
    # Check _CacheInfo named tuple fields
    assert test.cache_info().hits == 4
    assert test.cache_info().misses == 2
    assert test.cache_info().maxsize == 10
    assert test.cache_info().currsize == 2
    test.cache_clear()
    assert test.cache_info() == (0, 0, 10, 0)

    # Test a second function and also that setting maxsize to None should set maxsize to 5000
    @redis_lru(maxsize=None)
    def somefunc(foo, bar, baz=None):
        global num_actual_calls_made
        num_actual_calls_made += 1
        print('called somefunc.')
        return True

    somefunc.init(conn)
    somefunc.cache_clear()
    num_actual_calls_made = 0
    assert somefunc.cache_info() == (0, 0, 5000, 0)
    assert num_actual_calls_made == 0
    somefunc(1, 2, baz='A')
    assert somefunc.cache_info() == (0, 1, 5000, 1)
    assert num_actual_calls_made == 1
    somefunc(1, 2, baz='A')
    assert somefunc.cache_info() == (1, 1, 5000, 1)
    assert num_actual_calls_made == 1
    print("hits %d, misses %d, capacity %d, size %d" % somefunc.cache_info())

    # Initialise using the decorator 'conn' parameter, which means
    # we don't need to make a separate call to anotherfunc.init(conn)
    @redis_lru(maxsize=1, conn=conn)
    def anotherfunc(foo, bar, baz=None):
        global num_actual_calls_made
        num_actual_calls_made += 1
        print('called anotherfunc.')
        return True

    num_actual_calls_made = 0
    anotherfunc.cache_clear()
    assert anotherfunc.cache_info() == (0, 0, 1, 0)
    assert num_actual_calls_made == 0
    anotherfunc(1, 2, baz='A')
    assert anotherfunc.cache_info() == (0, 1, 1, 1)
    assert num_actual_calls_made == 1
    anotherfunc(1, 2, baz='A')
    anotherfunc(1, 2, baz='A')
    assert anotherfunc.cache_info() == (2, 1, 1, 1)
    assert num_actual_calls_made == 1
    # see if ejection works - only room for one cache entry cos maxsize=1
    anotherfunc(1, 2, baz='B')  # different params will trigger ejection of old cache entry and miss
    assert anotherfunc.cache_info() == (2, 2, 1, 1)
    assert num_actual_calls_made == 2
    anotherfunc(1, 2, baz='B')
    assert anotherfunc.cache_info() == (3, 2, 1, 1)
    assert num_actual_calls_made == 2
    # this call used to be cached but was ejected, so will get a miss, and an ejection of old
    anotherfunc(1, 2, baz='A')
    assert anotherfunc.cache_info() == (3, 3, 1, 1)
    assert num_actual_calls_made == 3
    anotherfunc(1, 2, baz='A')
    assert anotherfunc.cache_info() == (4, 3, 1, 1)  # hit ok again
    assert num_actual_calls_made == 3

    # Try using redis_lru without passing in a redis connection
    @redis_lru()
    def improperfunc(foo, bar, baz=None):
        pass

    failed = False
    try:
        improperfunc(1, 2, baz='A')
    except RuntimeWarning:
        failed = True
    assert failed

    # Test invalidating a specific call signature via cache_clear() -
    # Python's built in lru cache_clear does not have this feature but we do.
    @redis_lru(maxsize=10, conn=conn)
    def smartfunc(foo, bar, baz=None):
        return 100

    num_actual_calls_made = 0
    smartfunc.cache_clear()
    smartfunc(1, 2, baz='A')
    assert smartfunc.cache_info() == (0, 1, 10, 1)
    smartfunc.cache_clear(1, 2, baz='A')
    assert smartfunc.cache_info() == (0, 1, 10, 0)  # note currsize reset to 0
    # this should add back the cached entry for these particular function call params
    smartfunc(1, 2, baz='A')
    assert smartfunc.cache_info() == (0, 2, 10, 1)
    # this should give us a hit again
    smartfunc(1, 2, baz='A')
    assert smartfunc.cache_info() == (1, 2, 10, 1)

    # Test invalidating a specific call signature via cache_clear,
    # but the other one should remain cached
    smartfunc.cache_clear()
    smartfunc(1, 2, baz='A')  # call type 1
    smartfunc(1, 2, baz='B')  # call type 2
    assert smartfunc.cache_info() == (0, 2, 10, 2)
    smartfunc(1, 2, baz='A')
    smartfunc(1, 2, baz='B')
    assert smartfunc.cache_info() == (2, 2, 10, 2)  # two nice hits
    smartfunc.cache_clear(1, 2, baz='A')  # invalidate one of them
    smartfunc(1, 2, baz='B')
    assert smartfunc.cache_info() == (3, 2, 10, 1)  # still get a hit with the other

    # Gotcha with granular invalidation: the same semantics can be achieved
    # with different call signatures, so individual cache clearing only works
    # when signatures match exactly.
    smartfunc.cache_clear()
    smartfunc(1, 2, baz='A')  # call style #1
    assert smartfunc.cache_info() == (0, 1, 10, 1)
    # this next call, even though semantically the same, registers as a miss and creates a new entry
    smartfunc(1, 2, 'A')  # call style #2
    assert smartfunc.cache_info() == (0, 2, 10, 2)
    smartfunc.cache_clear(1, 2, baz='A')  # invalidate one parameter signature - call style #1
    assert smartfunc.cache_info() == (0, 2, 10, 1)
    # Call style #2 still has its own active cached entry, so it gets a hit
    # and returns stale data even though style #1 was just invalidated.
    smartfunc(1, 2, 'A')
    assert smartfunc.cache_info() == (1, 2, 10, 1)  # should ideally be a miss (0, 3, 10, 1)

    # The correct way to use granular cache_clear which avoids this gotcha
    smartfunc.cache_clear()
    smartfunc(1, 2, baz='A')  # call style #1
    smartfunc(1, 2, 'A')  # call style #2
    assert smartfunc.cache_info() == (0, 2, 10, 2)
    # clear BOTH styles of call (alternatively of course just call cache_clear() with no parameters)
    smartfunc.cache_clear(1, 2, baz='A')
    smartfunc.cache_clear(1, 2, 'A')
    # test, both will be misses, as expected
    smartfunc(1, 2, baz='A')  # call style #1
    smartfunc(1, 2, 'A')  # call style #2
    assert smartfunc.cache_info() == (0, 4, 10, 2)

    # Test getting slightly more info out of the cache re a particular function.
    # Extra 'paramsignatures' shows all the active parameter signatures being
    # cached; invalidating a particular signature deletes it from this list.
    info = smartfunc.cache_info(verbose=True)
    assert info.hits == 0
    assert info.misses == 4
    assert info.maxsize == 10
    assert info.currsize == 2
    assert info.paramsignatures == [(1, 2, 'A'), (1, 2, ('baz', 'A'))]  # the new verbose info
    assert len(info.paramsignatures) == 2
    assert (1, 2, 'A') in info.paramsignatures
    assert (1, 2, ('baz', 'A')) in info.paramsignatures
    print("tests pass")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment