Implemented file-based cache.
author    Josh Roesslein <jroesslein@gmail.com>
Sun, 9 Aug 2009 18:47:13 +0000 (13:47 -0500)
committer Josh Roesslein <jroesslein@gmail.com>
Sun, 9 Aug 2009 18:47:13 +0000 (13:47 -0500)
README
TODO
tweepy/cache.py

diff --git a/README b/README
index 7e52ba5c8c5cad5dfcfbb9dfa392718b278e6309..6b28470bf2ae7a430a647b0cdd39740e1179eccd 100644
--- a/README
+++ b/README
@@ -14,6 +14,7 @@ Features:
   Supports most of the twitter api endpoints.
   Python 3 support. (See py3k branch)
   Streaming API
+  Cache system (memory and file based)
 
 Getting started:
   Check out the tutorial folder to get started with Tweepy.
diff --git a/TODO b/TODO
index c178025b9d9595a49167fb28938e017cccbe31cc..b61c1c4e5d2f5b670f2ee802a137590b7304ccba 100644
--- a/TODO
+++ b/TODO
@@ -3,6 +3,7 @@ Stuff that needs to be done...
 - finish unit tests
 - search API
 - caching system
-    + in-memory cache done. now just need a file-based cache (pickle?)
+    - memcache? database?
+    + memory and file caches implemented
 - needs docs, tutorials, examples, etc
 - commandline client
diff --git a/tweepy/cache.py b/tweepy/cache.py
index 9062f2284ec41a2a76266503e904a25058eeba5f..3cb91bb3399889803e403c5833278de7981a59cc 100644
--- a/tweepy/cache.py
+++ b/tweepy/cache.py
@@ -4,6 +4,10 @@
 
 import time
 import threading
+import os
+import hashlib
+import fcntl
+import cPickle as pickle
 
 """Cache interface"""
 class Cache(object):
@@ -71,10 +75,7 @@ class MemoryCache(Cache):
 
       # use provided timeout in arguments if provided
       # otherwise use the one provided during init.
-      if timeout is None:
-        _timeout = self.timeout
-      else:
-        _timeout = timeout
+      _timeout = self.timeout if timeout is None else timeout
 
       # make sure entry is not expired
       if self._is_expired(entry, _timeout):
@@ -95,3 +96,94 @@ class MemoryCache(Cache):
     with self.lock:
       self._entries.clear()
 
+"""File-based cache"""
+class FileCache(Cache):
+
+  def __init__(self, cache_dir, timeout=60):
+    Cache.__init__(self, timeout)
+    if not os.path.exists(cache_dir):
+      os.mkdir(cache_dir)
+    self.cache_dir = cache_dir
+    self.lock = threading.Lock()
+
+  def _get_path(self, key):
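+    # hash the key with MD5 so it maps to a safe, fixed-length file name under cache_dir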
+    md5 = hashlib.md5()
+    md5.update(key)
+    return os.path.join(self.cache_dir, md5.hexdigest())
+
+  def _lock_file(self, path, exclusive=True):
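+    # take an advisory fcntl lock on a companion '.lock' file: exclusive for writers, shared for readers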
+    lock_path = path + '.lock'
+    if exclusive:
+      f_lock = open(lock_path, 'w')
+      fcntl.lockf(f_lock, fcntl.LOCK_EX)
+    else:
+      f_lock = open(lock_path, 'r')
+      fcntl.lockf(f_lock, fcntl.LOCK_SH)
+    if not os.path.exists(lock_path):
+      f_lock.close()
+      return None
+    return f_lock
+
+  def _delete_file(self, path):
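+    # remove a cache entry along with its companion lock file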
+    os.remove(path)
+    os.remove(path + '.lock')
+
+  def store(self, key, value):
+    path = self._get_path(key)
+    with self.lock:
+      # acquire lock and open file
+      f_lock = self._lock_file(path)
+      datafile = open(path, 'wb')
+
+      # write data
+      pickle.dump((time.time(), value), datafile)
+
+      # close and unlock file
+      datafile.close()
+      f_lock.close()
+
+  def get(self, key, timeout=None):
+    return self._get(self._get_path(key), timeout)
+
+  def _get(self, path, timeout):
+    if not os.path.exists(path):
+      # no record
+      return None
+    with self.lock:
+      # acquire lock and open
+      f_lock = self._lock_file(path, False)
+      if f_lock is None:
+        # does not exist
+        return None
+      datafile = open(path, 'rb')
+
+      # read pickled object
+      created_time, value = pickle.load(datafile)
+      datafile.close()
+
+      # check if value is expired
+      _timeout = self.timeout if timeout is None else timeout
+      if _timeout > 0 and (time.time() - created_time) >= _timeout:
+        # expired! delete from cache
+        value = None
+        self._delete_file(path)
+
+      # unlock and return result
+      f_lock.close()
+      return value
+
+  def cleanup(self):
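+    # scan the cache dir; _get() deletes any entry it finds expired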
+    for entry in os.listdir(self.cache_dir):
+      if entry.endswith('.lock'): continue
+      self._get(os.path.join(self.cache_dir, entry), None)
+
+  def flush(self):
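+    # delete every cache entry (and its lock file) in the cache dir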
+    for entry in os.listdir(self.cache_dir):
+      if entry.endswith('.lock'): continue
+      self._delete_file(os.path.join(self.cache_dir, entry))
+
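
A minimal usage sketch of the new FileCache (not part of the commit above; the directory name, key, and value below are only illustrative):

  from tweepy.cache import FileCache

  # cache files live under ./cache_dir; entries expire after 120 seconds
  cache = FileCache('cache_dir', timeout=120)
  cache.store('public_timeline', ['status one', 'status two'])

  # returns the stored value, or None if the entry is missing or expired
  print cache.get('public_timeline')

  cache.cleanup()   # delete expired entries from disk
  cache.flush()     # delete every cached entry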