Make Cloudfiles copy memory efficient too (#419)

author Sebastian Spaeth <Sebastian@SSpaeth.de>

Wed, 19 Dec 2012 14:59:44 +0000 (15:59 +0100)

committer Joar Wandborg <joar@wandborg.se>

Sat, 2 Mar 2013 22:44:54 +0000 (23:44 +0100)
author Sebastian Spaeth <Sebastian@SSpaeth.de>
Wed, 19 Dec 2012 14:59:44 +0000 (15:59 +0100)
committer Joar Wandborg <joar@wandborg.se>
Sat, 2 Mar 2013 22:44:54 +0000 (23:44 +0100)
diff --git a/mediagoblin/storage/cloudfiles.py b/mediagoblin/storage/cloudfiles.py

index 1b5a63637fa70e97275825a22f1cdd25dffce7bb..e6d21726bd2cb33ef8955f4614d35431c4c8d39a 100644 (file)
--- a/mediagoblin/storage/cloudfiles.py
+++ b/mediagoblin/storage/cloudfiles.py
@@ -131,6 +131,42 @@ class CloudFilesStorage(StorageInterface):
                  self._resolve_filepath(filepath)])
  
  
+    def copy_locally(self, filepath, dest_path):
+        """
+        Copy the stored file at filepath to the local path dest_path.
+
+        This overrides the generic implementation by iterating over
+        the storage file object, so the content is written to
+        dest_path piece by piece as the iterator yields it rather
+        than being read into memory in one go.
+        """
+        # Iterating the object returned by get_file() streams its data.
+        # open() replaces file(), which Python 3 no longer provides.
+        with self.get_file(filepath, 'rb') as source_file:
+            with open(dest_path, 'wb') as dest_file:
+                for data in source_file:
+                    dest_file.write(data)
+
+    def copy_local_to_storage(self, filename, filepath):
+        """
+        Copy the local file filename to filepath in the storage system.
+
+        This is the opposite of copy_locally.  The local file object
+        is handed to the storage file's send() method in one call
+        instead of being written to it chunk by chunk.
+        """
+        # It seems that (our implementation of) cloudfiles.write() takes
+        # all existing data and appends write(data) to it, sending the
+        # full monty over the wire every time. This would of course
+        # absolutely kill chunked writes with quadratic, O(n^2), time
+        # and bandwidth usage. So, override this method and use the
+        # Cloudfile's "send" interface instead.
+        # TODO: Fixing write() still seems worthwhile though.
+        with self.get_file(filepath, 'wb') as dest_file:
+            with open(filename, 'rb') as source_file:
+                # NOTE(review): send() presumably chunks (~4096 B); confirm
+                dest_file.send(source_file)
+
  class CloudFilesStorageObjectWrapper():
      """
      Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
@@ -188,3 +224,15 @@ class CloudFilesStorageObjectWrapper():
          see self.__enter__()
          """
          self.close()
+
+
+    def __iter__(self, **kwargs):
+        """Iterate over the content through storage_object.stream().
+
+        Keyword arguments are forwarded to stream() untouched; chunks
+        are presumably 8192 bytes unless told otherwise (TODO confirm).
+
+        Warning: the HTTP response is only complete once the returned
+        generator is exhausted; call no other methods before then."""
+        content_stream = self.storage_object.stream(**kwargs)
+        return content_stream
author	Sebastian Spaeth <Sebastian@SSpaeth.de>
	Wed, 19 Dec 2012 14:59:44 +0000 (15:59 +0100)
committer	Joar Wandborg <joar@wandborg.se>
	Sat, 2 Mar 2013 22:44:54 +0000 (23:44 +0100)