From 8b35e7ad158f99bc7d3bb34774a6fde6d06bca67 Mon Sep 17 00:00:00 2001
From: Sebastian Spaeth
Date: Wed, 19 Dec 2012 15:59:44 +0100
Subject: [PATCH] Make Cloudfiles copy memory efficient too (#419)

It seems that (our implementation of) cloudfiles.write() takes all
existing data and appends write(data) to it, sending the full monty
over the wire every time. This would of course absolutely kill chunked
writes with O(n^2) performance and bandwidth usage. So, override
this method and use the Cloudfile's "send" interface instead.

Also make the Cloudfile file wrapper an iterator, which allows us to
simply do "for data in cloudfile:" and stream the data in a
memory-efficient way.

DO NOTE THAT THIS PATCH IS COMPLETELY UNTESTED DUE TO LACK OF SETUP.
PLEASE REVIEW AND VERIFY.
---
 mediagoblin/storage/cloudfiles.py | 48 +++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/mediagoblin/storage/cloudfiles.py b/mediagoblin/storage/cloudfiles.py
index 1b5a6363..e6d21726 100644
--- a/mediagoblin/storage/cloudfiles.py
+++ b/mediagoblin/storage/cloudfiles.py
@@ -131,6 +131,42 @@ class CloudFilesStorage(StorageInterface):
                 self._resolve_filepath(filepath)])
 
+    def copy_locally(self, filepath, dest_path):
+        """
+        Copy this file locally.
+
+        A basic working method for this is provided that should
+        function both for local_storage systems and remote storage
+        systems, but if more efficient ways of copying locally
+        apply to your system, override this method with something more
+        appropriate.
+        """
+        # Use the "stream" iterator for memory-efficient streaming
+        with self.get_file(filepath, 'rb') as source_file:
+            with file(dest_path, 'wb') as dest_file:
+                for data in source_file:
+                    dest_file.write(data)
+
+    def copy_local_to_storage(self, filename, filepath):
+        """
+        Copy this file from the local filesystem to the storage system.
+
+        This is kind of the opposite of copy_locally. It's likely you
+        could override this method with something more appropriate to
+        your storage system.
+        """
+        # It seems that (our implementation of) cloudfiles.write() takes
+        # all existing data and appends write(data) to it, sending the
+        # full monty over the wire every time. This would of course
+        # absolutely kill chunked writes with O(n^2) performance
+        # and bandwidth usage. So, override this method and use the
+        # Cloudfile's "send" interface instead.
+        # TODO: Fixing write() still seems worthwhile though.
+        with self.get_file(filepath, 'wb') as dest_file:
+            with file(filename, 'rb') as source_file:
+                # Copy to the storage system in 4096-byte chunks
+                dest_file.send(source_file)
+
 
 class CloudFilesStorageObjectWrapper():
     """
     Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
@@ -188,3 +224,15 @@ class CloudFilesStorageObjectWrapper():
         see self.__enter__()
         """
         self.close()
+
+
+    def __iter__(self, **kwargs):
+        """Make CloudFile an iterator, yielding 8192 bytes by default.
+
+        This returns a generator object that can be used to get the
+        object's content in a memory-efficient way.
+
+        Warning: The HTTP response is only complete after this generator
+        has raised StopIteration. No other methods can be called until
+        this has occurred."""
+        return self.storage_object.stream(**kwargs)
--
2.25.1
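
For reference, a rough standalone sketch of the chunked-copy pattern the patch
relies on: read a bounded-size block from the source and hand it to the
destination as soon as it arrives, so memory use stays flat and no byte crosses
the wire more than once. This is plain Python with no cloudfiles dependency;
the function name, file names, and 4096-byte chunk size are illustrative only
and not part of the patch.

    # Illustration only -- not part of the patch above.
    def copy_in_chunks(source_path, dest_path, chunk_size=4096):
        """Copy source_path to dest_path one bounded chunk at a time."""
        with open(source_path, 'rb') as src, open(dest_path, 'wb') as dst:
            while True:
                chunk = src.read(chunk_size)
                if not chunk:
                    break
                # Each chunk is written exactly once; nothing already
                # transferred is ever re-sent, unlike the write() behaviour
                # described in the commit message.
                dst.write(chunk)

    # e.g. copy_in_chunks('/tmp/source.bin', '/tmp/dest.bin')

The patch gets the same effect without a manual read loop by delegating to
cloudfiles' send() on upload and stream() on download.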