From: Sebastian Spaeth Date: Wed, 19 Dec 2012 10:06:51 +0000 (+0100) Subject: Make copying to/from storage systems memory efficient (#419) X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=99a54c0095ccadcebeb640cb20cb6eadb8b9a39d;p=mediagoblin.git Make copying to/from storage systems memory efficient (#419) The copy_locally and copy_local_to_storage (very inconsistent terms BTW) were simply slurping in everything in RAM and writing it out at once. (the copy_locally was actually memory efficient if the remote system was local) Use shutil.copyfileobj which does chunked reads/writes on file objects. The default buffer size is 16kb, and as each chunk means a separate HTTP request for e.g. cloudfiles, we use a chunksize of 4MB here (which has just been arbitrarily set by me without tests). This should help with the failure to upload large files issue #419. --- diff --git a/mediagoblin/storage/__init__.py b/mediagoblin/storage/__init__.py index 2db4c37d..5c1d7d36 100644 --- a/mediagoblin/storage/__init__.py +++ b/mediagoblin/storage/__init__.py @@ -160,12 +160,13 @@ class StorageInterface(object): appropriate. """ if self.local_storage: - shutil.copy( - self.get_local_path(filepath), dest_path) + # Note: this will copy in small chunks + shutil.copy(self.get_local_path(filepath), dest_path) else: with self.get_file(filepath, 'rb') as source_file: with file(dest_path, 'wb') as dest_file: - dest_file.write(source_file.read()) + # Copy from remote storage in 4M chunks + shutil.copyfileobj(source_file, dest_file, length=4*1048576) def copy_local_to_storage(self, filename, filepath): """ @@ -177,7 +178,8 @@ class StorageInterface(object): """ with self.get_file(filepath, 'wb') as dest_file: with file(filename, 'rb') as source_file: - dest_file.write(source_file.read()) + # Copy to storage system in 4M chunks + shutil.copyfileobj(source_file, dest_file, length=4*1048576) ########### diff --git a/mediagoblin/storage/filestorage.py b/mediagoblin/storage/filestorage.py index 00d6335e..ef786b61 100644 --- a/mediagoblin/storage/filestorage.py +++ b/mediagoblin/storage/filestorage.py @@ -87,6 +87,5 @@ class BasicFileStorage(StorageInterface): directory = self._resolve_filepath(filepath[:-1]) if not os.path.exists(directory): os.makedirs(directory) - - shutil.copy( - filename, self.get_local_path(filepath)) + # This uses chunked copying of 16kb buffers (Py2.7): + shutil.copy(filename, self.get_local_path(filepath))