Switch test_app generation over to use py.test fixtures.

[mediagoblin.git] / mediagoblin / storage / cloudfiles.py
diff --git a/mediagoblin/storage/cloudfiles.py b/mediagoblin/storage/cloudfiles.py

index b1dd945010710f1ae96c393fc0d647017fc1c624..b6e57c911f73d64a89ae60d762fc1f2cc6d4913f 100644 (file)
--- a/mediagoblin/storage/cloudfiles.py
+++ b/mediagoblin/storage/cloudfiles.py
@@ -1,5 +1,5 @@
  # GNU MediaGoblin -- federated, autonomous media hosting
-# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU Affero General Public License as published by
@@ -26,6 +26,10 @@ from mediagoblin.storage import StorageInterface, clean_listy_filepath
  
  import cloudfiles
  import mimetypes
+import logging
+
+_log = logging.getLogger(__name__)
+
  
  class CloudFilesStorage(StorageInterface):
      '''
@@ -41,8 +45,11 @@ class CloudFilesStorage(StorageInterface):
          self.param_host = kwargs.get('cloudfiles_host')
          self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')
  
+        # the Mime Type webm doesn't exist, let's add it
+        mimetypes.add_type("video/webm", "webm")
+
          if not self.param_host:
-            print('No CloudFiles host URL specified, '
+            _log.info('No CloudFiles host URL specified, '
                    'defaulting to Rackspace US')
  
          self.connection = cloudfiles.get_connection(
@@ -51,6 +58,10 @@ class CloudFilesStorage(StorageInterface):
              servicenet=True if self.param_use_servicenet == 'true' or \
                  self.param_use_servicenet == True else False)
  
+        _log.debug('Connected to {0} (auth: {1})'.format(
+            self.connection.connection.host,
+            self.connection.auth.host))
+
          if not self.param_container == \
                  self.connection.get_container(self.param_container):
              self.container = self.connection.create_container(
@@ -61,6 +72,9 @@ class CloudFilesStorage(StorageInterface):
              self.container = self.connection.get_container(
                  self.param_container)
  
+        _log.debug('Container: {0}'.format(
+            self.container.name))
+
          self.container_uri = self.container.public_uri()
  
      def _resolve_filepath(self, filepath):
@@ -69,15 +83,14 @@ class CloudFilesStorage(StorageInterface):
  
      def file_exists(self, filepath):
          try:
-            object = self.container.get_object(
-                self._resolve_filepath(filepath))
+            self.container.get_object(self._resolve_filepath(filepath))
              return True
          except cloudfiles.errors.NoSuchObject:
              return False
  
      def get_file(self, filepath, *args, **kwargs):
          """
-        - Doesn't care about the "mode" argument
+        - Doesn't care about the "mode" argument.
          """
          try:
              obj = self.container.get_object(
@@ -86,19 +99,31 @@ class CloudFilesStorage(StorageInterface):
              obj = self.container.create_object(
                  self._resolve_filepath(filepath))
  
+            # Detect the mimetype ourselves, since some extensions (webm)
+            # may not be universally accepted as video/webm
              mimetype = mimetypes.guess_type(
                  filepath[-1])
  
-            if mimetype:
+            if mimetype[0]:
+                # Set the mimetype on the CloudFiles object
                  obj.content_type = mimetype[0]
+                obj.metadata = {'mime-type': mimetype[0]}
+            else:
+                obj.content_type = 'application/octet-stream'
+                obj.metadata = {'mime-type': 'application/octet-stream'}
  
          return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)
  
      def delete_file(self, filepath):
          # TODO: Also delete unused directories if empty (safely, with
          # checks to avoid race conditions).
-        self.container.delete_object(
-            self._resolve_filepath(filepath))
+        try:
+            self.container.delete_object(
+                self._resolve_filepath(filepath))
+        except cloudfiles.container.ResponseError:
+            pass
+        finally:
+            pass
  
      def file_url(self, filepath):
          return '/'.join([
@@ -106,12 +131,49 @@ class CloudFilesStorage(StorageInterface):
                  self._resolve_filepath(filepath)])
  
  
+    def copy_locally(self, filepath, dest_path):
+        """
+        Copy this file locally.
+
+        A basic working method for this is provided that should
+        function both for local_storage systems and remote storage
+        systems, but if more efficient systems for copying locally
+        apply to your system, override this method with something more
+        appropriate.
+        """
+        # Override this method, using the "stream" iterator for efficient streaming
+        with self.get_file(filepath, 'rb') as source_file:
+            with file(dest_path, 'wb') as dest_file:
+                for data in source_file:
+                    dest_file.write(data)
+
+    def copy_local_to_storage(self, filename, filepath):
+        """
+        Copy this file from locally to the storage system.
+
+        This is kind of the opposite of copy_locally.  It's likely you
+        could override this method with something more appropriate to
+        your storage system.
+        """
+        # It seems that (our implementation of) cloudfiles.write() takes
+        # all existing data and appends write(data) to it, sending the
+        # full monty over the wire every time. This would of course
+        # absolutely kill chunked writes with some O(n^2) performance
+        # and bandwidth usage. So, override this method and use the
+        # Cloudfile's "send" interface instead.
+        # TODO: Fixing write() still seems worthwhile though.
+        _log.debug('Sending {0} to cloudfiles...'.format(filepath))
+        with self.get_file(filepath, 'wb') as dest_file:
+            with file(filename, 'rb') as source_file:
+                # Copy to storage system in 4096 byte chunks
+                dest_file.send(source_file)
+
  class CloudFilesStorageObjectWrapper():
      """
      Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
      used to circumvent the mystic `medium.jpg` corruption issue, where
      we had both python-cloudfiles and PIL doing buffering on both
-    ends and that breaking things.
+    ends and causing breakage.
  
      This wrapper currently meets mediagoblin's needs for a public_store
      file-like object.
@@ -120,6 +182,8 @@ class CloudFilesStorageObjectWrapper():
          self.storage_object = storage_object
  
      def read(self, *args, **kwargs):
+        _log.debug('Reading {0}'.format(
+            self.storage_object.name))
          return self.storage_object.read(*args, **kwargs)
  
      def write(self, data, *args, **kwargs):
@@ -133,12 +197,26 @@ class CloudFilesStorageObjectWrapper():
          Currently this method does not support any write modes except "append".
          However if we should need it it would be easy implement.
          """
+        _log.warn(
+            '{0}.write() has bad performance! Use .send instead for now'\
+            .format(self.__class__.__name__))
+
          if self.storage_object.size and type(data) == str:
+            _log.debug('{0} is > 0 in size, appending data'.format(
+                self.storage_object.name))
              data = self.read() + data
  
+        _log.debug('Writing {0}'.format(
+            self.storage_object.name))
          self.storage_object.write(data, *args, **kwargs)
  
+    def send(self, *args, **kw):
+        self.storage_object.send(*args, **kw)
+
      def close(self):
+        """
+        Not sure we need anything here.
+        """
          pass
  
      def __enter__(self):
@@ -154,3 +232,15 @@ class CloudFilesStorageObjectWrapper():
          see self.__enter__()
          """
          self.close()
+
+
+    def __iter__(self, **kwargs):
+        """Make CloudFile an iterator, yielding 8192 bytes by default
+
+        This returns a generator object that can be used to get the
+        object's content in a memory efficient way.
+
+        Warning: The HTTP response is only complete after this generator
+        has raised a StopIteration. No other methods can be called until
+        this has occurred."""
+        return self.storage_object.stream(**kwargs)