mediagoblin/storage/cloudfiles.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 '''
  18 Make it so that ``import cloudfiles`` does not pick THIS file, but the
  19 python-cloudfiles one.
  20
  21 http://docs.python.org/whatsnew/2.5.html#pep-328-absolute-and-relative-imports
  22 '''
  23 from __future__ import absolute_import
  24
  25 from mediagoblin.storage import StorageInterface, clean_listy_filepath
  26
  27 import cloudfiles
  28 import mimetypes
  29 import logging
  30
  31 _log = logging.getLogger(__name__)
  32
  33
  34 class CloudFilesStorage(StorageInterface):
  35     '''
  36     OpenStack/Rackspace Cloud's Swift/CloudFiles support
  37     '''
  38
  39     local_storage = False
  40
  41     def __init__(self, **kwargs):
  42         self.param_container = kwargs.get('cloudfiles_container')
  43         self.param_user = kwargs.get('cloudfiles_user')
  44         self.param_api_key = kwargs.get('cloudfiles_api_key')
  45         self.param_host = kwargs.get('cloudfiles_host')
  46         self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')
  47
  48         # the Mime Type webm doesn't exists, let's add it
  49         mimetypes.add_type("video/webm", "webm")
  50
  51         if not self.param_host:
  52             _log.info('No CloudFiles host URL specified, '
  53                   'defaulting to Rackspace US')
  54
  55         self.connection = cloudfiles.get_connection(
  56             username=self.param_user,
  57             api_key=self.param_api_key,
  58             servicenet=True if self.param_use_servicenet == 'true' or \
  59                 self.param_use_servicenet == True else False)
  60
  61         _log.debug('Connected to {0} (auth: {1})'.format(
  62             self.connection.connection.host,
  63             self.connection.auth.host))
  64
  65         if not self.param_container == \
  66                 self.connection.get_container(self.param_container):
  67             self.container = self.connection.create_container(
  68                 self.param_container)
  69             self.container.make_public(
  70                 ttl=60 * 60 * 2)
  71         else:
  72             self.container = self.connection.get_container(
  73                 self.param_container)
  74
  75         _log.debug('Container: {0}'.format(
  76             self.container.name))
  77
  78         self.container_uri = self.container.public_uri()
  79
  80     def _resolve_filepath(self, filepath):
  81         return '/'.join(
  82             clean_listy_filepath(filepath))
  83
  84     def file_exists(self, filepath):
  85         try:
  86             self.container.get_object(self._resolve_filepath(filepath))
  87             return True
  88         except cloudfiles.errors.NoSuchObject:
  89             return False
  90
  91     def get_file(self, filepath, *args, **kwargs):
  92         """
  93         - Doesn't care about the "mode" argument.
  94         """
  95         try:
  96             obj = self.container.get_object(
  97                 self._resolve_filepath(filepath))
  98         except cloudfiles.errors.NoSuchObject:
  99             obj = self.container.create_object(
 100                 self._resolve_filepath(filepath))
 101
 102             # Detect the mimetype ourselves, since some extensions (webm)
 103             # may not be universally accepted as video/webm
 104             mimetype = mimetypes.guess_type(
 105                 filepath[-1])
 106
 107             if mimetype[0]:
 108                 # Set the mimetype on the CloudFiles object
 109                 obj.content_type = mimetype[0]
 110                 obj.metadata = {'mime-type': mimetype[0]}
 111             else:
 112                 obj.content_type = 'application/octet-stream'
 113                 obj.metadata = {'mime-type': 'application/octet-stream'}
 114
 115         return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)
 116
 117     def delete_file(self, filepath):
 118         # TODO: Also delete unused directories if empty (safely, with
 119         # checks to avoid race conditions).
 120         try:
 121             self.container.delete_object(
 122                 self._resolve_filepath(filepath))
 123         except cloudfiles.container.ResponseError:
 124             pass
 125         finally:
 126             pass
 127
 128     def file_url(self, filepath):
 129         return '/'.join([
 130                 self.container_uri,
 131                 self._resolve_filepath(filepath)])
 132
 133
 134     def copy_locally(self, filepath, dest_path):
 135         """
 136         Copy this file locally.
 137
 138         A basic working method for this is provided that should
 139         function both for local_storage systems and remote storge
 140         systems, but if more efficient systems for copying locally
 141         apply to your system, override this method with something more
 142         appropriate.
 143         """
 144         # Override this method, using the "stream" iterator for efficient streaming
 145         with self.get_file(filepath, 'rb') as source_file:
 146             with file(dest_path, 'wb') as dest_file:
 147                 for data in source_file:
 148                     dest_file.write(data)
 149
 150     def copy_local_to_storage(self, filename, filepath):
 151         """
 152         Copy this file from locally to the storage system.
 153
 154         This is kind of the opposite of copy_locally.  It's likely you
 155         could override this method with something more appropriate to
 156         your storage system.
 157         """
 158         # It seems that (our implementation of) cloudfiles.write() takes
 159         # all existing data and appends write(data) to it, sending the
 160         # full monty over the wire everytime. This would of course
 161         # absolutely kill chunked writes with some O(1^n) performance
 162         # and bandwidth usage. So, override this method and use the
 163         # Cloudfile's "send" interface instead.
 164         # TODO: Fixing write() still seems worthwhile though.
 165         _log.debug('Sending {0} to cloudfiles...'.format(filepath))
 166         with self.get_file(filepath, 'wb') as dest_file:
 167             with file(filename, 'rb') as source_file:
 168                 # Copy to storage system in 4096 byte chunks
 169                 dest_file.send(source_file)
 170
 171 class CloudFilesStorageObjectWrapper():
 172     """
 173     Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
 174     used to circumvent the mystic `medium.jpg` corruption issue, where
 175     we had both python-cloudfiles and PIL doing buffering on both
 176     ends and causing breakage.
 177
 178     This wrapper currently meets mediagoblin's needs for a public_store
 179     file-like object.
 180     """
 181     def __init__(self, storage_object, *args, **kwargs):
 182         self.storage_object = storage_object
 183
 184     def read(self, *args, **kwargs):
 185         _log.debug('Reading {0}'.format(
 186             self.storage_object.name))
 187         return self.storage_object.read(*args, **kwargs)
 188
 189     def write(self, data, *args, **kwargs):
 190         """
 191         write data to the cloudfiles storage object
 192
 193         The original motivation for this wrapper is to ensure
 194         that buffered writing to a cloudfiles storage object does not overwrite
 195         any preexisting data.
 196
 197         Currently this method does not support any write modes except "append".
 198         However if we should need it it would be easy implement.
 199         """
 200         _log.warn(
 201             '{0}.write() has bad performance! Use .send instead for now'\
 202             .format(self.__class__.__name__))
 203
 204         if self.storage_object.size and type(data) == str:
 205             _log.debug('{0} is > 0 in size, appending data'.format(
 206                 self.storage_object.name))
 207             data = self.read() + data
 208
 209         _log.debug('Writing {0}'.format(
 210             self.storage_object.name))
 211         self.storage_object.write(data, *args, **kwargs)
 212
 213     def send(self, *args, **kw):
 214         self.storage_object.send(*args, **kw)
 215
 216     def close(self):
 217         """
 218         Not sure we need anything here.
 219         """
 220         pass
 221
 222     def __enter__(self):
 223         """
 224         Context Manager API implementation
 225         http://docs.python.org/library/stdtypes.html#context-manager-types
 226         """
 227         return self
 228
 229     def __exit__(self, *exc_info):
 230         """
 231         Context Manger API implementation
 232         see self.__enter__()
 233         """
 234         self.close()
 235
 236
 237     def __iter__(self, **kwargs):
 238         """Make CloudFile an iterator, yielding 8192 bytes by default
 239
 240         This returns a generator object that can be used to getting the
 241         object's content in a memory efficient way.
 242
 243         Warning: The HTTP response is only complete after this generator
 244         has raised a StopIteration. No other methods can be called until
 245         this has occurred."""
 246         return self.storage_object.stream(**kwargs)