# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

'''
Make it so that ``import cloudfiles`` does not pick THIS file, but the
python-cloudfiles one.

http://docs.python.org/whatsnew/2.5.html#pep-328-absolute-and-relative-imports
'''
from __future__ import absolute_import
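# With absolute_import in effect (Python 2), the ``import cloudfiles`` below
# resolves to the third-party python-cloudfiles package rather than being an
# implicit relative import of this module (mediagoblin.storage.cloudfiles).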

from mediagoblin.storage import StorageInterface, clean_listy_filepath

import cloudfiles
import mimetypes
import logging

_log = logging.getLogger(__name__)


class CloudFilesStorage(StorageInterface):
    '''
    OpenStack/Rackspace Cloud's Swift/CloudFiles support
    '''

    local_storage = False

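    # Illustrative configuration sketch (values below are hypothetical, not
    # part of this module): the backend is constructed from cloudfiles_*
    # keyword arguments, roughly like:
    #
    #     CloudFilesStorage(
    #         cloudfiles_container='mediagoblin',
    #         cloudfiles_user='someuser',
    #         cloudfiles_api_key='xxxxxxxx',
    #         cloudfiles_use_servicenet='true')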
    def __init__(self, **kwargs):
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        # the mime type for webm isn't registered by default, let's add it
        # (note the leading dot: guess_type() keys on the dotted extension)
        mimetypes.add_type("video/webm", ".webm")
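        # Illustrative (not executed here): with the type registered above,
        #     mimetypes.guess_type('sample.webm')
        # should return ('video/webm', None).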

        if not self.param_host:
            _log.info('No CloudFiles host URL specified, '
                      'defaulting to Rackspace US')

        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        _log.debug('Connected to {0} (auth: {1})'.format(
            self.connection.connection.host,
            self.connection.auth.host))

        # Fetch the container, creating it (and making it public) on first use
        try:
            self.container = self.connection.get_container(
                self.param_container)
        except cloudfiles.errors.NoSuchContainer:
            self.container = self.connection.create_container(
                self.param_container)
            self.container.make_public(
                ttl=60 * 60 * 2)

        _log.debug('Container: {0}'.format(
            self.container.name))

        self.container_uri = self.container.public_ssl_uri()

    def _resolve_filepath(self, filepath):
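        # Joins the cleaned "listy" filepath into an object key, e.g.
        # (illustrative) ['media', 'entries', 'thumb.jpg'] -> 'media/entries/thumb.jpg',
        # after clean_listy_filepath() has sanitised each component.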
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        try:
            self.container.get_object(self._resolve_filepath(filepath))
            return True
        except cloudfiles.errors.NoSuchObject:
            return False

    def get_file(self, filepath, *args, **kwargs):
        """
        Return a file-like wrapper for the object at ``filepath``, creating
        the object if it doesn't exist yet.

        - Doesn't care about the "mode" argument.
        """
        try:
            obj = self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(
                self._resolve_filepath(filepath))

            # Detect the mimetype ourselves, since some extensions (webm)
            # may not be universally accepted as video/webm
            mimetype = mimetypes.guess_type(
                filepath[-1])

            if mimetype[0]:
                # Set the mimetype on the CloudFiles object
                obj.content_type = mimetype[0]
                obj.metadata = {'mime-type': mimetype[0]}
            else:
                obj.content_type = 'application/octet-stream'
                obj.metadata = {'mime-type': 'application/octet-stream'}

        return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)

    def delete_file(self, filepath):
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        try:
            self.container.delete_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.ResponseError:
            pass

    def file_url(self, filepath):
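        # Illustrative result (hypothetical URI): with container_uri
        # 'https://cdn.example.invalid' and filepath ['media', 'thumb.jpg'],
        # this returns 'https://cdn.example.invalid/media/thumb.jpg'.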
        return '/'.join([
            self.container_uri,
            self._resolve_filepath(filepath)])

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.
        """
        # This override streams the object through the wrapper's iterator
        # (which wraps cloudfiles' "stream" call) for efficient copying.
        with self.get_file(filepath, 'rb') as source_file:
            with open(dest_path, 'wb') as dest_file:
                for data in source_file:
                    dest_file.write(data)

    def copy_local_to_storage(self, filename, filepath):
        """
        Copy this file from the local filesystem to the storage system.

        This is kind of the opposite of copy_locally. It's likely you
        could override this method with something more appropriate to
        your storage system.
        """
        # It seems that (our implementation of) cloudfiles.write() takes
        # all existing data and appends write(data) to it, sending the
        # full monty over the wire every time. This would of course
        # absolutely kill chunked writes with quadratic performance
        # and bandwidth usage. So, override this method and use the
        # cloudfiles "send" interface instead.
        # TODO: Fixing write() still seems worthwhile though.
        _log.debug('Sending {0} to cloudfiles...'.format(filepath))
        with self.get_file(filepath, 'wb') as dest_file:
            with open(filename, 'rb') as source_file:
                # Copy to storage system in 4096 byte chunks
                dest_file.send(source_file)

    def get_file_size(self, filepath):
        """Returns the file size in bytes"""
        obj = self.container.get_object(
            self._resolve_filepath(filepath))
        return obj.total_bytes


class CloudFilesStorageObjectWrapper():
    """
    Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
    used to circumvent the mysterious `medium.jpg` corruption issue, where
    we had both python-cloudfiles and PIL doing buffering on both
    ends, causing breakage.

    This wrapper currently meets mediagoblin's needs for a public_store
    file-like object.
    """
    def __init__(self, storage_object, *args, **kwargs):
        self.storage_object = storage_object

    def read(self, *args, **kwargs):
        _log.debug('Reading {0}'.format(
            self.storage_object.name))
        return self.storage_object.read(*args, **kwargs)

    def write(self, data, *args, **kwargs):
        """
        Write data to the cloudfiles storage object.

        The original motivation for this wrapper is to ensure
        that buffered writing to a cloudfiles storage object does not
        overwrite any preexisting data.

        Currently this method does not support any write modes except
        "append". However, other modes would be easy to implement should
        we need them.
        """
        _log.warn(
            '{0}.write() has bad performance! Use .send instead for now'
            .format(self.__class__.__name__))

        if self.storage_object.size and isinstance(data, str):
            _log.debug('{0} is > 0 in size, appending data'.format(
                self.storage_object.name))
            data = self.read() + data

        _log.debug('Writing {0}'.format(
            self.storage_object.name))
        self.storage_object.write(data, *args, **kwargs)

    def send(self, *args, **kw):
        self.storage_object.send(*args, **kw)

    def close(self):
        """
        Not sure we need anything here.
        """
        pass

    def __enter__(self):
        """
        Context Manager API implementation
        http://docs.python.org/library/stdtypes.html#context-manager-types
        """
        return self

    def __exit__(self, *exc_info):
        """
        Context Manager API implementation
        see self.__enter__()
        """
        self.close()

    def __iter__(self, **kwargs):
        """Make CloudFile an iterator, yielding 8192 bytes by default

        This returns a generator object that can be used to get the
        object's content in a memory-efficient way.

        Warning: The HTTP response is only complete after this generator
        has raised a StopIteration. No other methods can be called until
        this has occurred."""
        return self.storage_object.stream(**kwargs)
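

# Illustrative end-to-end sketch (hypothetical names, not executed): assuming
# a configured ``storage`` instance of CloudFilesStorage, an upload and a
# local copy back down might look like:
#
#     storage.copy_local_to_storage('/tmp/video.webm', ['media', 'video.webm'])
#     storage.copy_locally(['media', 'video.webm'], '/tmp/video_copy.webm')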