Added .send method to cloudfiles storage object
[mediagoblin.git] / mediagoblin / storage / cloudfiles.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 '''
18 Make it so that ``import cloudfiles`` does not pick THIS file, but the
19 python-cloudfiles one.
20
21 http://docs.python.org/whatsnew/2.5.html#pep-328-absolute-and-relative-imports
22 '''
23 from __future__ import absolute_import
24
25 from mediagoblin.storage import StorageInterface, clean_listy_filepath
26
27 import cloudfiles
28 import mimetypes
29 import logging
30
31 _log = logging.getLogger(__name__)
32
33
class CloudFilesStorage(StorageInterface):
    '''
    OpenStack/Rackspace Cloud's Swift/CloudFiles support

    Every file is stored as an object in a single CloudFiles container.
    List-style file paths are cleaned with ``clean_listy_filepath`` and
    joined with "/" to form the object name.
    '''

    local_storage = False

    def __init__(self, **kwargs):
        '''
        Connect to CloudFiles and get (or create) the storage container.

        Recognized keyword arguments:
         - cloudfiles_container: name of the container holding the files
         - cloudfiles_user: user name handed to cloudfiles.get_connection
         - cloudfiles_api_key: API key handed to cloudfiles.get_connection
         - cloudfiles_host: only inspected to log a notice when it is unset
         - cloudfiles_use_servicenet: 'true' or True enables servicenet
        '''
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        # The webm mime type may be missing from the mimetypes table, so
        # register it. The extension has to include its leading dot,
        # otherwise guess_type() never finds the entry.
        mimetypes.add_type("video/webm", ".webm")

        if not self.param_host:
            _log.info('No CloudFiles host URL specified, '
                      'defaulting to Rackspace US')

        # NOTE(review): param_host is never passed to get_connection(), so a
        # configured host currently has no effect on the connection --
        # confirm the intended keyword argument before wiring it in.
        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        _log.debug('Connected to {0} (auth: {1})'.format(
            self.connection.connection.host,
            self.connection.auth.host))

        # Get the container, creating it when it does not exist yet.
        # (Comparing the container *name* with the Container object, as
        # was done before, could never be equal and raised
        # NoSuchContainer for a missing container.)
        try:
            self.container = self.connection.get_container(
                self.param_container)
        except cloudfiles.errors.NoSuchContainer:
            self.container = self.connection.create_container(
                self.param_container)

        # public_uri() below needs a public container.
        if not self.container.is_public():
            # 60 * 60 * 2 = 7200 (two hours, assuming the TTL is in seconds)
            self.container.make_public(ttl=60 * 60 * 2)

        _log.debug('Container: {0}'.format(
            self.container.name))

        self.container_uri = self.container.public_uri()

    def _resolve_filepath(self, filepath):
        """
        Turn a list-style filepath into a "/"-separated object name.
        """
        return '/'.join(clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return True if the container has an object for filepath, else False.
        """
        try:
            self.container.get_object(self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            return False
        return True

    def get_file(self, filepath, *args, **kwargs):
        """
        Return a CloudFilesStorageObjectWrapper for filepath.

        - Doesn't care about the "mode" argument.
        - When no object exists for filepath yet, one is obtained through
          the container's create_object().
        - The content type is guessed from the last path component and
          falls back to application/octet-stream.
        """
        object_name = self._resolve_filepath(filepath)
        try:
            obj = self.container.get_object(object_name)
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(object_name)

        # Detect the mimetype ourselves, since some extensions (webm)
        # may not be universally accepted as video/webm
        guessed_type = mimetypes.guess_type(filepath[-1])[0]
        content_type = guessed_type or 'application/octet-stream'

        # Set the mimetype on the CloudFiles object
        obj.content_type = content_type
        obj.metadata = {'mime-type': content_type}

        return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)

    def delete_file(self, filepath):
        """
        Delete the object for filepath from the container (best effort).

        A ResponseError from CloudFiles is logged instead of raised.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        object_name = self._resolve_filepath(filepath)
        try:
            self.container.delete_object(object_name)
        except cloudfiles.container.ResponseError as exc:
            # Deleting stays best-effort, but leave a trace of the failure.
            _log.warning('Could not delete %s from cloudfiles: %s',
                         object_name, exc)

    def file_url(self, filepath):
        """
        Return the URL of filepath below the container's public URI.
        """
        return '/'.join([
            self.container_uri,
            self._resolve_filepath(filepath)])

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        The stored object is read through the wrapper's iterator and
        written to dest_path piece by piece, so the whole file does not
        need to be held in memory at once.
        """
        with self.get_file(filepath, 'rb') as source_file:
            with open(dest_path, 'wb') as dest_file:
                for data in source_file:
                    dest_file.write(data)

    def copy_local_to_storage(self, filename, filepath):
        """
        Copy this file from locally to the storage system.

        This is kind of the opposite of copy_locally.
        """
        # Our wrapper's write() re-reads the data already stored and
        # sends it again together with the new data on every call, so
        # chunked writes would cost quadratic time and bandwidth.
        # Use the CloudFiles object's "send" interface instead.
        # TODO: Fixing write() still seems worthwhile though.
        _log.debug('Sending {0} to cloudfiles...'.format(filepath))
        with self.get_file(filepath, 'wb') as dest_file:
            with open(filename, 'rb') as source_file:
                # The open file object is handed to send() as-is; how it
                # gets chunked is up to python-cloudfiles.
                dest_file.send(source_file)
170
class CloudFilesStorageObjectWrapper(object):
    """
    Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
    used to circumvent the mystic `medium.jpg` corruption issue, where
    we had both python-cloudfiles and PIL doing buffering on both
    ends and causing breakage.

    This wrapper currently meets mediagoblin's needs for a public_store
    file-like object.
    """
    def __init__(self, storage_object, *args, **kwargs):
        """
        Wrap storage_object.

        Any further positional or keyword arguments (such as a file
        mode) are accepted and ignored.
        """
        self.storage_object = storage_object

    def read(self, *args, **kwargs):
        """
        Read from the wrapped object; arguments are passed through.
        """
        _log.debug('Reading {0}'.format(
            self.storage_object.name))
        return self.storage_object.read(*args, **kwargs)

    def write(self, data, *args, **kwargs):
        """
        write data to the cloudfiles storage object

        The original motivation for this wrapper is to ensure
        that buffered writing to a cloudfiles storage object does not
        overwrite any preexisting data.

        Currently this method does not support any write modes except
        "append". However, if we should need it, it would be easy to
        implement.
        """
        _log.warning(
            '{0}.write() has bad performance! Use .send instead for now'
            .format(self.__class__.__name__))

        # Appending means reading back what is already stored and
        # writing it again in front of the new data. Only done for byte
        # strings, which can be concatenated with the read() result.
        if self.storage_object.size and isinstance(data, bytes):
            _log.debug('{0} is > 0 in size, appending data'.format(
                self.storage_object.name))
            data = self.read() + data

        _log.debug('Writing {0}'.format(
            self.storage_object.name))
        self.storage_object.write(data, *args, **kwargs)

    def send(self, *args, **kw):
        """
        Pass all arguments on to the wrapped object's send().
        """
        self.storage_object.send(*args, **kw)

    def close(self):
        """
        Not sure we need anything here.
        """
        pass

    def __enter__(self):
        """
        Context Manager API implementation
        http://docs.python.org/library/stdtypes.html#context-manager-types
        """
        return self

    def __exit__(self, *exc_info):
        """
        Context Manager API implementation
        see self.__enter__()

        Returns None, so exceptions raised inside the with-block propagate.
        """
        self.close()

    def __iter__(self, **kwargs):
        """Make CloudFile an iterator, yielding 8192 bytes by default

        This returns a generator object that can be used for getting the
        object's content in a memory efficient way.

        Warning: The HTTP response is only complete after this generator
        has raised a StopIteration. No other methods can be called until
        this has occurred."""
        return self.storage_object.stream(**kwargs)