Proper handling of processor failures, working as hoped!
[mediagoblin.git] / mediagoblin / storage.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
a6b378ef
CAW
2# Copyright (C) 2011 Free Software Foundation, Inc
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
779f2b94 17import os
ffa22935 18import re
6a07362d 19import shutil
f61a41b8 20import urlparse
2fdec827 21import uuid
851c51a3 22import cloudfiles
a6b378ef
CAW
23
24from werkzeug.utils import secure_filename
25
ffa22935
CAW
26from mediagoblin import util
27
d807b725
CAW
28########
29# Errors
30########
a6b378ef 31
770c12be 32
aa797ca1
JW
class Error(Exception):
    """Base class for the exceptions defined by this storage module."""
35
36
class InvalidFilepath(Error):
    """
    Raised by clean_listy_filepath when a filepath component cannot be
    turned into a usable (non-empty) name.
    """
39
40
class NoWebServing(Error):
    """
    Raised when a URL is requested from a storage that has no base URL
    configured (see BasicFileStorage.file_url).
    """
43
44
class NotImplementedError(Error):
    """
    Raised when a storage implementation does not provide a feature.

    NOTE: inside this module this name shadows the builtin
    NotImplementedError; this class derives from Error, not from the
    builtin.
    """
797be93c 47
770c12be 48
d807b725
CAW
49###############################################
50# Storage interface & basic file implementation
51###############################################
a6b378ef 52
797be93c
CAW
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: listy filepath of the file inside this storage.
         - dest_path: local filesystem path the copy is written to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                # open() rather than the Python-2-only file() builtin.
                with open(dest_path, 'wb') as dest_file:
                    # Deliberately a single read(): not every backend's
                    # get_file() result is guaranteed to behave like a
                    # stream under repeated read(size) calls.
                    dest_file.write(source_file.read())
173
779f2b94
CAW
174
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether the resolved local path for filepath exists.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it.

        The containing directory is created first when filepath has
        more than one component.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            # Create first and inspect afterwards: checking for
            # existence before calling makedirs() races with any other
            # process creating the same directory.
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from the local filesystem.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, joined onto base_url.

        Raises NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """
        Return the local filesystem path for filepath.
        """
        return self._resolve_filepath(filepath)
228
ffa22935 229
aa797ca1
JW
class CloudFilesStorage(StorageInterface):
    """
    Storage API implementation that keeps files as objects inside a
    CloudFiles container, accessed through the cloudfiles library.
    """

    def __init__(self, **kwargs):
        """
        Keyword arguments (all read with kwargs.get, so all optional
        as far as this constructor is concerned):
         - cloudfiles_container: name of the container to use
         - cloudfiles_user: username for the connection
         - cloudfiles_api_key: API key for the connection
         - cloudfiles_host: host URL; only checked to print a notice
         - cloudfiles_use_servicenet: 'true' or True enables servicenet
        """
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        # NOTE(review): param_host is stored but never handed to
        # get_connection() below -- confirm whether it should be.
        if not self.param_host:
            print('No CloudFiles host URL specified, '
                  'defaulting to Rackspace US')

        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        # Fetch the container, creating it when it does not exist yet.
        # The previous code compared the container *name* against the
        # container object returned by get_container() and called
        # get_container() unguarded, so a missing container raised
        # instead of being created.
        try:
            self.container = self.connection.get_container(
                self.param_container)
        except cloudfiles.errors.NoSuchContainer:
            self.container = self.connection.create_container(
                self.param_container)

        # Previously make_public() ended up being called on every
        # start-up as well (the name/object comparison presumably never
        # matched); keep doing so for new and existing containers.
        self.container.make_public(
            ttl=60 * 60 * 2)

    def _resolve_filepath(self, filepath):
        """
        Turn a listy filepath into a '/'-joined object name.
        """
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return True if an object for filepath exists in the container.
        """
        try:
            self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            return False
        return True

    def get_file(self, filepath, mode='r'):
        """
        Return the container object for filepath, creating it when it
        does not exist yet.  The mode argument is not used.
        """
        try:
            obj = self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(
                self._resolve_filepath(filepath))

        return obj

    def delete_file(self, filepath):
        """
        Delete the object stored for filepath from the container.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        # Objects are stored under the resolved '/'-joined name, so
        # that name (not the raw list) must be passed for deletion.
        self.container.delete_object(
            self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the public URI of the object for filepath.

        Goes through get_file(), so a missing object is created.
        """
        return self.get_file(filepath).public_uri()
287
288
class MountStorage(StorageInterface):
    """
    Experimental "Mount" virtual Storage Interface

    This isn't an interface to some real storage, instead it's a
    redirecting interface, that redirects requests to other
    "StorageInterface"s.

    For example, say you have the paths:

     1. ['user_data', 'cwebber', 'avatar.jpg']
     2. ['user_data', 'elrond', 'avatar.jpg']
     3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']

    You could mount media_entries under CloudFileStorage and user_data
    under BasicFileStorage.  Then 1 would be passed to
    BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
    would be passed to CloudFileStorage under
    ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].

    In other words, this is kind of like mounting /home/ and /etc/
    under different filesystems on your operating system... but with
    mediagoblin filestorages :)

    To set this up, you currently need to call the mount() method with
    the target path and a backend, that shall be available under that
    target path.  You have to mount things in a sensible order,
    especially you can't mount ["a", "b"] before ["a"].

    The mount table is a tree of nested dicts: string keys lead to
    sub-tables, and the key None holds the backend mounted at that node.
    """

    def __init__(self, **kwargs):
        # Root of the mount tree; keyword arguments are ignored.
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath
        """
        mount_point = clean_listy_filepath(dirpath)

        print("Mounting: " + repr(mount_point))
        found_backend, found_path, node, pending = \
            self._resolve_to_backend(mount_point, True)
        print(" ".join([
            "===", repr(found_backend), repr(found_path),
            repr(pending), str(len(node))]))

        # With nothing left to create, the node we landed on must
        # neither hold a backend already nor have children.
        assert pending or (None not in node), \
            "That path is already mounted"
        assert pending or (not node), \
            "A longer path is already mounted here"

        # Create the missing intermediate tables, then attach the backend.
        for part in pending:
            child = {}
            node[part] = child
            node = child
        node[None] = backend

    def _resolve_to_backend(self, filepath, extra_info = False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        node = self.mounttab
        remaining = filepath[:]
        best_path = None
        while True:
            candidate = node.get(None)
            # Remember the deepest node that has a backend; on the very
            # first pass record the root even when it has none.
            if (candidate is not None) or best_path is None:
                best_backend = candidate
                best_path = remaining[:]
                extra = (node, remaining[:])
            if not remaining:
                break
            child = node.get(remaining.pop(0))
            if child is None:
                break
            node = child
            extra = (node, remaining[:])

        if extra_info:
            return (best_backend, best_path) + extra
        return (best_backend, best_path)

    def resolve_to_backend(self, filepath):
        """
        Return (backend, filepath inside that backend) for filepath.

        Raises Error if no backend is mounted for that path.
        """
        backend, inner_path = self._resolve_to_backend(filepath)
        if backend is None:
            raise Error("Path not mounted")
        return backend, inner_path

    def __repr__(self, table = None, indent = []):
        """
        Render the mount tree as text.

        Called without arguments this returns a string; the recursive
        calls for sub-tables return a list of lines instead.
        """
        lines = []
        if table is None:
            lines.append("MountStorage<")
            table = self.mounttab

        padding = " " * len(indent)
        mounted = table.get(None)
        if mounted:
            lines.append(padding + repr(indent) + ": " + repr(mounted))

        for key, subtable in table.items():
            if key is None:
                continue
            lines.append(padding + repr(key) + ":")
            lines += self.__repr__(subtable, indent + [key])

        if table is self.mounttab:
            lines.append(">")
            return "\n".join(lines)
        return lines

    def file_exists(self, filepath):
        """Delegate file_exists to the backend mounted for filepath."""
        target, inner_path = self.resolve_to_backend(filepath)
        return target.file_exists(inner_path)

    def get_file(self, filepath, mode='r'):
        """Delegate get_file to the backend mounted for filepath."""
        target, inner_path = self.resolve_to_backend(filepath)
        return target.get_file(inner_path, mode)

    def delete_file(self, filepath):
        """Delegate delete_file to the backend mounted for filepath."""
        target, inner_path = self.resolve_to_backend(filepath)
        return target.delete_file(inner_path)

    def file_url(self, filepath):
        """Delegate file_url to the backend mounted for filepath."""
        target, inner_path = self.resolve_to_backend(filepath)
        return target.file_url(inner_path)

    def get_local_path(self, filepath):
        """Delegate get_local_path to the backend mounted for filepath."""
        target, inner_path = self.resolve_to_backend(filepath)
        return target.get_local_path(inner_path)

    def copy_locally(self, filepath, dest_path):
        """
        Need to override copy_locally, because the local_storage
        attribute is not correct.
        """
        target, inner_path = self.resolve_to_backend(filepath)
        target.copy_locally(inner_path, dest_path)
426
68cf996c 427
d807b725
CAW
428###########
429# Utilities
430###########
431
def clean_listy_filepath(listy_filepath):
    """
    Take a listy filepath (like ['dir1', 'dir2', 'filename.jpg']) and
    clean out any nastiness from it.

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    # Clean every component first, then validate the whole result.
    cleaned_filepath = []
    for component in listy_filepath:
        cleaned_filepath.append(unicode(secure_filename(component)))

    if any(part == u'' for part in cleaned_filepath):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return cleaned_filepath
457
458
def storage_system_from_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Keep only the keys carrying our prefix, with the prefix stripped.
    config_params = {}
    for key, value in paste_config.items():
        matched = prefix_re.match(key)
        if matched:
            config_params[matched.groups()[0]] = value

    # The storage class is not a constructor argument; take it out.
    class_path = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(class_path)
    return storage_class(**config_params)
502 return storage_class(**config_params)