508. Updates COPYING file; adds translations bit
[mediagoblin.git] / mediagoblin / storage.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
a6b378ef
CAW
2# Copyright (C) 2011 Free Software Foundation, Inc
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
import logging
import mimetypes
import os
import shutil
import tempfile
import urlparse
import uuid

import cloudfiles
from werkzeug.utils import secure_filename

from mediagoblin import util
28
d807b725
CAW
29########
30# Errors
31########
a6b378ef 32
770c12be 33
aa797ca1
JW
class Error(Exception):
    """Base class for every exception raised by the storage module."""


class InvalidFilepath(Error):
    """Raised when a listy filepath contains a component that cannot be
    turned into a usable name."""


class NoWebServing(Error):
    """Raised when a URL is requested from a storage that has no base URL
    configured."""


# The name shadows the builtin on purpose-preserving grounds: existing callers
# refer to ``storage.NotImplementedError``.  The bases are evaluated before
# the new name is bound, so the first base below is still the *builtin*
# NotImplementedError.  Deriving from it as well as from Error means the
# exception is caught both by ``except storage.Error`` and by a plain
# ``except NotImplementedError`` elsewhere in the code base.
class NotImplementedError(NotImplementedError, Error):
    """Raised when a storage implementation does not provide a feature of
    the storage API."""
797be93c 48
770c12be 49
d807b725
CAW
50###############################################
51# Storage interface & basic file implementation
52###############################################
a6b378ef 53
797be93c
CAW
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.

        Raises:
          NotImplementedError, always.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            # Only the final component (the filename) gets the uuid prefix.
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: listy filepath of the file inside this storage.
         - dest_path: local filesystem path to write the copy to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                # open() rather than the Python-2-only file() builtin.
                with open(dest_path, 'wb') as dest_file:
                    # NOTE(review): one plain read() is kept on purpose;
                    # file-like objects from remote backends are only
                    # assumed to support an argument-less read().
                    dest_file.write(source_file.read())
174
779f2b94
CAW
175
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The listy filepath is cleaned first and then joined onto base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """Return whether the resolved local path exists."""
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode, creating any missing
        parent directories first.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            # Try to create and tolerate "already there" instead of
            # checking first: another process may create the directory
            # between an exists() check and makedirs().
            try:
                os.makedirs(directory)
            except OSError:
                # Only swallow the error when the path is present now;
                # anything else (permissions, ...) is a real failure.
                if not os.path.exists(directory):
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """Remove the file at filepath from the local filesystem."""
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL of the file, built from base_url and the cleaned
        filepath components.

        Raises:
          NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """Return the local filesystem path of the file."""
        return self._resolve_filepath(filepath)
229
ffa22935 230
4dea708c
CAW
231# ----------------------------------------------------
232# OpenStack/Rackspace Cloud's Swift/CloudFiles support
233# ----------------------------------------------------
b5e7b967 234
class CloudFilesStorage(StorageInterface):
    """
    Storage API implementation backed by a CloudFiles container, accessed
    through the python-cloudfiles library.
    """

    def __init__(self, **kwargs):
        """
        Keyword arguments (all read with kwargs.get, so all optional here):
         - cloudfiles_container: name of the container to use
         - cloudfiles_user: username for the connection
         - cloudfiles_api_key: API key for the connection
         - cloudfiles_host: stored on the instance; only used to decide
           whether to print the "defaulting" notice below
         - cloudfiles_use_servicenet: 'true' or True enables servicenet
        """
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        if not self.param_host:
            print('No CloudFiles host URL specified, '
                  'defaulting to Rackspace US')

        # The setting may arrive as the string 'true' (config file) or as
        # a real boolean.
        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        # NOTE(review): this compares the configured container *name* with
        # whatever get_container() returns; it looks like the create
        # branch is the one normally taken -- confirm against the
        # python-cloudfiles API before changing.  Behavior kept as-is.
        if not self.param_container == \
                self.connection.get_container(self.param_container):
            self.container = self.connection.create_container(
                self.param_container)
            self.container.make_public(
                ttl=60 * 60 * 2)
        else:
            self.container = self.connection.get_container(
                self.param_container)

        self.container_uri = self.container.public_uri()

    def _resolve_filepath(self, filepath):
        """Join the cleaned filepath components with '/' into an object
        name."""
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """Return True if the container has an object for filepath."""
        try:
            self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            return False
        return True

    def get_file(self, filepath, *args, **kwargs):
        """
        Return a StorageObjectWrapper around the object for filepath,
        creating the object if it does not exist yet.

        - Doesn't care about the "mode" argument
        """
        try:
            obj = self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(
                self._resolve_filepath(filepath))

        # guess_type() returns a (type, encoding) tuple, which is always
        # truthy; test the type itself so an unknown extension does not
        # overwrite content_type with None.
        mimetype = mimetypes.guess_type(filepath[-1])[0]

        if mimetype:
            obj.content_type = mimetype

        return StorageObjectWrapper(obj, *args, **kwargs)

    def delete_file(self, filepath):
        """Delete the object for filepath from the container."""
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        self.container.delete_object(
            self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """Return the container's public URI joined with the object name."""
        return '/'.join([
                self.container_uri,
                self._resolve_filepath(filepath)])
aa797ca1
JW
306
307
4dea708c
CAW
class StorageObjectWrapper(object):
    """
    Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
    used to circumvent the mystic `medium.jpg` corruption issue, where
    we had both python-cloudfiles and PIL doing buffering on both
    ends and that breaking things.

    This wrapper currently meets mediagoblin's needs for a public_store
    file-like object.  It can also be used in a ``with`` statement, like
    the file objects returned by the other storage backends.
    """
    def __init__(self, storage_object, *args, **kwargs):
        """
        Arguments:
         - storage_object: the object to wrap; it must offer read(),
           write() and a size attribute.

        Extra positional and keyword arguments are accepted and ignored.
        """
        self.storage_object = storage_object

    def read(self, *args, **kwargs):
        """Delegate to the wrapped object's read() and return its result."""
        return self.storage_object.read(*args, **kwargs)

    def write(self, data, *args, **kwargs):
        """
        Write data to the wrapped object.

        When the object already has content (non-zero size) and data is a
        str, the existing content is read back and data is appended to it
        before writing, so successive writes accumulate.
        """
        if self.storage_object.size and isinstance(data, str):
            data = self.read() + data

        self.storage_object.write(data, *args, **kwargs)

    def close(self):
        """No-op; present so the wrapper looks like a regular file."""
        pass

    def __enter__(self):
        """Context-manager entry: return the wrapper itself."""
        return self

    def __exit__(self, *exc_info):
        """Context-manager exit: close; exceptions are not suppressed."""
        self.close()
329
330
331# ------------
332# MountStorage
333# ------------
334
class MountStorage(StorageInterface):
    """
    Experimental "Mount" virtual Storage Interface

    This isn't an interface to some real storage, instead it's a
    redirecting interface, that redirects requests to other
    "StorageInterface"s.

    For example, say you have the paths:

     1. ['user_data', 'cwebber', 'avatar.jpg']
     2. ['user_data', 'elrond', 'avatar.jpg']
     3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']

    You could mount media_entries under CloudFileStorage and user_data
    under BasicFileStorage.  Then 1 would be passed to
    BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
    would be passed to CloudFileStorage under
    ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].

    In other words, this is kind of like mounting /home/ and /etc/
    under different filesystems on your operating system... but with
    mediagoblin filestorages :)

    To set this up, you currently need to call the mount() method with
    the target path and a backend, that shall be available under that
    target path.  You have to mount things in a sensible order,
    especially you can't mount ["a", "b"] before ["a"].

    The mount table is a tree of nested dicts: string keys lead to the
    sub-table of a path component, and the key None holds the backend
    mounted at that node.
    """
    def __init__(self, **kwargs):
        """Start with an empty mount table; keyword arguments are ignored."""
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath

        Raises:
          AssertionError if dirpath is already mounted, or if a longer
          path below dirpath has been mounted before.
        """
        new_ent = clean_listy_filepath(dirpath)

        log = logging.getLogger(__name__)
        log.debug("Mounting: %r", new_ent)
        already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True)
        log.debug("=== %r %r %r %d", already, rem_1, rem_2, len(table))

        # Raised explicitly (instead of using the assert statement) so the
        # checks still run under "python -O"; the exception type is the
        # same as before.
        if not rem_2:
            if None in table:
                raise AssertionError("That path is already mounted")
            if table:
                raise AssertionError("A longer path is already mounted here")

        # Create the missing nodes, then hang the backend on the last one.
        for part in rem_2:
            table[part] = {}
            table = table[part]
        table[None] = backend

    def _resolve_to_backend(self, filepath, extra_info=False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        table = self.mounttab
        filepath = filepath[:]  # work on a copy, components get popped
        res_fp = None
        while True:
            new_be = table.get(None)
            # Remember the deepest backend seen so far; on the very first
            # pass the result is recorded even when nothing is mounted.
            if (new_be is not None) or res_fp is None:
                res_be = new_be
                res_fp = filepath[:]
                res_extra = (table, filepath[:])
            if not filepath:
                break
            query = filepath.pop(0)
            entry = table.get(query)
            if entry is not None:
                table = entry
                res_extra = (table, filepath[:])
            else:
                break
        if extra_info:
            return (res_be, res_fp) + res_extra
        else:
            return (res_be, res_fp)

    def resolve_to_backend(self, filepath):
        """
        Return (backend, filepath inside that backend) for filepath.

        Raises:
          Error if no backend is mounted for the path.
        """
        backend, filepath = self._resolve_to_backend(filepath)
        if backend is None:
            raise Error("Path not mounted")
        return backend, filepath

    def __repr__(self, table=None, indent=None):
        """
        Return a multi-line description of the mount table.

        The table and indent arguments are for the recursive calls only;
        those calls return a list of lines instead of a string.
        """
        if indent is None:
            indent = []
        res = []
        if table is None:
            res.append("MountStorage<")
            table = self.mounttab
        v = table.get(None)
        if v is not None:
            res.append(" " * len(indent) + repr(indent) + ": " + repr(v))
        for k, v in table.items():
            if k is None:
                continue
            res.append(" " * len(indent) + repr(k) + ":")
            res += self.__repr__(v, indent + [k])
        if table is self.mounttab:
            res.append(">")
            return "\n".join(res)
        else:
            return res

    def file_exists(self, filepath):
        """Delegate file_exists to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_exists(filepath)

    def get_file(self, filepath, mode='r'):
        """Delegate get_file to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_file(filepath, mode)

    def delete_file(self, filepath):
        """Delegate delete_file to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.delete_file(filepath)

    def file_url(self, filepath):
        """Delegate file_url to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_url(filepath)

    def get_local_path(self, filepath):
        """Delegate get_local_path to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_local_path(filepath)

    def copy_locally(self, filepath, dest_path):
        """
        Need to override copy_locally, because the local_storage
        attribute is not correct.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        backend.copy_locally(filepath, dest_path)
472
68cf996c 473
d807b725
CAW
474###########
475# Utilities
476###########
477
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (such as
    ['dir1', 'dir2', 'filename.jpg']).

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A new list holding the cleaned components as unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        safe_name = secure_filename(component)
        sanitized.append(unicode(safe_name))

    # A component that cleans down to nothing cannot be used as a name.
    if u'' in sanitized:
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
503
504
def storage_system_from_config(config_section):
    """
    Build a storage system out of a config section.

    The section may carry a special "storage_class" entry holding the
    import path of the storage system to use.  When it is absent,
    "mediagoblin.storage:BasicFileStorage" is used.  Every other entry
    is handed to the storage class as a keyword argument.

    Arguments:
     - config_section: dictionary of config parameters

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'base_url': '/media/',
         'base_dir': '/var/whatever/media/'})

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media')
    """
    # Copy item by item through iteritems(): a plain dict(config) does
    # not replace the variables in the config items.
    params = {}
    for key, value in config_section.iteritems():
        params[key] = value

    # Take the class path out of the keyword arguments, with a default.
    class_path = params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_cls = util.import_component(class_path)
    return storage_cls(**params)