Feature #571 - Closing storage objects - Removed closing(), renamed
[mediagoblin.git] / mediagoblin / storage.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
12a100e4 2# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
a6b378ef
CAW
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
779f2b94 17import os
6a07362d 18import shutil
f61a41b8 19import urlparse
2fdec827 20import uuid
851c51a3 21import cloudfiles
13b9f054 22import mimetypes
b5e7b967 23import tempfile
a6b378ef
CAW
24
25from werkzeug.utils import secure_filename
26
ffa22935
CAW
27from mediagoblin import util
28
d807b725
CAW
29########
30# Errors
31########
a6b378ef 32
770c12be 33
aa797ca1
JW
class Error(Exception):
    """Base class for every exception raised by the storage API."""


class InvalidFilepath(Error):
    """
    Raised when a component of a listy filepath cannot be turned into
    a usable name.
    """


class NoWebServing(Error):
    """
    Raised when a file URL is requested from a storage system that has
    no URL to serve files from.
    """


# The class below shadows the builtin NotImplementedError for the rest
# of this module, so keep a handle on the builtin before the name is
# rebound.
_BuiltinNotImplementedError = NotImplementedError


class NotImplementedError(Error, _BuiltinNotImplementedError):
    """
    Raised when a storage implementation does not provide a feature of
    the storage interface.

    It derives from both this module's Error and the builtin
    NotImplementedError, so code catching either one will catch it.
    """
797be93c 48
770c12be 49
d807b725
CAW
50###############################################
51# Storage interface & basic file implementation
52###############################################
a6b378ef 53
797be93c
CAW
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: listy filepath of the file inside this storage.
         - dest_path: local filesystem path to write the copy to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                # open() rather than the Python-2-only file() builtin.
                with open(dest_path, 'wb') as dest_file:
                    # The whole source is read into memory in one call.
                    dest_file.write(source_file.read())
174
779f2b94
CAW
175
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """Return whether the resolved local path exists."""
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode, creating any
        missing parent directories first.
        """
        import errno

        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            # Try to create and tolerate "already exists" instead of
            # checking first: another process may create the directory
            # between an existence check and the makedirs() call.
            try:
                os.makedirs(directory)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """Remove the file at filepath from the local filesystem."""
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, joined onto base_url.

        Raises:
          NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """Return the local filesystem path for filepath."""
        return self._resolve_filepath(filepath)
229
ffa22935 230
4dea708c
CAW
231# ----------------------------------------------------
232# OpenStack/Rackspace Cloud's Swift/CloudFiles support
233# ----------------------------------------------------
b5e7b967 234
class CloudFilesStorage(StorageInterface):
    """
    Storage API implementation backed by a python-cloudfiles container.
    """

    def __init__(self, **kwargs):
        """
        Keyword arguments read (all others are ignored):
         - cloudfiles_container: name of the container to use
         - cloudfiles_user: username for the connection
         - cloudfiles_api_key: API key for the connection
         - cloudfiles_host: only checked to print a notice when unset
         - cloudfiles_use_servicenet: 'true' or True enables servicenet
        """
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        if not self.param_host:
            print('No CloudFiles host URL specified, '
                  'defaulting to Rackspace US')

        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        # NOTE(review): this compares the container *name* with whatever
        # get_container() returns; presumably the two never compare
        # equal, so the create branch runs whenever get_container()
        # succeeds -- confirm against python-cloudfiles before changing.
        if not self.param_container == \
                self.connection.get_container(self.param_container):
            self.container = self.connection.create_container(
                self.param_container)
            self.container.make_public(
                ttl=60 * 60 * 2)
        else:
            self.container = self.connection.get_container(
                self.param_container)

        self.container_uri = self.container.public_uri()

    def _resolve_filepath(self, filepath):
        """Join the cleaned filepath components into an object name."""
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """Return True if the container has an object at filepath."""
        try:
            self.container.get_object(
                self._resolve_filepath(filepath))
            return True
        except cloudfiles.errors.NoSuchObject:
            return False

    def get_file(self, filepath, *args, **kwargs):
        """
        Return a wrapped storage object for filepath, creating the
        object when it does not exist yet.

        - Doesn't care about the "mode" argument
        """
        try:
            obj = self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(
                self._resolve_filepath(filepath))

        # guess_type() returns a (type, encoding) tuple, which is always
        # truthy; test the type itself so an unknown type does not set
        # content_type to None.
        mimetype, _encoding = mimetypes.guess_type(filepath[-1])

        if mimetype:
            obj.content_type = mimetype

        return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)

    def delete_file(self, filepath):
        """Delete the object at filepath from the container."""
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        self.container.delete_object(
            self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """Return the container's public URI joined with the object name."""
        return '/'.join([
            self.container_uri,
            self._resolve_filepath(filepath)])
aa797ca1
JW
306
307
class CloudFilesStorageObjectWrapper(object):
    """
    Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
    used to circumvent the mystic `medium.jpg` corruption issue, where
    we had both python-cloudfiles and PIL doing buffering on both
    ends and that breaking things.

    This wrapper currently meets mediagoblin's needs for a public_store
    file-like object.
    """
    def __init__(self, storage_object, *args, **kwargs):
        """
        Arguments:
         - storage_object: the object to wrap; extra positional and
           keyword arguments are accepted and ignored.
        """
        self.storage_object = storage_object

    def read(self, *args, **kwargs):
        """Pass the read straight through to the wrapped object."""
        return self.storage_object.read(*args, **kwargs)

    def write(self, data, *args, **kwargs):
        """
        write data to the cloudfiles storage object

        The original motivation for this wrapper is to ensure
        that buffered writing to a cloudfiles storage object does not
        overwrite any preexisting data.

        Currently this method does not support any write modes except
        "append".  However if we should need it, it would be easy to
        implement.
        """
        # isinstance() rather than an exact type comparison, so that
        # str subclasses are appended too instead of overwriting what
        # is already stored.
        if self.storage_object.size and isinstance(data, str):
            data = self.read() + data

        self.storage_object.write(data, *args, **kwargs)

    def close(self):
        """Do nothing; present so the wrapper can be closed like a file."""
        pass

    def __enter__(self):
        """
        Context Manager API implementation
        http://docs.python.org/library/stdtypes.html#context-manager-types
        """
        return self

    def __exit__(self, *args):
        """
        Context Manager API implementation
        see self.__enter__()
        """
        self.close()
356
4dea708c
CAW
357
358# ------------
359# MountStorage
360# ------------
361
class MountStorage(StorageInterface):
    """
    Experimental "Mount" virtual Storage Interface

    This isn't an interface to some real storage, instead it's a
    redirecting interface, that redirects requests to other
    "StorageInterface"s.

    For example, say you have the paths:

     1. ['user_data', 'cwebber', 'avatar.jpg']
     2. ['user_data', 'elrond', 'avatar.jpg']
     3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']

    You could mount media_entries under CloudFileStorage and user_data
    under BasicFileStorage.  Then 1 would be passed to
    BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
    would be passed to CloudFileStorage under
    ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].

    In other words, this is kind of like mounting /home/ and /etc/
    under different filesystems on your operating system... but with
    mediagoblin filestorages :)

    To set this up, you currently need to call the mount() method with
    the target path and a backend, that shall be available under that
    target path.  You have to mount things in a sensible order,
    especially you can't mount ["a", "b"] before ["a"].
    """
    def __init__(self, **kwargs):
        # Nested dicts keyed by path component; the key None in a node
        # holds the backend mounted at that node.
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath

        Raises:
          AssertionError if dirpath is already mounted, or if a longer
          path below dirpath is already mounted.
        """
        new_ent = clean_listy_filepath(dirpath)

        # Single-argument print() calls produce the same output under
        # both the print statement and the print function.
        print("Mounting: %r" % (new_ent,))
        already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True)
        print("=== %r %r %r %d" % (already, rem_1, rem_2, len(table)))

        assert (len(rem_2) > 0) or (None not in table), \
            "That path is already mounted"
        assert (len(rem_2) > 0) or (len(table) == 0), \
            "A longer path is already mounted here"

        # Create the missing directory nodes, then hang the backend on
        # the last one.
        for part in rem_2:
            table[part] = {}
            table = table[part]
        table[None] = backend

    def _resolve_to_backend(self, filepath, extra_info=False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        table = self.mounttab
        filepath = filepath[:]
        res_fp = None
        while True:
            new_be = table.get(None)
            # Record a result on the first pass, and again every time a
            # node along the path has a backend mounted on it.
            if (new_be is not None) or res_fp is None:
                res_be = new_be
                res_fp = filepath[:]
                res_extra = (table, filepath[:])
            if len(filepath) == 0:
                break
            query = filepath.pop(0)
            entry = table.get(query)
            if entry is not None:
                table = entry
                res_extra = (table, filepath[:])
            else:
                break
        if extra_info:
            return (res_be, res_fp) + res_extra
        else:
            return (res_be, res_fp)

    def resolve_to_backend(self, filepath):
        """
        Return the backend responsible for filepath and the filepath
        inside that backend.

        Raises:
          Error if no backend is mounted for filepath.
        """
        backend, filepath = self._resolve_to_backend(filepath)
        if backend is None:
            raise Error("Path not mounted")
        return backend, filepath

    def __repr__(self, table=None, indent=None):
        """
        Return a multi-line description of the mount table.

        The table and indent arguments are for the internal recursion
        only; a recursive call on a sub-table returns a list of lines
        instead of a string.
        """
        if indent is None:
            indent = []
        res = []
        if table is None:
            res.append("MountStorage<")
            table = self.mounttab
        v = table.get(None)
        if v is not None:
            res.append(" " * len(indent) + repr(indent) + ": " + repr(v))
        for k, v in table.items():
            # The None key holds this node's backend, handled above.
            if k is None:
                continue
            res.append(" " * len(indent) + repr(k) + ":")
            res += self.__repr__(v, indent + [k])
        if table is self.mounttab:
            res.append(">")
            return "\n".join(res)
        else:
            return res

    def file_exists(self, filepath):
        """Delegate file_exists to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_exists(filepath)

    def get_file(self, filepath, mode='r'):
        """Delegate get_file to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_file(filepath, mode)

    def delete_file(self, filepath):
        """Delegate delete_file to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.delete_file(filepath)

    def file_url(self, filepath):
        """Delegate file_url to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_url(filepath)

    def get_local_path(self, filepath):
        """Delegate get_local_path to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_local_path(filepath)

    def copy_locally(self, filepath, dest_path):
        """
        Need to override copy_locally, because the local_storage
        attribute is not correct.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        backend.copy_locally(filepath, dest_path)
499
68cf996c 500
d807b725
CAW
501###########
502# Utilities
503###########
504
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (like
    ['dir1', 'dir2', 'filename.jpg']).

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # Every component is cleaned before the check, so the check only
    # runs once the whole path has been processed.
    if any(part == u'' for part in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
530
531
def storage_system_from_config(config_section):
    """
    Build a storage system out of a config section.

    The config_section may carry a special "storage_class" entry giving
    the import path of the storage system to use.  When it is absent,
    "mediagoblin.storage:BasicFileStorage" is used.  All remaining
    entries are passed to the storage class as keyword arguments.

    Arguments:
     - config_section: dictionary of config parameters

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'base_url': '/media/',
         'base_dir': '/var/whatever/media/'})

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    # Go through iteritems() rather than dict(config_section), because
    # the latter does not replace the variables in the config items.
    settings = dict(config_section.iteritems())

    # Take the class path out of the settings so it is not passed on
    # to the storage class as a keyword argument.
    class_path = settings.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    backend_class = util.import_component(class_path)
    return backend_class(**settings)