# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import mimetypes
import os
import shutil
import urlparse
import uuid

from werkzeug.utils import secure_filename

from mediagoblin import util
class Error(Exception):
    """Base class for the exceptions defined by this storage module."""


class InvalidFilepath(Error):
    """Raised when a filepath component cannot be made into a usable name."""


class NoWebServing(Error):
    """Raised when a storage system cannot provide URLs for its files."""


class NotImplementedError(Error):
    """
    Raised when a storage implementation lacks a requested feature.

    NOTE: within this module this class shadows the builtin exception
    of the same name.
    """
###############################################
# Storage interface & basic file implementation
###############################################
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if not self.file_exists(filepath):
            return filepath

        # Name is taken: prefix the final component with a fresh uuid.
        return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: listy filepath of the file inside this storage
         - dest_path: local filesystem path to write the copy to
        """
        if self.local_storage:
            # The file is already on disk, so a plain filesystem copy
            # is enough.
            shutil.copy(self.get_local_path(filepath), dest_path)
            return

        with self.get_file(filepath, 'rb') as source_file:
            # open() rather than the Python-2-only file() builtin.
            with open(dest_path, 'wb') as dest_file:
                # Read in one call: the object returned by get_file() is
                # only known to offer read(), not chunked/streamed reads.
                dest_file.write(source_file.read())
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    # Files live on the local filesystem, so get_local_path() is usable.
    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
        - base_dir: Base directory things will be served out of.
          (presumably MUST be an absolute path -- the original note is
          truncated, confirm)
        - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """Return whether the resolved path exists on the filesystem."""
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it,
        creating the parent directories first if the filepath has any.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            if not os.path.exists(directory):
                try:
                    os.makedirs(directory)
                except OSError:
                    # Another process may have created the directory
                    # between the check and makedirs(); only re-raise if
                    # it really is not there.
                    if not os.path.isdir(directory):
                        raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """Remove the file at filepath from the filesystem."""
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, joined onto base_url.

        Raises NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """Return the local filesystem path for filepath."""
        return self._resolve_filepath(filepath)
# ----------------------------------------------------
# OpenStack/Rackspace Cloud's Swift/CloudFiles support
# ----------------------------------------------------
class CloudFilesStorage(StorageInterface):
    """
    Storage implementation backed by a CloudFiles container, accessed
    through the ``cloudfiles`` module.
    """

    def __init__(self, **kwargs):
        """
        Keyword arguments read (all via kwargs.get, so all optional here):
        - cloudfiles_container: name of the container to use
        - cloudfiles_user: username passed to cloudfiles.get_connection
        - cloudfiles_api_key: API key passed to cloudfiles.get_connection
        - cloudfiles_host: only used to decide whether to print a notice
        - cloudfiles_use_servicenet: 'true' or True enables servicenet
        """
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        if not self.param_host:
            print('No CloudFiles host URL specified, '
                  'defaulting to Rackspace US')
        # NOTE(review): param_host is never handed to get_connection()
        # below, so a configured host currently has no effect -- confirm
        # whether it should be passed on.

        # Config values may arrive as the string 'true' or a real boolean.
        use_servicenet = self.param_use_servicenet in ('true', True)

        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=use_servicenet)

        # NOTE(review): this compares the container *name* with whatever
        # get_container() returns; if that is a container object the
        # comparison is presumably never equal, so the create/make_public
        # branch would always run.  Logic kept as-is -- confirm intent.
        if not self.param_container == \
                self.connection.get_container(self.param_container):
            self.container = self.connection.create_container(
                self.param_container)
            # NOTE(review): the argument line of this call was missing
            # from the reviewed source; a two hour TTL is assumed here --
            # confirm.
            self.container.make_public(
                ttl=60 * 60 * 2)
        else:
            self.container = self.connection.get_container(
                self.param_container)

        self.container_uri = self.container.public_uri()

    def _resolve_filepath(self, filepath):
        """
        Turn a listy filepath into a single object name.
        """
        # NOTE(review): the separator line was missing from the reviewed
        # source; '/' is assumed -- confirm.
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return True if the container has an object for filepath, False
        if looking it up raises cloudfiles.errors.NoSuchObject.
        """
        try:
            self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            return False
        return True

    def get_file(self, filepath, *args, **kwargs):
        """
        Return a StorageObjectWrapper for the object at filepath,
        creating the object if it does not exist yet.

        - Doesn't care about the "mode" argument
        """
        try:
            obj = self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(
                self._resolve_filepath(filepath))

            # NOTE(review): the argument of guess_type() was missing from
            # the reviewed source; the final path component is assumed.
            mimetype = mimetypes.guess_type(
                filepath[-1])

            # guess_type() always returns a (type, encoding) tuple, so
            # test the type itself instead of the (always true) tuple.
            if mimetype[0]:
                obj.content_type = mimetype[0]

        return StorageObjectWrapper(obj, *args, **kwargs)

    def delete_file(self, filepath):
        """Delete the container object for filepath."""
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        self.container.delete_object(
            self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, built from the container's public
        URI and the resolved object name.
        """
        # NOTE(review): the separator line was missing from the reviewed
        # source; '/' is assumed -- confirm.
        return '/'.join([
            self.container_uri,
            self._resolve_filepath(filepath)])
class StorageObjectWrapper(object):
    """
    Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
    used to circumvent the mystic `medium.jpg` corruption issue, where
    we had both python-cloudfiles and PIL doing buffering on both
    ends and that breaking things.

    This wrapper currently meets mediagoblin's needs for a public_store
    file-like object.  It also supports the ``with`` statement, since
    StorageInterface.copy_locally uses the result of get_file() that way.
    """

    def __init__(self, storage_object, *args, **kwargs):
        """
        Arguments:
         - storage_object: the object to delegate read() / write() to.

        Extra positional and keyword arguments are accepted and ignored.
        """
        self.storage_object = storage_object

    def read(self, *args, **kwargs):
        """Return whatever the wrapped object's read() returns."""
        return self.storage_object.read(*args, **kwargs)

    def write(self, data, *args, **kwargs):
        """
        Write data to the wrapped object.

        If the wrapped object already has content (non-zero size) and
        data is a str, the existing content is read back and data is
        appended to it before writing, so successive writes accumulate.
        """
        if self.storage_object.size and isinstance(data, str):
            data = self.read() + data

        self.storage_object.write(data, *args, **kwargs)

    def close(self):
        """No-op; nothing is held open by this wrapper itself."""

    def __enter__(self):
        """Return the wrapper itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the wrapper; exceptions from the block are not suppressed."""
        self.close()
class MountStorage(StorageInterface):
    """
    Experimental "Mount" virtual Storage Interface

    This isn't an interface to some real storage, instead it's a
    redirecting interface, that redirects requests to other
    storage backends.

    For example, say you have the paths:

     1. ['user_data', 'cwebber', 'avatar.jpg']
     2. ['user_data', 'elrond', 'avatar.jpg']
     3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']

    You could mount media_entries under CloudFileStorage and user_data
    under BasicFileStorage.  Then 1 would be passed to
    BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
    would be passed to CloudFileStorage under
    ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].

    In other words, this is kind of like mounting /home/ and /etc/
    under different filesystems on your operating system... but with
    mediagoblin filestorages :)

    To set this up, you currently need to call the mount() method with
    the target path and a backend, that shall be available under that
    target path.  You have to mount things in a sensible order,
    especially you can't mount ["a", "b"] before ["a"].
    """

    def __init__(self, **kwargs):
        """Start with an empty mount table; keyword arguments are ignored."""
        # Nested dicts keyed by path component; the key None in a node
        # holds the backend mounted at exactly that node.
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath

        Raises AssertionError if dirpath is already mounted, or if a
        longer path below dirpath is already mounted.
        """
        new_ent = clean_listy_filepath(dirpath)

        print("Mounting: %s" % repr(new_ent))
        already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True)
        print("=== %r %r %r %d" % (already, rem_1, rem_2, len(table)))

        # Raised explicitly (rather than via ``assert``) so the checks
        # still run under ``python -O``; the exception type is unchanged.
        if not rem_2:
            if None in table:
                raise AssertionError("That path is already mounted")
            if table:
                raise AssertionError("A longer path is already mounted here")

        # Create the directory nodes that do not exist yet, then hang
        # the backend on the final node.
        for part in rem_2:
            table[part] = {}
            table = table[part]
        table[None] = backend

    def _resolve_to_backend(self, filepath, extra_info=False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        table = self.mounttab
        # Work on a list copy: components are popped off below, and the
        # caller's sequence (list or tuple) must stay untouched.
        remaining = list(filepath)
        res_be = None
        res_fp = None
        res_extra = None
        while True:
            new_be = table.get(None)
            # Remember the deepest node that has a backend mounted; the
            # very first pass always records the root, backend or not.
            if (new_be is not None) or res_fp is None:
                res_be = new_be
                res_fp = remaining[:]
                res_extra = (table, remaining[:])
            if not remaining:
                break
            query = remaining.pop(0)
            entry = table.get(query)
            if entry is None:
                break
            table = entry
            res_extra = (table, remaining[:])
        if extra_info:
            return (res_be, res_fp) + res_extra
        return (res_be, res_fp)

    def resolve_to_backend(self, filepath):
        """
        Return (backend, filepath inside that backend) for filepath.

        Raises Error if no backend is mounted for the path.
        """
        backend, filepath = self._resolve_to_backend(filepath)
        if backend is None:
            raise Error("Path not mounted")
        return backend, filepath

    def __repr__(self, table=None, indent=None):
        """
        Return a multi-line description of the mount table.

        table and indent are used by the recursive calls only; those
        calls return a list of lines instead of the joined string.
        """
        if indent is None:
            indent = []
        res = []
        if table is None:
            res.append("MountStorage<")
            table = self.mounttab
        v = table.get(None)
        if v is not None:
            res.append(" " * len(indent) + repr(indent) + ": " + repr(v))
        for k, v in table.items():
            if k is None:
                continue
            res.append(" " * len(indent) + repr(k) + ":")
            res += self.__repr__(v, indent + [k])
        if table is self.mounttab:
            res.append(">")
            return "\n".join(res)
        return res

    def file_exists(self, filepath):
        """Delegate file_exists to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_exists(filepath)

    def get_file(self, filepath, mode='r'):
        """Delegate get_file to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_file(filepath, mode)

    def delete_file(self, filepath):
        """Delegate delete_file to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.delete_file(filepath)

    def file_url(self, filepath):
        """Delegate file_url to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_url(filepath)

    def get_local_path(self, filepath):
        """Delegate get_local_path to the backend mounted for filepath."""
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_local_path(filepath)

    def copy_locally(self, filepath, dest_path):
        """
        Need to override copy_locally, because the local_storage
        attribute is not correct.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        backend.copy_locally(filepath, dest_path)
def clean_listy_filepath(listy_filepath):
    """
    Take a listy filepath (like ['dir1', 'dir2', 'filename.jpg']) and
    clean out any nastiness from it.

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # An empty string means a component had nothing usable left in it.
    if sanitized.count(u'') > 0:
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
def storage_system_from_config(config_section):
    """
    Utility for setting up a storage system from a config section.

    Note that a special argument may be passed in to
    the config_section which is "storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - config_section: dictionary of config parameters

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'base_url': '/media/',
         'base_dir': '/var/whatever/media/'})

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media')
    """
    # This construct is needed, because dict(config) does
    # not replace the variables in the config items.
    params = dict(config_section.iteritems())

    # Pull the class path out of the params (falling back to the
    # default); everything left over becomes constructor arguments.
    class_path = params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_factory = util.import_component(class_path)
    return storage_factory(**params)