1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 Free Software Foundation, Inc
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 from werkzeug
.utils
import secure_filename
26 from mediagoblin
import util
class Error(Exception):
    """Base class for the exceptions defined by this storage module."""
class InvalidFilepath(Error):
    """A filepath component could not be cleaned into a usable name."""
class NoWebServing(Error):
    """A storage system cannot provide URLs for the files it holds."""
class NotImplementedError(Error):
    """
    A storage feature is not implemented by the storage class in use.

    NOTE: within this module this name shadows the builtin
    NotImplementedError; this class derives from Error, not from the
    builtin exception.
    """
49 ###############################################
50 # Storage interface & basic file implementation
51 ###############################################
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            # Only the final component (the filename) gets the prefix.
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: listy filepath of the stored file to copy
         - dest_path: local filesystem path to write the copy to
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                # open() rather than the Python-2-only file() builtin.
                with open(dest_path, 'wb') as dest_file:
                    # Read in one go: the file-like objects handed back
                    # by get_file() are only required to support a plain
                    # read(), not chunked sequential reads.
                    dest_file.write(source_file.read())
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
        - base_dir: Base directory things will be served out of.  MUST
          be an absolute path.
        - base_url: URL files will be served from
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether the file at filepath exists under base_dir.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it.

        Any missing parent directories are created first.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            try:
                os.makedirs(directory)
            except OSError:
                # Either the directory was already there, or it was
                # created by someone else between a check and our
                # makedirs() call; only a path that still does not
                # exist is a real failure.
                if not os.path.exists(directory):
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from the local filesystem.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, built by joining it onto base_url.

        Raises NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        # NOTE: urljoin() replaces the last segment of a base that does
        # not end in '/', so base_url is expected to end with a slash.
        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """
        Return the local filesystem path that filepath resolves to.
        """
        return self._resolve_filepath(filepath)
class CloudFilesStorage(StorageInterface):
    """
    Storage API implementation backed by a CloudFiles container.
    """

    def __init__(self, **kwargs):
        """
        Keyword arguments (all read from kwargs, all optional here):
        - cloudfiles_container: name of the container to store into
        - cloudfiles_user: username for the connection
        - cloudfiles_api_key: API key for the connection
        - cloudfiles_host: host URL; only checked for presence here
        - cloudfiles_use_servicenet: 'true' or True to use servicenet
        """
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        if not self.param_host:
            print('No CloudFiles host URL specified, '
                  'defaulting to Rackspace US')

        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        # The previous code compared the container *name* with the
        # container *object* returned by get_container(); that test can
        # never be equal, so the create-and-publish branch always ran,
        # and a container that did not exist yet made get_container()
        # raise before it could be created.  Create-and-publish is
        # therefore done unconditionally, which keeps the behavior for
        # existing containers and also works for missing ones.
        self.container = self.connection.create_container(
            self.param_container)
        self.container.make_public(
            ttl=60 * 60 * 2)

    def _resolve_filepath(self, filepath):
        """
        Transform the listy filepath into a '/'-joined object name.
        """
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether an object for filepath exists in the container.
        """
        try:
            self.container.get_object(
                self._resolve_filepath(filepath))
            return True
        except cloudfiles.errors.NoSuchObject:
            return False

    def get_file(self, filepath, mode='r'):
        """
        Return the container object for filepath, creating it if it
        does not exist yet.  The mode argument is not used.
        """
        try:
            obj = self.container.get_object(
                self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(
                self._resolve_filepath(filepath))

        return obj

    def delete_file(self, filepath):
        """
        Delete the container object that filepath resolves to.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        # Objects are stored under their resolved name (see get_file),
        # so they have to be deleted under that same name rather than
        # under the raw listy filepath.
        self.container.delete_object(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the public URI of the object for filepath.
        """
        return self.get_file(filepath).public_uri()
class MountStorage(StorageInterface):
    """
    Experimental "Mount" virtual Storage Interface

    This isn't an interface to some real storage, instead it's a
    redirecting interface, that redirects requests to other
    storage backends.

    For example, say you have the paths:

     1. ['user_data', 'cwebber', 'avatar.jpg']
     2. ['user_data', 'elrond', 'avatar.jpg']
     3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']

    You could mount media_entries under CloudFilesStorage and user_data
    under BasicFileStorage.  Then 1 would be passed to
    BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
    would be passed to CloudFilesStorage under
    ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].

    In other words, this is kind of like mounting /home/ and /etc/
    under different filesystems on your operating system... but with
    mediagoblin filestorages :)

    To set this up, you currently need to call the mount() method with
    the target path and a backend, that shall be available under that
    target path.  You have to mount things in a sensible order,
    especially you can't mount ["a", "b"] before ["a"].

    Internally the mounts are kept in self.mounttab, a tree of nested
    dicts keyed by path component; the key None in a node holds the
    backend mounted at exactly that node.
    """
    def __init__(self, **kwargs):
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath

        Raises AssertionError if dirpath is already mounted, or if a
        longer path below dirpath is already mounted.
        """
        # Local import so this block does not depend on the module's
        # import section.
        import logging
        log = logging.getLogger(__name__)

        new_ent = clean_listy_filepath(dirpath)

        log.debug("Mounting: %r", new_ent)
        already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True)
        log.debug("=== %r %r %r %d", already, rem_1, rem_2, len(table))

        # Raised explicitly (instead of via assert statements) so the
        # checks are still performed when Python runs with -O.
        if len(rem_2) == 0:
            if None in table:
                raise AssertionError("That path is already mounted")
            if len(table) != 0:
                raise AssertionError(
                    "A longer path is already mounted here")

        # Create the nodes for the components that don't exist yet,
        # then hang the backend on the final node.
        for component in rem_2:
            new_table = {}
            table[component] = new_table
            table = new_table
        table[None] = backend

    def _resolve_to_backend(self, filepath, extra_info=False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        table = self.mounttab
        filepath = filepath[:]  # work on a copy; pop() below mutates it
        res_fp = None
        while True:
            new_be = table.get(None)
            # Remember the deepest backend seen so far (on the very
            # first pass this also records "no backend" as a baseline).
            if (new_be is not None) or res_fp is None:
                res_be = new_be
                res_fp = filepath[:]
                res_extra = (table, filepath[:])
            if len(filepath) == 0:
                break
            query = filepath.pop(0)
            entry = table.get(query)
            if entry is not None:
                table = entry
                res_extra = (table, filepath[:])
            else:
                break
        if extra_info:
            return (res_be, res_fp) + res_extra
        else:
            return (res_be, res_fp)

    def resolve_to_backend(self, filepath):
        """
        Return (backend, filepath inside that backend) for filepath.

        Raises Error if no backend is mounted for the path.
        """
        backend, filepath = self._resolve_to_backend(filepath)
        if backend is None:
            raise Error("Path not mounted")
        return backend, filepath

    def __repr__(self, table=None, indent=None):
        """
        Return a multi-line description of the mount table.

        table and indent are used by the recursion over sub-tables; a
        call made for a sub-table returns a list of lines rather than
        a string.
        """
        if indent is None:
            indent = []
        res = []
        if table is None:
            res.append("MountStorage<")
            table = self.mounttab
        mounted = table.get(None)
        if mounted:
            res.append(
                " " * len(indent) + repr(indent) + ": " + repr(mounted))
        for key, subtable in table.items():
            if key is None:
                # The None key holds the backend, handled above.
                continue
            res.append(" " * len(indent) + repr(key) + ":")
            res += self.__repr__(subtable, indent + [key])
        if table is self.mounttab:
            res.append(">")
            return "\n".join(res)
        else:
            return res

    def file_exists(self, filepath):
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_exists(filepath)

    def get_file(self, filepath, mode='r'):
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_file(filepath, mode)

    def delete_file(self, filepath):
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.delete_file(filepath)

    def file_url(self, filepath):
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_url(filepath)

    def get_local_path(self, filepath):
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_local_path(filepath)

    def copy_locally(self, filepath, dest_path):
        """
        Need to override copy_locally, because the local_storage
        attribute is not correct.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        backend.copy_locally(filepath, dest_path)
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath
    (like ['dir1', 'dir2', 'filename.jpg']).

    For example:
      >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
      [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    cleaned = []
    for component in listy_filepath:
        cleaned.append(unicode(secure_filename(component)))

    # A component that cleans down to nothing cannot be used as a name.
    if any(part == u'' for part in cleaned):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return cleaned
def storage_system_from_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Keep only the "<prefix>_FOO" keys, stored under "FOO".  Each key
    # is matched against the pattern once.
    config_params = {}
    for key, value in paste_config.items():
        matched = prefix_re.match(key)
        if matched:
            config_params[matched.group(1)] = value

    # 'storage_class' selects the implementation and must not be
    # passed on to its constructor.
    storage_class = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(storage_class)
    return storage_class(**config_params)