d994268b94b729231bf0b24ca389a1cb7fe2e3c6
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import errno
import logging
import os
import re
import shutil
import urlparse
import uuid

from werkzeug.utils import secure_filename

from mediagoblin import util
# Keep a handle on the builtin before the module-level class below
# shadows its name.
_BuiltinNotImplementedError = NotImplementedError


class Error(Exception):
    """Base class for every error raised by the storage module."""


class InvalidFilepath(Error):
    """A listy filepath contained a component that cannot be used."""


class NoWebServing(Error):
    """The storage system cannot provide URLs for its files."""


class NotImplementedError(Error, _BuiltinNotImplementedError):
    """
    A storage backend does not implement the requested feature.

    Inside this module this class shadows the builtin of the same
    name.  It also inherits from the builtin, so code that catches the
    builtin NotImplementedError sees these errors as well as code that
    catches the storage Error base class.
    """
###############################################
# Storage interface & basic file implementation
###############################################
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            # Only the final component (the filename) gets the prefix.
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: listy filepath of the file inside this storage.
         - dest_path: local filesystem path the file is copied to.
        """
        if self.local_storage:
            shutil.copy(self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                with open(dest_path, 'wb') as dest_file:
                    # Stream in chunks so a large file is never held
                    # in memory all at once.
                    shutil.copyfileobj(source_file, dest_file)
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
        - base_dir: Base directory things will be served out of.  MUST
          be an absolute path.
        - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """Return whether the file at filepath exists under base_dir."""
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it.

        Parent directories of the file are created when missing.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            try:
                os.makedirs(directory)
            except OSError as exc:
                # The directory may already exist, possibly created by
                # someone else between a check and the makedirs call.
                # Anything other than "already a directory" is an error.
                if exc.errno != errno.EEXIST or not os.path.isdir(directory):
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """Remove the file at filepath from the local filesystem."""
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL of the file at filepath, joined onto base_url.

        Raises NoWebServing when no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """Return the local filesystem path of the file at filepath."""
        return self._resolve_filepath(filepath)
class MountStorage(StorageInterface):
    """
    Storage that dispatches to other storage backends by path prefix.

    Backends are registered with mount() and looked up with
    resolve_to_backend().  The mount table (self.mounttab) is a tree of
    nested dicts keyed by cleaned path component; inside each node the
    key None holds the backend mounted at that node, if there is one.
    """

    def __init__(self, **kwargs):
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath

        Arguments:
         - dirpath: listy path of the directory to mount under.
         - backend: the storage backend to register there.

        Raises AssertionError if a backend is already mounted at
        exactly that path, or if an internal consistency check fails.
        """
        new_ent = clean_listy_filepath(dirpath)
        # A trailing empty component is appended before resolving and
        # popped from both remainders afterwards; the checks below
        # verify that it survived the lookup untouched.
        new_ent.append(u'')

        already, rem_1, table, rem_2 = self.resolve_to_backend(new_ent, True)
        logging.getLogger(__name__).debug(
            "Mounting %r: already=%r rem_1=%r rem_2=%r",
            new_ent, already, rem_1, rem_2)

        # The pops must always run, so they cannot live inside assert
        # statements (those are stripped under "python -O").  The
        # exception type stays AssertionError for existing callers.
        if rem_1.pop(-1) != u'':
            raise AssertionError("Internal Error 1")
        if rem_2.pop(-1) != u'':
            raise AssertionError("Internal Error 2")
        if already is not None and len(rem_2) == 0:
            raise AssertionError("Already mounted")

        # Create the directory nodes that do not exist yet.
        for part in rem_2:
            table[part] = {}
            table = table[part]

        if None in table:
            raise AssertionError("Huh? Already mounted?!")
        table[None] = backend

    def resolve_to_backend(self, filepath, extra_info=False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        table = self.mounttab
        # Work on a copy; the caller's list is never modified.
        remaining = list(filepath)
        res_be = None
        res_fp = None
        res_extra = None
        while True:
            new_be = table.get(None)
            # Record the deepest backend seen so far.  The very first
            # node is always recorded, even without a backend.
            if new_be is not None or res_fp is None:
                res_be = new_be
                res_fp = remaining[:]
                res_extra = (table, remaining[:])
            if not remaining:
                break
            entry = table.get(remaining.pop(0))
            if entry is None:
                break
            # Descend; the deepest existing node is tracked separately
            # from the deepest backend.
            table = entry
            res_extra = (table, remaining[:])
        if extra_info:
            return (res_be, res_fp) + res_extra
        return (res_be, res_fp)

    def __repr__(self, table=None, indent=None):
        """
        Return a multi-line description of the mount table.

        table and indent are used by the recursive calls only.  A call
        for a nested table returns a list of lines instead of a string.
        """
        if indent is None:
            indent = []
        res = []
        if table is None:
            res.append("MountStorage<")
            table = self.mounttab
        v = table.get(None)
        if v is not None:
            res.append(" " * len(indent) + repr(indent) + ": " + repr(v))
        for k, v in table.items():
            if k is None:
                # The backend of this node was handled above.
                continue
            res.append(" " * len(indent) + repr(k) + ":")
            res += self.__repr__(v, indent + [k])
        if table is self.mounttab:
            res.append(">")
            return "\n".join(res)
        return res
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (something like
    ['dir1', 'dir2', 'filename.jpg']).

    For example:
    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # Every component is cleaned before the check, so an empty one is
    # only reported after the whole path has been processed.
    if u'' in sanitized:
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
def storage_system_from_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Keep only the keys carrying our prefix, with the prefix stripped.
    # Each key is matched against the pattern exactly once.
    config_params = {}
    for key, value in paste_config.items():
        match = prefix_re.match(key)
        if match:
            config_params[match.group(1)] = value

    # The class path is not a constructor argument, so take it out.
    storage_class = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(storage_class)
    return storage_class(**config_params)