d994268b94b729231bf0b24ca389a1cb7fe2e3c6
[mediagoblin.git] / mediagoblin / storage.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 Free Software Foundation, Inc
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import os
18 import re
19 import shutil
20 import urlparse
21 import uuid
22
23 from werkzeug.utils import secure_filename
24
25 from mediagoblin import util
26
27 ########
28 # Errors
29 ########
30
class Error(Exception):
    """Base class for every exception defined by the storage module."""


class InvalidFilepath(Error):
    """Raised when a filepath component cannot be cleaned into a usable name."""


class NoWebServing(Error):
    """Raised when a file URL is requested but no base URL is configured."""


# Keep a handle on the builtin before the class below shadows its name.
_BuiltinNotImplementedError = NotImplementedError


class NotImplementedError(Error, _BuiltinNotImplementedError):
    """
    Raised when a storage implementation lacks part of the storage API.

    This deliberately keeps the module-level name callers already use,
    but it also derives from the builtin NotImplementedError so that
    generic handlers written against the builtin still catch it.
    """
36
37
38 ###############################################
39 # Storage interface & basic file implementation
40 ###############################################
41
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise NotImplementedError for a feature that a subclass of this
        interface did not provide.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Args:
         - filepath: storage API style filepath of the file to copy.
         - dest_path: local filesystem path the copy is written to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                with open(dest_path, 'wb') as dest_file:
                    # Stream in chunks rather than holding the whole
                    # file in memory at once.
                    shutil.copyfileobj(source_file, dest_file)
162
163
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The components are cleaned first and then joined onto base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether something exists on disk at this filepath.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it.

        Missing parent directories are created first, whatever the mode.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            if not os.path.exists(directory):
                try:
                    os.makedirs(directory)
                except OSError:
                    # Another request may have created the directory
                    # between the check above and makedirs; that is
                    # fine.  Anything else is a real failure.
                    if not os.path.isdir(directory):
                        raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from disk.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for this file, built from base_url.

        Raises:
          NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """
        Return the local filesystem path for this filepath.
        """
        return self._resolve_filepath(filepath)
217
218
class MountStorage(StorageInterface):
    """
    Storage that dispatches to other backends mounted under directories.

    The mount table is a tree of nested dicts keyed by filepath
    component.  The backend mounted at a given node, if any, is stored
    in that node's dict under the key None.
    """

    def __init__(self, **kwargs):
        # Root node of the mount table; starts with nothing mounted.
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath

        Args:
         - dirpath: storage API style path (list of components) of the
           directory to mount at.
         - backend: the storage backend to mount there.

        Raises:
          AssertionError if a backend is already mounted at dirpath.
        """
        # Imported here so this method does not depend on the module's
        # import block.
        import logging
        log = logging.getLogger(__name__)

        new_ent = clean_listy_filepath(dirpath)
        # resolve_to_backend only looks for a backend in a node at the
        # top of the loop pass *after* descending into it.  The trailing
        # empty component guarantees that extra pass for the node that
        # dirpath itself names.
        new_ent.append(u'')

        log.debug("Mounting: %r", new_ent)
        already, rem_1, table, rem_2 = self.resolve_to_backend(new_ent, True)
        log.debug("=== %r %r %r", already, rem_1, rem_2)

        # Strip the sentinel outside of the asserts: asserts are removed
        # under "python -O", and the pops must happen regardless.
        sentinel_1 = rem_1.pop(-1)
        sentinel_2 = rem_2.pop(-1)
        assert sentinel_1 == u'', "Internal Error 1"
        assert sentinel_2 == u'', "Internal Error 2"

        # A backend was found and there is no path left to create: this
        # exact directory is taken.  Raised explicitly (still as
        # AssertionError, for existing callers) so the check survives -O.
        if already is not None and len(rem_2) == 0:
            raise AssertionError("Already mounted")

        # Create the missing nodes below the deepest existing one.
        for part in rem_2:
            table[part] = {}
            table = table[part]
        assert None not in table, "Huh? Already mounted?!"
        table[None] = backend

    def resolve_to_backend(self, filepath, extra_info=False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.
        If no backend is mounted anywhere along filepath, the backend is
        None and the returned filepath is the whole input.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        table = self.mounttab
        # Work on a copy; components are consumed from the front.
        filepath = filepath[:]
        res_fp = None
        while True:
            new_be = table.get(None)
            # Remember the deepest backend seen so far.  The very first
            # pass always records a result, even without a backend.
            if (new_be is not None) or res_fp is None:
                res_be = new_be
                res_fp = filepath[:]
                res_extra = (table, filepath[:])
            if len(filepath) == 0:
                break
            query = filepath.pop(0)
            entry = table.get(query)
            if entry is None:
                # No deeper node; res_extra still refers to the last
                # node that exists.
                break
            table = entry
            res_extra = (table, filepath[:])
        if extra_info:
            return (res_be, res_fp) + res_extra
        else:
            return (res_be, res_fp)

    def __repr__(self, table=None, indent=None):
        """
        Return a multi-line description of the mount table.

        The table and indent arguments are used by the recursion; for a
        sub-table the result is a list of lines instead of a string.
        """
        if indent is None:
            indent = []
        res = []
        if table is None:
            res.append("MountStorage<")
            table = self.mounttab
        padding = " " * len(indent)
        backend = table.get(None)
        if backend is not None:
            res.append(padding + repr(indent) + ": " + repr(backend))
        for key, subtable in table.items():
            if key is None:
                continue
            res.append(padding + repr(key) + ":")
            res += self.__repr__(subtable, indent + [key])
        if table is self.mounttab:
            res.append(">")
            return "\n".join(res)
        else:
            return res
294
295
296 ###########
297 # Utilities
298 ###########
299
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (something like
    ['dir1', 'dir2', 'filename.jpg']).

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A new list holding the cleaned components as unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # A component that cleaned down to the empty string is unusable.
    if any(part == u'' for part in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
325
326
def storage_system_from_config(paste_config, storage_prefix):
    """
    Build a storage system out of the paste app config.

    Every key of paste_config that looks like "${storage_prefix}_FOO"
    is collected and handed to the storage class as keyword argument
    FOO.  The one exception is "${storage_prefix}_storage_class": it is
    not passed on, but names the import path of the storage class to
    instantiate.  When it is absent,
    "mediagoblin.storage:BasicFileStorage" is used.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    key_pattern = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Gather the prefixed settings, keyed by the part after the prefix.
    config_params = {}
    for key, value in paste_config.iteritems():
        matched = key_pattern.match(key)
        if matched:
            config_params[matched.groups()[0]] = value

    # The class path is consumed here rather than passed as an argument.
    class_path = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(class_path)
    return storage_class(**config_params)
371
372