Commit | Line | Data |
---|---|---|
8e1e744d | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
12a100e4 | 2 | # Copyright (C) 2011 MediaGoblin contributors. See AUTHORS. |
a6b378ef CAW |
3 | # |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
779f2b94 | 17 | import os |
6a07362d | 18 | import shutil |
f61a41b8 | 19 | import urlparse |
2fdec827 | 20 | import uuid |
851c51a3 | 21 | import cloudfiles |
13b9f054 | 22 | import mimetypes |
b5e7b967 | 23 | import tempfile |
a6b378ef CAW |
24 | |
25 | from werkzeug.utils import secure_filename | |
26 | ||
ffa22935 CAW |
27 | from mediagoblin import util |
28 | ||
d807b725 CAW |
29 | ######## |
30 | # Errors | |
31 | ######## | |
a6b378ef | 32 | |
770c12be | 33 | |
aa797ca1 JW |
class Error(Exception):
    """
    Base class for the exceptions defined by the storage module.
    """
36 | ||
37 | ||
class InvalidFilepath(Error):
    """
    Raised when a filepath component cannot be turned into a usable name.
    """
40 | ||
41 | ||
class NoWebServing(Error):
    """
    Raised when a file URL is requested from a storage that has not been
    configured with a way to serve files over the web.
    """
44 | ||
45 | ||
class NotImplementedError(Error):
    """
    Raised when a storage implementation does not provide a feature.

    NOTE: inside this module the name shadows the builtin
    NotImplementedError; this class derives from Error, not from the
    builtin exception.
    """
797be93c | 48 | |
770c12be | 49 | |
d807b725 CAW |
50 | ############################################### |
51 | # Storage interface & basic file implementation | |
52 | ############################################### | |
a6b378ef | 53 | |
797be93c CAW |
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if not self.file_exists(filepath):
            return filepath

        # Only the final component (the filename) gets the uuid prefix;
        # the directory components are returned unchanged.
        return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: storage-API style filepath of the file to copy.
         - dest_path: local filesystem path the copy is written to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
            return

        # open() rather than the Python-2-only file() builtin.  The source
        # is read with a single read() call, so the whole file is held in
        # memory while it is being copied.
        with self.get_file(filepath, 'rb') as source_file:
            with open(dest_path, 'wb') as dest_file:
                dest_file.write(source_file.read())
174 | ||
779f2b94 CAW |
175 | |
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from
         - any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether the resolved local path for filepath exists.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it.

        The containing directories are created first when filepath has
        more than one component.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            # Try to create and tolerate "already there" instead of
            # checking first: a check-then-create sequence fails when
            # something else creates the directory in between.
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from the local filesystem.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, built by joining it onto base_url.

        Raises:
         NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        # NOTE: urljoin resolves the second argument relative to base_url,
        # so a base_url without a trailing slash has its last path segment
        # replaced rather than extended.
        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """
        Return the local filesystem path for filepath.
        """
        return self._resolve_filepath(filepath)
229 | ||
ffa22935 | 230 | |
4dea708c CAW |
231 | # ---------------------------------------------------- |
232 | # OpenStack/Rackspace Cloud's Swift/CloudFiles support | |
233 | # ---------------------------------------------------- | |
b5e7b967 | 234 | |
class CloudFilesStorage(StorageInterface):
    """
    Storage implementation that keeps files as objects in a CloudFiles
    container, accessed through the ``cloudfiles`` module.
    """

    def __init__(self, **kwargs):
        """
        Keyword arguments (each is read with kwargs.get, so may be absent):
         - cloudfiles_container: name of the container to use.
         - cloudfiles_user: username passed to cloudfiles.get_connection.
         - cloudfiles_api_key: API key passed to cloudfiles.get_connection.
         - cloudfiles_host: only used to decide whether to print the
           "defaulting to Rackspace US" notice.
         - cloudfiles_use_servicenet: the string 'true' or the value True
           turns the servicenet option on.
        """
        self.param_container = kwargs.get('cloudfiles_container')
        self.param_user = kwargs.get('cloudfiles_user')
        self.param_api_key = kwargs.get('cloudfiles_api_key')
        self.param_host = kwargs.get('cloudfiles_host')
        self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet')

        # NOTE(review): param_host is never handed to get_connection()
        # below, so a configured host currently has no effect -- confirm
        # whether it should be passed through.
        if not self.param_host:
            print('No CloudFiles host URL specified, '
                  'defaulting to Rackspace US')

        self.connection = cloudfiles.get_connection(
            username=self.param_user,
            api_key=self.param_api_key,
            servicenet=self.param_use_servicenet in ('true', True))

        # NOTE(review): this compares the container *name* with whatever
        # get_container() returns.  Unless that object compares equal to
        # its own name, the create branch is always taken.  Kept as-is so
        # that the set of containers receiving make_public() does not
        # change -- verify against the cloudfiles API before simplifying.
        if not self.param_container == \
                self.connection.get_container(self.param_container):
            self.container = self.connection.create_container(
                self.param_container)
            self.container.make_public(
                ttl=60 * 60 * 2)
        else:
            self.container = self.connection.get_container(
                self.param_container)

        self.container_uri = self.container.public_uri()

    def _resolve_filepath(self, filepath):
        """
        Turn a listy filepath into the '/'-joined object name.
        """
        return '/'.join(
            clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return True if the container has an object for filepath, False if
        looking it up raises cloudfiles.errors.NoSuchObject.
        """
        try:
            self.container.get_object(self._resolve_filepath(filepath))
        except cloudfiles.errors.NoSuchObject:
            return False
        return True

    def get_file(self, filepath, *args, **kwargs):
        """
        Return a CloudFilesStorageObjectWrapper around the object stored
        at filepath, creating the object if it does not exist yet.

        - Doesn't care about the "mode" argument
        """
        object_name = self._resolve_filepath(filepath)
        try:
            obj = self.container.get_object(object_name)
        except cloudfiles.errors.NoSuchObject:
            obj = self.container.create_object(object_name)

        # guess_type() always returns a (type, encoding) tuple, so the
        # tuple itself is always truthy; only the type element tells us
        # whether a guess was made.  Leave content_type untouched when
        # nothing could be guessed instead of overwriting it with None.
        guessed_type = mimetypes.guess_type(filepath[-1])[0]
        if guessed_type is not None:
            obj.content_type = guessed_type

        return CloudFilesStorageObjectWrapper(obj, *args, **kwargs)

    def delete_file(self, filepath):
        """
        Delete the object stored at filepath from the container.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        self.container.delete_object(
            self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the container's public URI joined with the object name.
        """
        return '/'.join([
            self.container_uri,
            self._resolve_filepath(filepath)])
aa797ca1 JW |
306 | |
307 | ||
class CloudFilesStorageObjectWrapper(object):
    """
    Wrapper for python-cloudfiles's cloudfiles.storage_object.Object
    used to circumvent the mystic `medium.jpg` corruption issue, where
    we had both python-cloudfiles and PIL doing buffering on both
    ends and that breaking things.

    This wrapper currently meets mediagoblin's needs for a public_store
    file-like object.
    """
    def __init__(self, storage_object, *args, **kwargs):
        """
        Arguments:
         - storage_object: the object reads and writes are forwarded to.
         - any further positional/keyword arguments (such as a file
           mode) are accepted and ignored.
        """
        self.storage_object = storage_object

    def read(self, *args, **kwargs):
        """
        Forward to the wrapped storage object's read().
        """
        return self.storage_object.read(*args, **kwargs)

    def write(self, data, *args, **kwargs):
        """
        write data to the cloudfiles storage object

        The original motivation for this wrapper is to ensure
        that buffered writing to a cloudfiles storage object does not overwrite
        any preexisting data.

        Currently this method does not support any write modes except "append".
        However if we should need it it would be easy implement.
        """
        # "Append" is emulated: when the object already has content and
        # we were given a str, the existing content is read back and the
        # new data is tacked onto it before writing everything out again.
        # isinstance() is used so that str subclasses are appended too.
        if self.storage_object.size and isinstance(data, str):
            data = self.read() + data

        self.storage_object.write(data, *args, **kwargs)

    def close(self):
        """
        Do nothing; present so the wrapper can be used like a file.
        """
        pass

    def __enter__(self):
        """
        Context Manager API implementation
        http://docs.python.org/library/stdtypes.html#context-manager-types
        """
        return self

    def __exit__(self, *args):
        """
        Context Manager API implementation
        see self.__enter__()
        """
        self.close()
356 | ||
4dea708c CAW |
357 | |
358 | # ------------ | |
359 | # MountStorage | |
360 | # ------------ | |
361 | ||
class MountStorage(StorageInterface):
    """
    Experimental "Mount" virtual Storage Interface

    This isn't an interface to some real storage, instead it's a
    redirecting interface, that redirects requests to other
    "StorageInterface"s.

    For example, say you have the paths:

     1. ['user_data', 'cwebber', 'avatar.jpg']
     2. ['user_data', 'elrond', 'avatar.jpg']
     3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']

    You could mount media_entries under CloudFilesStorage and user_data
    under BasicFileStorage.  Then 1 would be passed to
    BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3
    would be passed to CloudFilesStorage under
    ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'].

    In other words, this is kind of like mounting /home/ and /etc/
    under different filesystems on your operating system... but with
    mediagoblin filestorages :)

    To set this up, you currently need to call the mount() method with
    the target path and a backend, that shall be available under that
    target path.  You have to mount things in a sensible order,
    especially you can't mount ["a", "b"] before ["a"].

    Internally the mounts live in self.mounttab, a tree of nested
    dicts: each key is a path component leading to a sub-dict, and the
    special key None holds the backend mounted at that node.
    """
    def __init__(self, **kwargs):
        """
        Start with an empty mount table; keyword arguments are ignored.
        """
        self.mounttab = {}

    def mount(self, dirpath, backend):
        """
        Mount a new backend under dirpath

        Raises:
         AssertionError if dirpath is already mounted, or if a longer
         path below dirpath has already been mounted.
        """
        # Function-scope import: used only for the debug message below.
        import logging

        new_ent = clean_listy_filepath(dirpath)

        already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True)
        logging.getLogger(__name__).debug(
            "Mounting %r: nearest backend %r, path inside it %r, "
            "remaining path %r, %d entries in deepest table",
            new_ent, already, rem_1, rem_2, len(table))

        # rem_2 is the part of the path that has no node in the tree yet.
        # When it is empty the node for dirpath already exists, so it
        # must hold neither a backend nor any sub-mounts.  Raised
        # explicitly (not via assert) so the checks survive "python -O".
        if not rem_2:
            if None in table:
                raise AssertionError("That path is already mounted")
            if table:
                raise AssertionError("A longer path is already mounted here")

        # Create the missing nodes, then hang the backend on the last one.
        for part in rem_2:
            table[part] = {}
            table = table[part]
        table[None] = backend

    def _resolve_to_backend(self, filepath, extra_info=False):
        """
        extra_info = True is for internal use!

        Normally, returns the backend and the filepath inside that backend.

        With extra_info = True it returns the last directory node and the
        remaining filepath from there in addition.
        """
        table = self.mounttab
        filepath = filepath[:]  # work on a copy, components get popped
        res_fp = None
        while True:
            new_be = table.get(None)
            # Remember the deepest backend seen so far.  The very first
            # pass always records a result (possibly backend None) so
            # that res_be/res_fp/res_extra are defined on every exit.
            if (new_be is not None) or res_fp is None:
                res_be = new_be
                res_fp = filepath[:]
                res_extra = (table, filepath[:])
            if not filepath:
                break
            query = filepath.pop(0)
            entry = table.get(query)
            if entry is None:
                # No node for this component: res_extra still points at
                # the last existing node with the unmatched remainder.
                break
            table = entry
            res_extra = (table, filepath[:])
        if extra_info:
            return (res_be, res_fp) + res_extra
        return (res_be, res_fp)

    def resolve_to_backend(self, filepath):
        """
        Return (backend, filepath inside that backend) for filepath.

        Raises:
         Error if no backend is mounted for filepath.
        """
        backend, filepath = self._resolve_to_backend(filepath)
        if backend is None:
            raise Error("Path not mounted")
        return backend, filepath

    def __repr__(self, table=None, indent=None):
        """
        Return a multi-line description of the mount table.

        The table and indent arguments are used by the recursion only;
        recursive calls return a list of lines instead of a string.
        """
        if indent is None:
            indent = []
        res = []
        if table is None:
            res.append("MountStorage<")
            table = self.mounttab
        v = table.get(None)
        if v is not None:
            res.append(" " * len(indent) + repr(indent) + ": " + repr(v))
        for k, v in table.items():
            if k is None:
                continue
            res.append(" " * len(indent) + repr(k) + ":")
            res += self.__repr__(v, indent + [k])
        if table is self.mounttab:
            res.append(">")
            return "\n".join(res)
        return res

    def file_exists(self, filepath):
        """
        Delegate file_exists to the backend mounted for filepath.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_exists(filepath)

    def get_file(self, filepath, mode='r'):
        """
        Delegate get_file to the backend mounted for filepath.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_file(filepath, mode)

    def delete_file(self, filepath):
        """
        Delegate delete_file to the backend mounted for filepath.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.delete_file(filepath)

    def file_url(self, filepath):
        """
        Delegate file_url to the backend mounted for filepath.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.file_url(filepath)

    def get_local_path(self, filepath):
        """
        Delegate get_local_path to the backend mounted for filepath.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        return backend.get_local_path(filepath)

    def copy_locally(self, filepath, dest_path):
        """
        Need to override copy_locally, because the local_storage
        attribute is not correct: this class's own local_storage value
        says nothing about the backend that actually holds the file, so
        the decision is left to that backend.
        """
        backend, filepath = self.resolve_to_backend(filepath)
        backend.copy_locally(filepath, dest_path)
499 | ||
68cf996c | 500 | |
d807b725 CAW |
501 | ########### |
502 | # Utilities | |
503 | ########### | |
504 | ||
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (something like
    ['dir1', 'dir2', 'filename.jpg']) by passing it through
    secure_filename.

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # A component that cleans down to nothing cannot be used as a name.
    if u'' in sanitized:
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
530 | ||
531 | ||
def storage_system_from_config(config_section):
    """
    Build a storage system out of a config section.

    The section may carry a special "storage_class" entry holding the
    import path of the storage class to instantiate.  When it is
    missing, "mediagoblin.storage:BasicFileStorage" is used.  Every
    other entry is handed to the storage class as a keyword argument.

    Arguments:
     - config_section: dictionary of config parameters

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'base_url': '/media/',
         'base_dir': '/var/whatever/media/'})

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    # This construct is needed, because dict(config) does
    # not replace the variables in the config items.
    config_params = dict(config_section.iteritems())

    # Take the class path out of the parameters so it is not passed on
    # to the storage class constructor.
    class_path = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_factory = util.import_component(class_path)
    return storage_factory(**config_params)