X-Git-Url: https://vcs.fsf.org/?a=blobdiff_plain;f=mediagoblin%2Fstorage.py;h=7ada95e1165c3d177a37993066f715bdb4449ff9;hb=7dc3a66f924c734cbee68677e84f5b656eefe280;hp=ad8f8829dce562da22c63eb16260b4ef30fea5ce;hpb=2d1a60739ecfb64c16e35940b7cee1eb018eb448;p=mediagoblin.git diff --git a/mediagoblin/storage.py b/mediagoblin/storage.py index ad8f8829..7ada95e1 100644 --- a/mediagoblin/storage.py +++ b/mediagoblin/storage.py @@ -1,4 +1,4 @@ -# GNU Mediagoblin -- federated, autonomous media hosting +# GNU MediaGoblin -- federated, autonomous media hosting # Copyright (C) 2011 Free Software Foundation, Inc # # This program is free software: you can redistribute it and/or modify @@ -15,42 +15,39 @@ # along with this program. If not, see . import os +import shutil +import urlparse +import uuid +import cloudfiles from werkzeug.utils import secure_filename +from mediagoblin import util -class Error(Exception): pass -class InvalidFilepath(Error): pass +######## +# Errors +######## -class NotImplementedError(Error): pass +class Error(Exception): + pass -def clean_listy_filepath(listy_filepath): - """ - Take a listy filepath (like ['dir1', 'dir2', 'filename.jpg']) and - clean out any nastiness from it. - For example: - >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg']) - [u'dir1', u'foo_.._nasty', u'linooks.jpg'] +class InvalidFilepath(Error): + pass - Args: - - listy_filepath: a list of filepath components, mediagoblin - storage API style. - Returns: - A cleaned list of unicode objects. - """ - cleaned_filepath = [ - unicode(secure_filename(filepath)) - for filepath in listy_filepath] +class NoWebServing(Error): + pass - if u'' in cleaned_filepath: - raise InvalidFilepath( - "A filename component could not be resolved into a usable name.") - return cleaned_filepath +class NotImplementedError(Error): + pass + +############################################### +# Storage interface & basic file implementation +############################################### class StorageInterface(object): """ @@ -62,9 +59,20 @@ class StorageInterface(object): It is important to note that the storage API idea of a "filepath" is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in mind while reading method documentation. + + You should set up your __init__ method with whatever keyword + arguments are appropriate to your storage system, but you should + also passively accept all extraneous keyword arguments like: + + def __init__(self, **kwargs): + pass + + See BasicFileStorage as a simple implementation of the + StorageInterface. """ - # def __init__(self, *args, **kwargs): - # pass + + # Whether this file store is on the local filesystem. + local_storage = False def __raise_not_implemented(self): """ @@ -85,7 +93,7 @@ class StorageInterface(object): # Subclasses should override this method. self.__raise_not_implemented() - def get_file(self, filepath, mode): + def get_file(self, filepath, mode='r'): """ Return a file-like object for reading/writing from this filepath. @@ -104,7 +112,7 @@ class StorageInterface(object): # Subclasses should override this method. self.__raise_not_implemented() - def url_for_file(self, filepath): + def file_url(self, filepath): """ Get the URL for this file. This assumes our storage has been mounted with some kind of URL which makes this possible. @@ -119,31 +127,66 @@ class StorageInterface(object): Eg, if the filename doesn't exist: >>> storage_handler.get_unique_filename(['dir1', 'dir2', 'fname.jpg']) [u'dir1', u'dir2', u'fname.jpg'] - + But if a file does exist, let's get one back with at uuid tacked on: >>> storage_handler.get_unique_filename(['dir1', 'dir2', 'fname.jpg']) [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg'] """ + # Make sure we have a clean filepath to start with, since + # we'll be possibly tacking on stuff to the filename. + filepath = clean_listy_filepath(filepath) + if self.file_exists(filepath): return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])] else: return filepath + def get_local_path(self, filepath): + """ + If this is a local_storage implementation, give us a link to + the local filesystem reference to this file. + + >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg']) + u'/path/to/mounting/foo/bar/baz.jpg' + """ + # Subclasses should override this method, if applicable. + self.__raise_not_implemented() + + def copy_locally(self, filepath, dest_path): + """ + Copy this file locally. + + A basic working method for this is provided that should + function both for local_storage systems and remote storge + systems, but if more efficient systems for copying locally + apply to your system, override this method with something more + appropriate. + """ + if self.local_storage: + shutil.copy( + self.get_local_path(filepath), dest_path) + else: + with self.get_file(filepath, 'rb') as source_file: + with file(dest_path, 'wb') as dest_file: + dest_file.write(source_file.read()) + class BasicFileStorage(StorageInterface): """ Basic local filesystem implementation of storage API """ - def __init__(self, base_dir, serve_url=None): + local_storage = True + + def __init__(self, base_dir, base_url=None, **kwargs): """ Keyword arguments: - base_dir: Base directory things will be served out of. MUST be an absolute path. - - serve_url: URL files will be served from + - base_url: URL files will be served from """ self.base_dir = base_dir - self.serve_url = serve_url + self.base_url = base_url def _resolve_filepath(self, filepath): """ @@ -151,15 +194,305 @@ class BasicFileStorage(StorageInterface): """ return os.path.join( self.base_dir, *clean_listy_filepath(filepath)) - + def file_exists(self, filepath): return os.path.exists(self._resolve_filepath(filepath)) - def get_file(self, filepath, mode): - pass + def get_file(self, filepath, mode='r'): + # Make directories if necessary + if len(filepath) > 1: + directory = self._resolve_filepath(filepath[:-1]) + if not os.path.exists(directory): + os.makedirs(directory) + + # Grab and return the file in the mode specified + return open(self._resolve_filepath(filepath), mode) + + def delete_file(self, filepath): + # TODO: Also delete unused directories if empty (safely, with + # checks to avoid race conditions). + os.remove(self._resolve_filepath(filepath)) + + def file_url(self, filepath): + if not self.base_url: + raise NoWebServing( + "base_url not set, cannot provide file urls") + + return urlparse.urljoin( + self.base_url, + '/'.join(clean_listy_filepath(filepath))) + + def get_local_path(self, filepath): + return self._resolve_filepath(filepath) + + +class CloudFilesStorage(StorageInterface): + def __init__(self, **kwargs): + self.param_container = kwargs.get('cloudfiles_container') + self.param_user = kwargs.get('cloudfiles_user') + self.param_api_key = kwargs.get('cloudfiles_api_key') + self.param_host = kwargs.get('cloudfiles_host') + self.param_use_servicenet = kwargs.get('cloudfiles_use_servicenet') + + if not self.param_host: + print('No CloudFiles host URL specified, ' + 'defaulting to Rackspace US') + + self.connection = cloudfiles.get_connection( + username=self.param_user, + api_key=self.param_api_key, + servicenet=True if self.param_use_servicenet == 'true' or \ + self.param_use_servicenet == True else False) + + if not self.param_container == \ + self.connection.get_container(self.param_container): + self.container = self.connection.create_container( + self.param_container) + self.container.make_public( + ttl=60 * 60 * 2) + else: + self.container = self.connection.get_container( + self.param_container) + + self.container_uri = self.container.public_uri() + + def _resolve_filepath(self, filepath): + return '/'.join( + clean_listy_filepath(filepath)) + + def file_exists(self, filepath): + try: + object = self.container.get_object( + self._resolve_filepath(filepath)) + return True + except cloudfiles.errors.NoSuchObject: + return False + + def get_file(self, filepath, mode='r'): + try: + obj = self.container.get_object( + self._resolve_filepath(filepath)) + except cloudfiles.errors.NoSuchObject: + obj = self.container.create_object( + self._resolve_filepath(filepath)) + + return obj + + def delete_file(self, filepath): + # TODO: Also delete unused directories if empty (safely, with + # checks to avoid race conditions). + self.container.delete_object(filepath) + + def file_url(self, filepath): + return '/'.join([ + self.container_uri, + self._resolve_filepath(filepath)]) + + +class MountStorage(StorageInterface): + """ + Experimental "Mount" virtual Storage Interface + + This isn't an interface to some real storage, instead it's a + redirecting interface, that redirects requests to other + "StorageInterface"s. + + For example, say you have the paths: + + 1. ['user_data', 'cwebber', 'avatar.jpg'] + 2. ['user_data', 'elrond', 'avatar.jpg'] + 3. ['media_entries', '34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg'] + + You could mount media_entries under CloudFileStorage and user_data + under BasicFileStorage. Then 1 would be passed to + BasicFileStorage under the path ['cwebber', 'avatar.jpg'] and 3 + would be passed to CloudFileStorage under + ['34352f304c3f4d0ad8ad0f043522b6f2', 'thumb.jpg']. + + In other words, this is kind of like mounting /home/ and /etc/ + under different filesystems on your operating system... but with + mediagoblin filestorages :) + + To set this up, you currently need to call the mount() method with + the target path and a backend, that shall be available under that + target path. You have to mount things in a sensible order, + especially you can't mount ["a", "b"] before ["a"]. + """ + def __init__(self, **kwargs): + self.mounttab = {} + + def mount(self, dirpath, backend): + """ + Mount a new backend under dirpath + """ + new_ent = clean_listy_filepath(dirpath) + + print "Mounting:", repr(new_ent) + already, rem_1, table, rem_2 = self._resolve_to_backend(new_ent, True) + print "===", repr(already), repr(rem_1), repr(rem_2), len(table) + + assert (len(rem_2) > 0) or (None not in table), \ + "That path is already mounted" + assert (len(rem_2) > 0) or (len(table)==0), \ + "A longer path is already mounted here" + + for part in rem_2: + table[part] = {} + table = table[part] + table[None] = backend + + def _resolve_to_backend(self, filepath, extra_info = False): + """ + extra_info = True is for internal use! + + Normally, returns the backend and the filepath inside that backend. + + With extra_info = True it returns the last directory node and the + remaining filepath from there in addition. + """ + table = self.mounttab + filepath = filepath[:] + res_fp = None + while True: + new_be = table.get(None) + if (new_be is not None) or res_fp is None: + res_be = new_be + res_fp = filepath[:] + res_extra = (table, filepath[:]) + # print "... New res: %r, %r, %r" % (res_be, res_fp, res_extra) + if len(filepath) == 0: + break + query = filepath.pop(0) + entry = table.get(query) + if entry is not None: + table = entry + res_extra = (table, filepath[:]) + else: + break + if extra_info: + return (res_be, res_fp) + res_extra + else: + return (res_be, res_fp) + + def resolve_to_backend(self, filepath): + backend, filepath = self._resolve_to_backend(filepath) + if backend is None: + raise Error("Path not mounted") + return backend, filepath + + def __repr__(self, table = None, indent = []): + res = [] + if table is None: + res.append("MountStorage<") + table = self.mounttab + v = table.get(None) + if v: + res.append(" " * len(indent) + repr(indent) + ": " + repr(v)) + for k, v in table.iteritems(): + if k == None: + continue + res.append(" " * len(indent) + repr(k) + ":") + res += self.__repr__(v, indent + [k]) + if table is self.mounttab: + res.append(">") + return "\n".join(res) + else: + return res + + def file_exists(self, filepath): + backend, filepath = self.resolve_to_backend(filepath) + return backend.file_exists(filepath) + + def get_file(self, filepath, mode='r'): + backend, filepath = self.resolve_to_backend(filepath) + return backend.get_file(filepath, mode) def delete_file(self, filepath): - pass + backend, filepath = self.resolve_to_backend(filepath) + return backend.delete_file(filepath) + + def file_url(self, filepath): + backend, filepath = self.resolve_to_backend(filepath) + return backend.file_url(filepath) + + def get_local_path(self, filepath): + backend, filepath = self.resolve_to_backend(filepath) + return backend.get_local_path(filepath) + + def copy_locally(self, filepath, dest_path): + """ + Need to override copy_locally, because the local_storage + attribute is not correct. + """ + backend, filepath = self.resolve_to_backend(filepath) + backend.copy_locally(filepath, dest_path) + + +########### +# Utilities +########### + +def clean_listy_filepath(listy_filepath): + """ + Take a listy filepath (like ['dir1', 'dir2', 'filename.jpg']) and + clean out any nastiness from it. + + + >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg']) + [u'dir1', u'foo_.._nasty', u'linooks.jpg'] + + Args: + - listy_filepath: a list of filepath components, mediagoblin + storage API style. + + Returns: + A cleaned list of unicode objects. + """ + cleaned_filepath = [ + unicode(secure_filename(filepath)) + for filepath in listy_filepath] + + if u'' in cleaned_filepath: + raise InvalidFilepath( + "A filename component could not be resolved into a usable name.") + + return cleaned_filepath + + +def storage_system_from_config(config_section): + """ + Utility for setting up a storage system from a config section. + + Note that a special argument may be passed in to + the config_section which is "storage_class" which will provide an + import path to a storage system. This defaults to + "mediagoblin.storage:BasicFileStorage" if otherwise undefined. - def url_for_file(self, filepath): - pass + Arguments: + - config_section: dictionary of config parameters + + Returns: + An instantiated storage system. + + Example: + storage_system_from_config( + {'base_url': '/media/', + 'base_dir': '/var/whatever/media/'}) + + Will return: + BasicFileStorage( + base_url='/media/', + base_dir='/var/whatever/media') + """ + # This construct is needed, because dict(config) does + # not replace the variables in the config items. + config_params = dict(config_section.iteritems()) + + if 'storage_class' in config_params: + storage_class = config_params['storage_class'] + config_params.pop('storage_class') + else: + storage_class = "mediagoblin.storage:BasicFileStorage" + + storage_class = util.import_component(storage_class) + return storage_class(**config_params)