| 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
| 2 | # Copyright (C) 2011 Free Software Foundation, Inc |
| 3 | # |
| 4 | # This program is free software: you can redistribute it and/or modify |
| 5 | # it under the terms of the GNU Affero General Public License as published by |
| 6 | # the Free Software Foundation, either version 3 of the License, or |
| 7 | # (at your option) any later version. |
| 8 | # |
| 9 | # This program is distributed in the hope that it will be useful, |
| 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | # GNU Affero General Public License for more details. |
| 13 | # |
| 14 | # You should have received a copy of the GNU Affero General Public License |
| 15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | |
| 17 | import os |
| 18 | import re |
| 19 | import urlparse |
| 20 | import uuid |
| 21 | |
| 22 | from werkzeug.utils import secure_filename |
| 23 | |
| 24 | from mediagoblin import util |
| 25 | |
| 26 | ######## |
| 27 | # Errors |
| 28 | ######## |
| 29 | |
class Error(Exception):
    """Base class for every exception defined by this storage module."""


class InvalidFilepath(Error):
    """A filepath component could not be turned into a usable name."""


class NoWebServing(Error):
    """A file URL was requested from a storage that has no base URL."""


# NOTE: the historical name is kept for backward compatibility even though
# it shadows the builtin inside this module.  While the class statement
# below is being evaluated the right-hand ``NotImplementedError`` still
# refers to the builtin, so the resulting class derives from both ``Error``
# and the builtin exception; handlers written against either one catch it.
class NotImplementedError(Error, NotImplementedError):
    """A storage implementation does not provide the requested feature."""
| 35 | |
| 36 | |
| 37 | ############################################### |
| 38 | # Storage interface & basic file implementation |
| 39 | ############################################### |
| 40 | |
class StorageInterface(object):
    """
    Contract that every storage backend is expected to follow.

    Nothing is stored by this class itself: each storage operation
    below simply raises NotImplementedError until a subclass overrides
    it.  Only get_unique_filepath() carries real behavior, and it is
    built on top of file_exists().

    Throughout the storage API a "filepath" is a list of path
    components such as ['dir1', 'dir2', 'file.jpg'], not a single
    string; read the method documentation with that in mind.

    Subclasses should take whatever keyword arguments their storage
    system needs in __init__, and should also silently accept any
    extra keyword arguments, for instance:

      def __init__(self, **kwargs):
          pass

    BasicFileStorage is a simple implementation of this interface.
    """

    def __raise_not_implemented(self):
        """
        Complain that a subclass did not implement the feature that
        was just called.
        """
        message = (
            "This feature not implemented in this storage API implementation.")
        raise NotImplementedError(message)

    def file_exists(self, filepath):
        """
        Tell whether a file is present at filepath in this storage
        system.

        Returns:
          True if the file exists, False otherwise.
        """
        # To be overridden by subclasses.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Open filepath and hand back a file-like object for reading or
        writing, according to mode.

        Implementations should create directories, buckets, whatever,
        as necessary.
        """
        # To be overridden by subclasses.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file stored at filepath.

        Implementations might also need to remove directories,
        buckets, whatever, for cleanliness (taking care to avoid race
        conditions while doing so).
        """
        # To be overridden by subclasses.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Return the URL for the file at filepath.  This assumes the
        storage has been mounted with some kind of URL which makes
        this possible.
        """
        # To be overridden by subclasses.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        Return a cleaned filepath that does not collide with an
        existing file.

        When nothing exists at the (cleaned) filepath it is returned
        as is:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        When a file is already there, a uuid is prepended to the
        final component:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Clean first: the last component may get a prefix added, so
        # it has to be in its final form before we inspect it.
        cleaned = clean_listy_filepath(filepath)

        if not self.file_exists(cleaned):
            return cleaned

        directories, filename = cleaned[:-1], cleaned[-1]
        return directories + ["%s-%s" % (uuid.uuid4(), filename)]
| 129 | |
| 130 | |
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The listy filepath is cleaned with clean_listy_filepath() and
        its components are joined onto base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return True if something exists on disk at the resolved
        filepath, False otherwise.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath with the given mode and return the
        open file object.

        When filepath has more than one component, the directories
        leading to the file are created first if they are missing.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            # Create first and inspect on failure rather than checking
            # beforehand: another caller may create the directory
            # between a check and the makedirs() call, which used to
            # surface as a spurious OSError.
            try:
                os.makedirs(directory)
            except OSError:
                # If the path is there now, carry on (any remaining
                # problem is reported by open() below); otherwise the
                # creation genuinely failed.
                if not os.path.exists(directory):
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at the resolved filepath from disk.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, built by joining the cleaned
        components with '/' onto base_url.

        Raises NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))
| 179 | |
| 180 | |
| 181 | ########### |
| 182 | # Utilities |
| 183 | ########### |
| 184 | |
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (something like
    ['dir1', 'dir2', 'filename.jpg']).

    Each component is passed through secure_filename() and converted
    to unicode.

    For example:
      >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
      [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
     - listy_filepath: a list of filepath components, mediagoblin
       storage API style.

    Returns:
      A new list holding the cleaned components as unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # Every component is cleaned before any of them is validated, so
    # the check only runs once the whole list has been processed.
    if any(part == u'' for part in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
| 210 | |
| 211 | |
def storage_system_from_paste_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_paste_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

    Will return:
      BasicFileStorage(
        base_url='/media/',
        base_dir='/var/whatever/media/')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Keep only the keys carrying our prefix, stripped down to the part
    # after "<prefix>_".  Each key is matched exactly once.
    config_params = {}
    for key, value in paste_config.items():
        matched = prefix_re.match(key)
        if matched:
            config_params[matched.group(1)] = value

    # 'storage_class' selects the implementation and must not be passed
    # on to its constructor, so take it out of the parameters here.
    storage_class = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(storage_class)
    return storage_class(**config_params)