renaming storage_system_from_paste_config()->storage_system_from_config()
[mediagoblin.git] / mediagoblin / storage.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 Free Software Foundation, Inc
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import os
18 import re
19 import shutil
20 import urlparse
21 import uuid
22
23 from werkzeug.utils import secure_filename
24
25 from mediagoblin import util
26
27 ########
28 # Errors
29 ########
30
class Error(Exception):
    """Base class for every exception defined by the storage module."""


class InvalidFilepath(Error):
    """A listy filepath contained a component that cannot be used."""


class NoWebServing(Error):
    """A URL was requested from a storage system that has no base URL."""


# NOTE: this deliberately-named class shadows the builtin
# NotImplementedError for all code in this module; it derives from
# Error, not from the builtin of the same name.
class NotImplementedError(Error):
    """A storage implementation does not provide the requested feature."""
36
37
38 ###############################################
39 # Storage interface & basic file implementation
40 ###############################################
41
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise NotImplementedError to signal that a subclass of this
        interface did not implement some component.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.

        The returned object is used as a context manager by
        copy_locally(), so implementations should support "with".
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']

        Returns:
          A cleaned listy filepath; the last component is prefixed
          with a fresh uuid4 when the original name is already taken.
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Arguments:
         - filepath: listy filepath of the stored file to copy.
         - dest_path: local filesystem path to write the copy to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                with open(dest_path, 'wb') as dest_file:
                    # Stream in chunks rather than read() the whole
                    # file, so large media doesn't have to fit in memory.
                    shutil.copyfileobj(source_file, dest_file)
162
163
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The listy filepath is cleaned first, then joined onto base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether the resolved local path for filepath exists.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode, creating any
        missing parent directories first.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            if not os.path.exists(directory):
                try:
                    os.makedirs(directory)
                except OSError:
                    # Someone else may have created the directory
                    # between the existence check and makedirs(); that
                    # is fine.  Anything else is a real error.
                    if not os.path.isdir(directory):
                        raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from the local filesystem.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, joined onto base_url.

        Raises:
          NoWebServing if this storage was set up without a base_url.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """
        Return the local filesystem path for filepath.
        """
        return self._resolve_filepath(filepath)
217
218
219 ###########
220 # Utilities
221 ###########
222
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath.

    A listy filepath looks like ['dir1', 'dir2', 'filename.jpg'].  Each
    component is run through secure_filename() and converted to
    unicode.

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # Only validate once every component has been cleaned.
    if any(component == u'' for component in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
248
249
def storage_system_from_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Collect every "<prefix>_<name>" entry as name -> value, matching
    # each key against the pattern only once.
    config_params = {}
    for key, value in paste_config.items():
        match = prefix_re.match(key)
        if match:
            config_params[match.group(1)] = value

    # storage_class is consumed here and must not be forwarded to the
    # storage system's constructor.
    storage_class = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(storage_class)
    return storage_class(**config_params)
294
295