Merge remote branch 'remotes/gandaro/324-bad-media-types'
[mediagoblin.git] / mediagoblin / storage.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011 Free Software Foundation, Inc
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import os
18 import re
19 import urlparse
20 import uuid
21
22 from werkzeug.utils import secure_filename
23
24 from mediagoblin import util
25
26 ########
27 # Errors
28 ########
29
class Error(Exception):
    """Root of the exception hierarchy defined by this storage module."""


class InvalidFilepath(Error):
    """A listy filepath contained a component with no usable name."""


class NoWebServing(Error):
    """A URL was requested from a storage that has no base URL set."""


class NotImplementedError(Error):
    """
    A storage implementation does not provide the requested feature.

    Note that inside this module this name shadows the builtin
    exception of the same name.
    """
35
36
37 ###############################################
38 # Storage interface & basic file implementation
39 ###############################################
40
class StorageInterface(object):
    """
    Abstract description of the storage API.

    Nothing here implements real storage; the class only lays out the
    methods a concrete storage system is expected to offer.

    Throughout the storage API a "filepath" is a list of components
    such as ['dir1', 'dir2', 'file.jpg'] rather than a single string,
    so read the method documentation with that in mind.

    Implementations should take whatever keyword arguments make sense
    for their backend in __init__, and should also quietly accept any
    extra keyword arguments, like so:

      def __init__(self, **kwargs):
          pass

    BasicFileStorage is a simple example implementation.
    """

    def __raise_not_implemented(self):
        """
        Raise NotImplementedError for a feature the subclass lacks.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Tell whether a file is stored at the given filepath.

        Returns:
          True if the file exists in this storage system, else False.
        """
        # Placeholder only; concrete storage classes override this.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing at filepath.

        Implementations should create any directories, buckets, etc.
        that are needed along the way.
        """
        # Placeholder only; concrete storage classes override this.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference whatever is stored at filepath.

        Implementations may also want to tidy up directories, buckets
        and the like (taking care to avoid race conditions).
        """
        # Placeholder only; concrete storage classes override this.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Return the URL for the file at filepath.

        This presumes the storage is mounted under some URL that makes
        serving the file possible.
        """
        # Placeholder only; concrete storage classes override this.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        Return a cleaned filepath that does not clash with a stored file.

        When nothing is stored at the (cleaned) filepath it comes back
        as-is:
          >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
          [u'dir1', u'dir2', u'fname.jpg']

        When a file is already there, the last component is prefixed
        with a freshly generated uuid:
          >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
          [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Clean first: the filename may get a prefix glued onto it, so
        # it has to be in its final, sanitized form before that.
        cleaned = clean_listy_filepath(filepath)

        if not self.file_exists(cleaned):
            return cleaned

        directories, filename = cleaned[:-1], cleaned[-1]
        unique_name = "%s-%s" % (uuid.uuid4(), filename)
        return directories + [unique_name]
129
130
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The listy filepath is cleaned and its components are joined
        onto self.base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return True if something exists on disk at filepath, else False.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open and return the file at filepath in the given mode.

        Parent directories are created first if they are missing
        (regardless of mode).
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            if not os.path.exists(directory):
                try:
                    os.makedirs(directory)
                except OSError:
                    # Another request may have created the directory
                    # between the exists() check and makedirs(); that
                    # is fine.  Anything else is a real error.
                    if not os.path.isdir(directory):
                        raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from disk.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, built on top of self.base_url.

        Raises:
          NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        # NOTE: urljoin resolves the path relative to base_url, so a
        # base_url without a trailing slash has its last segment
        # replaced rather than extended.
        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))
179
180
181 ###########
182 # Utilities
183 ###########
184
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath.

    Each component of a list such as ['dir1', 'dir2', 'filename.jpg']
    is passed through secure_filename and converted to unicode.

    For example:
      >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
      [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A new list of cleaned unicode components.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # Only validate once every component has been cleaned.
    if any(part == u'' for part in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
210
211
def storage_system_from_paste_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_paste_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Collect every "<prefix>_<name>" entry as <name> -> value, matching
    # each key against the pattern only once.
    config_params = {}
    for key, value in paste_config.items():
        match = prefix_re.match(key)
        if match:
            config_params[match.group(1)] = value

    # 'storage_class' picks the implementation and must not be passed on
    # to its constructor, so take it out of the keyword arguments.
    storage_class = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(storage_class)
    return storage_class(**config_params)