Commit | Line | Data |
---|---|---|
8e1e744d | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
a6b378ef CAW |
2 | # Copyright (C) 2011 Free Software Foundation, Inc |
3 | # | |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
779f2b94 | 17 | import os |
ffa22935 | 18 | import re |
6a07362d | 19 | import shutil |
f61a41b8 | 20 | import urlparse |
2fdec827 | 21 | import uuid |
a6b378ef CAW |
22 | |
23 | from werkzeug.utils import secure_filename | |
24 | ||
ffa22935 CAW |
25 | from mediagoblin import util |
26 | ||
d807b725 CAW |
27 | ######## |
28 | # Errors | |
29 | ######## | |
a6b378ef | 30 | |
770c12be CAW |
class Error(Exception):
    """Base class for all exceptions defined by this storage module."""


class InvalidFilepath(Error):
    """A listy filepath contained a component with no usable name."""


class NoWebServing(Error):
    """A file URL was requested from a storage that has no base URL."""


# NOTE: this deliberately-named class shadows the builtin
# NotImplementedError for the rest of this module.
class NotImplementedError(Error):
    """A storage implementation does not provide the requested feature."""
36 | ||
770c12be | 37 | |
d807b725 CAW |
38 | ############################################### |
39 | # Storage interface & basic file implementation | |
40 | ############################################### | |
a6b378ef | 41 | |
797be93c CAW |
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.

        Raises:
          NotImplementedError, always.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']

        Returns:
          A cleaned listy filepath; the last component is prefixed with
          a fresh uuid4 when the requested path is already taken.
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Args:
         - filepath: listy filepath of the stored file to copy.
         - dest_path: local filesystem path to write the copy to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                with open(dest_path, 'wb') as dest_file:
                    # Stream in chunks so large media files are never
                    # held in memory all at once.
                    shutil.copyfileobj(source_file, dest_file)
162 | ||
779f2b94 CAW |
163 | |
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
        - base_dir: Base directory things will be served out of.  MUST
          be an absolute path.
        - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The listy filepath is cleaned first, then joined onto base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether the resolved local path for filepath exists.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it.

        Parent directories are created first when filepath has more
        than one component.
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            # Try to create and tolerate "already there" instead of
            # checking first: another writer may create the directory
            # between an existence check and the makedirs() call.
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from the local filesystem.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, built by joining the cleaned
        path components onto base_url.

        Raises:
          NoWebServing if no base_url was configured.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """
        Return the local filesystem path for filepath.
        """
        return self._resolve_filepath(filepath)
217 | ||
ffa22935 | 218 | |
d807b725 CAW |
219 | ########### |
220 | # Utilities | |
221 | ########### | |
222 | ||
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (something like
    ['dir1', 'dir2', 'filename.jpg']).

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A new list of unicode objects, one cleaned component per input
      component.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # Validate only after every component has been cleaned; an empty
    # string means that component had nothing usable left in it.
    if u'' in sanitized:
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
248 | ||
249 | ||
3c7d11ff | 250 | def storage_system_from_config(paste_config, storage_prefix): |
ffa22935 CAW |
251 | """ |
252 | Utility for setting up a storage system from the paste app config. | |
253 | ||
254 | Note that a special argument may be passed in to the paste_config | |
255 | which is "${storage_prefix}_storage_class" which will provide an | |
256 | import path to a storage system. This defaults to | |
257 | "mediagoblin.storage:BasicFileStorage" if otherwise undefined. | |
258 | ||
259 | Arguments: | |
260 | - paste_config: dictionary of config parameters | |
261 | - storage_prefix: the storage system we're setting up / will be | |
262 | getting keys/arguments from. For example 'publicstore' will | |
263 | grab all arguments that are like 'publicstore_FOO'. | |
264 | ||
265 | Returns: | |
266 | An instantiated storage system. | |
267 | ||
268 | Example: | |
3c7d11ff | 269 | storage_system_from_config( |
ffa22935 CAW |
270 | {'publicstore_base_url': '/media/', |
271 | 'publicstore_base_dir': '/var/whatever/media/'}, | |
272 | 'publicstore') | |
273 | ||
274 | Will return: | |
275 | BasicFileStorage( | |
276 | base_url='/media/', | |
277 | base_dir='/var/whatever/media') | |
278 | """ | |
279 | prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix)) | |
280 | ||
281 | config_params = dict( | |
282 | [(prefix_re.match(key).groups()[0], value) | |
283 | for key, value in paste_config.iteritems() | |
284 | if prefix_re.match(key)]) | |
285 | ||
286 | if config_params.has_key('storage_class'): | |
287 | storage_class = config_params['storage_class'] | |
288 | config_params.pop('storage_class') | |
289 | else: | |
290 | storage_class = "mediagoblin.storage:BasicFileStorage" | |
291 | ||
292 | storage_class = util.import_component(storage_class) | |
293 | return storage_class(**config_params) | |
6a07362d CAW |
294 | |
295 |