Document the db submodule a bit
[mediagoblin.git] / mediagoblin / storage.py
CommitLineData
8e1e744d 1# GNU MediaGoblin -- federated, autonomous media hosting
a6b378ef
CAW
2# Copyright (C) 2011 Free Software Foundation, Inc
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
779f2b94 17import os
ffa22935 18import re
6a07362d 19import shutil
f61a41b8 20import urlparse
2fdec827 21import uuid
a6b378ef
CAW
22
23from werkzeug.utils import secure_filename
24
ffa22935
CAW
25from mediagoblin import util
26
d807b725
CAW
27########
28# Errors
29########
a6b378ef 30
770c12be
CAW
class Error(Exception):
    """Base class for every error raised by the storage module."""


class InvalidFilepath(Error):
    """
    Raised when a listy filepath contains a component that cannot be
    cleaned into a usable filename.
    """


class NoWebServing(Error):
    """
    Raised when a URL is requested from a storage system that has not
    been configured to serve files over the web.
    """


# NOTE: This intentionally keeps the historical name, which shadows the
# builtin inside this module.  Deriving from the builtin as well (the
# name in the bases list still resolves to the builtin on first import,
# because the module-level name is not bound until the class statement
# completes) means that code catching the builtin NotImplementedError
# also catches this one, while `except Error` keeps working as before.
class NotImplementedError(Error, NotImplementedError):
    """
    Raised when a storage implementation does not provide a feature of
    the storage interface.
    """
36
770c12be 37
d807b725
CAW
38###############################################
39# Storage interface & basic file implementation
40###############################################
a6b378ef 41
797be93c
CAW
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Args:
         - filepath: listy filepath of the file inside this storage.
         - dest_path: local filesystem path to write the copy to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                with open(dest_path, 'wb') as dest_file:
                    # Stream in chunks rather than reading the whole
                    # file into memory at once.
                    shutil.copyfileobj(source_file, dest_file)
162
779f2b94
CAW
163
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    local_storage = True

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The listy filepath is cleaned with clean_listy_filepath before
        being joined onto base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether the file at filepath exists under base_dir.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath with the given mode and return it.

        Parent directories are created first when filepath has more
        than one component.
        """
        import errno

        # Make directories if necessary.  Attempt the creation and
        # tolerate "already exists" instead of checking beforehand, so
        # another writer creating the same directory between a check
        # and the makedirs call cannot make this fail.
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            try:
                os.makedirs(directory)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from the local filesystem.
        """
        # TODO: Also delete unused directories if empty (safely, with
        # checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for the file at filepath, built from base_url.

        Raises NoWebServing if no base_url was configured.

        Note: the path is joined with urljoin, which resolves it
        relative to base_url; a base_url without a trailing slash has
        its final path segment replaced rather than extended.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))

    def get_local_path(self, filepath):
        """
        Return the local filesystem path for the file at filepath.
        """
        return self._resolve_filepath(filepath)
217
ffa22935 218
d807b725
CAW
219###########
220# Utilities
221###########
222
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath (like
    ['dir1', 'dir2', 'filename.jpg']).

    Each component is passed through secure_filename and converted to
    unicode.

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # A component that cleans down to nothing cannot be used as a name.
    if any(part == u'' for part in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
248
249
def storage_system_from_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Collect every "<prefix>_<name>" entry as a "<name>" keyword
    # argument, matching each key against the pattern only once.
    config_params = {}
    for key, value in paste_config.items():
        match = prefix_re.match(key)
        if match:
            config_params[match.group(1)] = value

    # The storage class is not itself a constructor argument, so take
    # it out of the parameters (falling back to the default).
    storage_class = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(storage_class)
    return storage_class(**config_params)
6a07362d
CAW
294
295