Modified logo to change on :hover
[mediagoblin.git] / mediagoblin / storage.py
CommitLineData
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

779f2b94 17import os
ffa22935 18import re
f61a41b8 19import urlparse
2fdec827 20import uuid
a6b378ef
CAW
21
22from werkzeug.utils import secure_filename
23
ffa22935
CAW
24from mediagoblin import util
25
d807b725
CAW
########
# Errors
########
a6b378ef 29
770c12be
CAW
class Error(Exception):
    """Base class for every exception raised by the storage module."""


class InvalidFilepath(Error):
    """A listy filepath contained a component that cannot be used."""


class NoWebServing(Error):
    """A URL was requested from a storage that has no base URL set."""


# NOTE: this deliberately reuses the name of the builtin exception, so
# within this module ``NotImplementedError`` refers to the class below
# (an ``Error`` subclass), not to the builtin.  The name is kept because
# callers may already catch it by this name.
class NotImplementedError(Error):
    """A storage implementation does not provide the requested feature."""
35
770c12be 36
d807b725
CAW
###############################################
# Storage interface & basic file implementation
###############################################
a6b378ef 40
797be93c
CAW
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    def __raise_not_implemented(self):
        """
        Raise NotImplementedError for a component that a subclass of
        this interface has not implemented.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        The filepath is cleaned with clean_listy_filepath() first, so the
        cleaned form is what gets returned in both cases.  Note that the
        existence check does not reserve the returned name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            # Only the last component (the filename) gets the uuid prefix;
            # the directory components are left as they are.
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath
779f2b94
CAW
129
130
class BasicFileStorage(StorageInterface):
    """
    Basic local filesystem implementation of storage API
    """

    def __init__(self, base_dir, base_url=None, **kwargs):
        """
        Keyword arguments:
         - base_dir: Base directory things will be served out of.  MUST
           be an absolute path.
         - base_url: URL files will be served from

        Any other keyword arguments are accepted and ignored.
        """
        self.base_dir = base_dir
        self.base_url = base_url

    def _resolve_filepath(self, filepath):
        """
        Transform the given filepath into a local filesystem filepath.

        The components are cleaned with clean_listy_filepath() and then
        joined onto base_dir.
        """
        return os.path.join(
            self.base_dir, *clean_listy_filepath(filepath))

    def file_exists(self, filepath):
        """
        Return whether the resolved local path for filepath exists.
        """
        return os.path.exists(self._resolve_filepath(filepath))

    def get_file(self, filepath, mode='r'):
        """
        Open the file at filepath in the given mode and return it.

        Parent directories are created first when they are missing (this
        happens regardless of the mode).
        """
        # Make directories if necessary
        if len(filepath) > 1:
            directory = self._resolve_filepath(filepath[:-1])
            if not os.path.exists(directory):
                try:
                    os.makedirs(directory)
                except OSError:
                    # Someone else may have created the directory between
                    # the check above and makedirs(); that is fine.  Any
                    # other failure leaves the path missing and is
                    # re-raised.
                    if not os.path.exists(directory):
                        raise

        # Grab and return the file in the mode specified
        return open(self._resolve_filepath(filepath), mode)

    def delete_file(self, filepath):
        """
        Remove the file at filepath from the local filesystem.
        """
        # TODO: Also delete unused directories if empty (safely, with
        #   checks to avoid race conditions).
        os.remove(self._resolve_filepath(filepath))

    def file_url(self, filepath):
        """
        Return the URL for filepath, built by joining it onto base_url.

        Raises NoWebServing if no base_url was configured.

        Note: the join is done with urlparse.urljoin, which resolves the
        path relative to base_url; base_url should therefore end with a
        '/' or its last path segment will be replaced.
        """
        if not self.base_url:
            raise NoWebServing(
                "base_url not set, cannot provide file urls")

        return urlparse.urljoin(
            self.base_url,
            '/'.join(clean_listy_filepath(filepath)))
ffa22935
CAW
179
180
d807b725
CAW
###########
# Utilities
###########
184
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath.

    A listy filepath is a list of path components such as
    ['dir1', 'dir2', 'filename.jpg'].  Each component is passed through
    werkzeug's secure_filename() and converted to unicode.

    For example:
    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
     - listy_filepath: a list of filepath components, mediagoblin
       storage API style.

    Returns:
      A cleaned list of unicode objects.

    Raises:
      InvalidFilepath if any component is empty after cleaning.
    """
    cleaned_components = []
    for component in listy_filepath:
        cleaned_components.append(unicode(secure_filename(component)))

    # A component that cleans down to nothing cannot be used as a name.
    if u'' in cleaned_components:
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return cleaned_components
210
211
ffa22935
CAW
def storage_system_from_paste_config(paste_config, storage_prefix):
    """
    Utility for setting up a storage system from the paste app config.

    Note that a special argument may be passed in to the paste_config
    which is "${storage_prefix}_storage_class" which will provide an
    import path to a storage system.  This defaults to
    "mediagoblin.storage:BasicFileStorage" if otherwise undefined.

    Arguments:
     - paste_config: dictionary of config parameters
     - storage_prefix: the storage system we're setting up / will be
       getting keys/arguments from.  For example 'publicstore' will
       grab all arguments that are like 'publicstore_FOO'.

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_paste_config(
        {'publicstore_base_url': '/media/',
         'publicstore_base_dir': '/var/whatever/media/'},
        'publicstore')

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    prefix_re = re.compile('^%s_(.+)$' % re.escape(storage_prefix))

    # Keep only the keys carrying our prefix, with the prefix stripped so
    # they can be handed to the storage class as keyword arguments.
    config_params = {}
    for key, value in paste_config.items():
        matched = prefix_re.match(key)
        if matched:
            config_params[matched.group(1)] = value

    # 'storage_class' selects the implementation and must not be passed on
    # to its constructor, so it is removed from the params here.
    storage_class = config_params.pop(
        'storage_class', "mediagoblin.storage:BasicFileStorage")

    storage_class = util.import_component(storage_class)
    return storage_class(**config_params)