Merge branch 'master' into joar-skip_transcoding
[mediagoblin.git] / mediagoblin / storage / __init__.py
CommitLineData
a2468d18 1# GNU MediaGoblin -- federated, autonomous media hosting
cf29e8a8 2# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
a2468d18
JW
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
a2468d18 17import shutil
a2468d18
JW
18import uuid
19
20from werkzeug.utils import secure_filename
21
152a3bfa 22from mediagoblin.tools import common
a2468d18
JW
23
24########
25# Errors
26########
27
28
class Error(Exception):
    """Base class for the exceptions defined by the storage package."""
31
32
class InvalidFilepath(Error):
    """
    A listy filepath could not be used.

    Raised by clean_listy_filepath when a component of the filepath is
    empty after cleaning.
    """
35
36
class NoWebServing(Error):
    """
    Storage error related to serving files over the web.

    NOTE(review): nothing in this module raises it; presumably storage
    implementations raise it when no URL can be produced for a file --
    confirm against the implementations.
    """
39
40
class NotImplementedError(Error):
    """
    A storage system does not implement the requested feature.

    Note that inside this module the name shadows the builtin
    NotImplementedError; this class derives from Error, not from the
    builtin exception of the same name.
    """
43
44
45###############################################
46# Storage interface & basic file implementation
47###############################################
48
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    # Number of bytes moved per read/write call when a file is streamed
    # between the storage system and the local filesystem (4 MiB).
    _COPY_CHUNK_SIZE = 4 * 1024 * 1024

    def __raise_not_implemented(self):
        """
        Raise NotImplementedError for a feature that a subclass of
        this interface does not provide.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Args:
         - filepath: listy filepath of the file inside the storage system.
         - dest_path: local filesystem path to write the copy to.
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                with open(dest_path, 'wb') as dest_file:
                    # Stream in chunks so the whole file never has to
                    # be held in memory at once.
                    shutil.copyfileobj(
                        source_file, dest_file, self._COPY_CHUNK_SIZE)

    def copy_local_to_storage(self, filename, filepath):
        """
        Copy this file from locally to the storage system.

        This is kind of the opposite of copy_locally.  It's likely you
        could override this method with something more appropriate to
        your storage system.

        Args:
         - filename: local filesystem path of the file to upload.
         - filepath: listy filepath to store the file under.
        """
        with self.get_file(filepath, 'wb') as dest_file:
            with open(filename, 'rb') as source_file:
                # Stream in chunks so the whole file never has to be
                # held in memory at once.
                shutil.copyfileobj(
                    source_file, dest_file, self._COPY_CHUNK_SIZE)
a2468d18
JW
181
182
183###########
184# Utilities
185###########
186
def clean_listy_filepath(listy_filepath):
    """
    Sanitize each component of a listy filepath (something like
    ['dir1', 'dir2', 'filename.jpg']).

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
     - listy_filepath: a list of filepath components, mediagoblin
       storage API style.

    Returns:
      A new list with every component passed through secure_filename
      and converted to unicode.

    Raises:
      InvalidFilepath: if any component is empty after cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # Validate only after every component has been cleaned.
    if any(part == u'' for part in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
212
213
def storage_system_from_config(config_section):
    """
    Build a storage system out of a config section.

    The section may carry a special "storage_class" entry holding the
    import path of the storage class to instantiate.  That entry is
    removed before the remaining entries are handed to the class as
    keyword arguments.  Without it, the class at
    "mediagoblin.storage.filestorage:BasicFileStorage" is used.

    Arguments:
     - config_section: dictionary of config parameters

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'base_url': '/media/',
         'base_dir': '/var/whatever/media/'})

       Will return:
         BasicFileStorage(
           base_url='/media/',
           base_dir='/var/whatever/media/')
    """
    # Build the dict from iteritems() on purpose: dict(config) does
    # not replace the variables in the config items.
    settings = dict(config_section.iteritems())

    # Take the class path out of the settings so it is not passed on
    # to the storage class constructor.
    class_path = settings.pop(
        'storage_class',
        'mediagoblin.storage.filestorage:BasicFileStorage')

    storage_factory = common.import_component(class_path)
    return storage_factory(**settings)
4a791b80
BS
251
252import filestorage