It's 2012 all up in here
[mediagoblin.git] / mediagoblin / storage / __init__.py
CommitLineData
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17import os
18import shutil
19import urlparse
20import uuid
21
22from werkzeug.utils import secure_filename
23
152a3bfa 24from mediagoblin.tools import common
a2468d18
JW
25
########
# Errors
########
29
30
class Error(Exception):
    """Base class for the exceptions defined by the storage package."""


class InvalidFilepath(Error):
    """
    A listy filepath could not be turned into a usable, clean path.

    Raised by clean_listy_filepath() when a component is empty after
    cleaning.
    """


class NoWebServing(Error):
    """
    Presumably raised by storage backends that cannot hand out a URL
    for a file -- nothing in this module raises it; confirm against the
    concrete storage implementations.
    """


# NOTE: the base named ``NotImplementedError`` below is still the builtin,
# because the module-level name is only rebound once this class statement
# has finished executing.  Deriving from both keeps ``except Error`` working
# while also letting a generic ``except NotImplementedError`` written
# against the builtin catch it, instead of silently missing it.
class NotImplementedError(Error, NotImplementedError):
    """
    A storage implementation does not provide the requested feature.

    This deliberately shadows the builtin of the same name inside this
    module, but remains a subclass of it.
    """
45
46
###############################################
# Storage interface & basic file implementation
###############################################
50
class StorageInterface(object):
    """
    Interface for the storage API.

    This interface doesn't actually provide behavior, but it defines
    what kind of storage patterns subclasses should provide.

    It is important to note that the storage API idea of a "filepath"
    is actually like ['dir1', 'dir2', 'file.jpg'], so keep that in
    mind while reading method documentation.

    You should set up your __init__ method with whatever keyword
    arguments are appropriate to your storage system, but you should
    also passively accept all extraneous keyword arguments like:

      def __init__(self, **kwargs):
          pass

    See BasicFileStorage as a simple implementation of the
    StorageInterface.
    """

    # Whether this file store is on the local filesystem.
    local_storage = False

    def __raise_not_implemented(self):
        """
        Raise an error about some component not implemented by a
        subclass of this interface.

        Raises:
          NotImplementedError, as that name resolves in this module's
          scope.
        """
        raise NotImplementedError(
            "This feature not implemented in this storage API implementation.")

    def file_exists(self, filepath):
        """
        Return a boolean asserting whether or not file at filepath
        exists in our storage system.

        Args:
         - filepath: a listy filepath, e.g. ['dir1', 'dir2', 'file.jpg']

        Returns:
         True / False depending on whether file exists or not.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_file(self, filepath, mode='r'):
        """
        Return a file-like object for reading/writing from this filepath.

        Should create directories, buckets, whatever, as necessary.

        Args:
         - filepath: a listy filepath
         - mode: file mode string; the copy helpers on this class call
           this with 'rb' and 'wb' and use the result as a context
           manager.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def delete_file(self, filepath):
        """
        Delete or dereference the file at filepath.

        This might need to delete directories, buckets, whatever, for
        cleanliness.  (Be sure to avoid race conditions on that though)
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def file_url(self, filepath):
        """
        Get the URL for this file.  This assumes our storage has been
        mounted with some kind of URL which makes this possible.
        """
        # Subclasses should override this method.
        self.__raise_not_implemented()

    def get_unique_filepath(self, filepath):
        """
        If a filename at filepath already exists, generate a new name.

        Eg, if the filename doesn't exist:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'fname.jpg']

        But if a file does exist, let's get one back with a uuid tacked on:
        >>> storage_handler.get_unique_filepath(['dir1', 'dir2', 'fname.jpg'])
        [u'dir1', u'dir2', u'd02c3571-dd62-4479-9d62-9e3012dada29-fname.jpg']

        Args:
         - filepath: a listy filepath

        Returns:
         The cleaned listy filepath, with a uuid4 prefixed to the last
         component if (and only if) the cleaned path already exists.
        """
        # Make sure we have a clean filepath to start with, since
        # we'll be possibly tacking on stuff to the filename.
        filepath = clean_listy_filepath(filepath)

        if self.file_exists(filepath):
            return filepath[:-1] + ["%s-%s" % (uuid.uuid4(), filepath[-1])]
        else:
            return filepath

    def get_local_path(self, filepath):
        """
        If this is a local_storage implementation, give us a link to
        the local filesystem reference to this file.

        >>> storage_handler.get_local_path(['foo', 'bar', 'baz.jpg'])
        u'/path/to/mounting/foo/bar/baz.jpg'
        """
        # Subclasses should override this method, if applicable.
        self.__raise_not_implemented()

    def copy_locally(self, filepath, dest_path):
        """
        Copy this file locally.

        A basic working method for this is provided that should
        function both for local_storage systems and remote storage
        systems, but if more efficient systems for copying locally
        apply to your system, override this method with something more
        appropriate.

        Args:
         - filepath: a listy filepath of the stored file
         - dest_path: local filesystem path to write the copy to
        """
        if self.local_storage:
            shutil.copy(
                self.get_local_path(filepath), dest_path)
        else:
            with self.get_file(filepath, 'rb') as source_file:
                with open(dest_path, 'wb') as dest_file:
                    # Stream in chunks instead of read()-ing the whole
                    # file, so large media is never held in memory at once.
                    shutil.copyfileobj(source_file, dest_file)

    def copy_local_to_storage(self, filename, filepath):
        """
        Copy this file from locally to the storage system.

        This is kind of the opposite of copy_locally.  It's likely you
        could override this method with something more appropriate to
        your storage system.

        Args:
         - filename: local filesystem path of the file to upload
         - filepath: a listy filepath to store the file under
        """
        with self.get_file(filepath, 'wb') as dest_file:
            with open(filename, 'rb') as source_file:
                # Chunked copy; see copy_locally.
                shutil.copyfileobj(source_file, dest_file)
a2468d18
JW
183
184
###########
# Utilities
###########
188
def clean_listy_filepath(listy_filepath):
    """
    Sanitize every component of a listy filepath
    (like ['dir1', 'dir2', 'filename.jpg']).

    Each component is passed through secure_filename() and converted
    to unicode.

    >>> clean_listy_filepath([u'/dir1/', u'foo/../nasty', u'linooks.jpg'])
    [u'dir1', u'foo_.._nasty', u'linooks.jpg']

    Args:
    - listy_filepath: a list of filepath components, mediagoblin
      storage API style.

    Returns:
      A new list of cleaned unicode components, in the same order.

    Raises:
      InvalidFilepath: if any component is an empty string after
      cleaning.
    """
    sanitized = []
    for component in listy_filepath:
        sanitized.append(unicode(secure_filename(component)))

    # All components are cleaned before validating; an empty string
    # means a component had nothing usable left in it.
    if any(part == u'' for part in sanitized):
        raise InvalidFilepath(
            "A filename component could not be resolved into a usable name.")

    return sanitized
214
215
def storage_system_from_config(config_section):
    """
    Build a storage system instance from a config section.

    The section may carry a special "storage_class" entry holding the
    import path of the storage class to use.  When it is absent,
    "mediagoblin.storage.filestorage:BasicFileStorage" is used.  Every
    other entry is passed to the class as a keyword argument.

    Arguments:
     - config_section: dictionary of config parameters (must provide
       iteritems())

    Returns:
      An instantiated storage system.

    Example:
      storage_system_from_config(
        {'base_url': '/media/',
         'base_dir': '/var/whatever/media/'})

      Will return:
        BasicFileStorage(
          base_url='/media/',
          base_dir='/var/whatever/media/')
    """
    # Copy through iteritems() on purpose: dict(config) does not
    # replace the variables in the config items.
    params = dict(config_section.iteritems())

    # Pull the class path out so it is not forwarded as a keyword
    # argument to the storage class itself.
    class_path = params.pop(
        'storage_class',
        'mediagoblin.storage.filestorage:BasicFileStorage')

    storage_cls = common.import_component(class_path)
    return storage_cls(**params)