`gmg reprocess available --action-help` now tells you processor arguments!
[mediagoblin.git] / mediagoblin / processing / __init__.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from collections import OrderedDict
18 import logging
19 import os
20
21 from mediagoblin.db.util import atomic_update
22 from mediagoblin import mg_globals as mgg
23
24 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
25
26 _log = logging.getLogger(__name__)
27
28
class ProgressCallback(object):
    """Callable that records transcoding progress on a media entry."""

    def __init__(self, entry):
        # The entry whose ``transcoding_progress`` attribute gets updated.
        self.entry = entry

    def __call__(self, progress):
        """Store ``progress`` on the entry and save it.

        Falsy progress values (``None``, ``0``, ...) are ignored, so the
        entry is neither modified nor saved for them.
        """
        if not progress:
            return
        target = self.entry
        target.transcoding_progress = progress
        target.save()
37
38
def create_pub_filepath(entry, filename):
    """Ask the public store for a filepath for ``filename`` under ``entry``.

    The requested path components are ``'media_entries'``, the entry id
    converted to unicode, and ``filename``; the result is whatever
    ``mgg.public_store.get_unique_filepath`` returns for them.
    """
    path_parts = ['media_entries', unicode(entry.id), filename]
    return mgg.public_store.get_unique_filepath(path_parts)
44
45
class FilenameBuilder(object):
    """Easily slice and dice filenames.

    Initialize this class with an original file path, then use the fill()
    method to create new filenames based on the original.

    """
    MAX_FILENAME_LENGTH = 255  # VFAT's maximum filename length

    def __init__(self, path):
        """Initialize a builder from an original file path.

        Stores the directory part as ``dirpath``, the filename without its
        extension as ``basename`` and the lowercased extension (including
        the leading dot, if any) as ``ext``.
        """
        self.dirpath, self.basename = os.path.split(path)
        self.basename, self.ext = os.path.splitext(self.basename)
        self.ext = self.ext.lower()

    def fill(self, fmtstr):
        """Build a new filename based on the original.

        The fmtstr argument can include the following:
        {basename} -- the original basename, with the extension removed
        {ext} -- the original extension, always lowercase

        If necessary, {basename} will be truncated so the filename does not
        exceed this class' MAX_FILENAME_LENGTH in length.  When the rest of
        the format string alone already reaches that limit, {basename} is
        dropped entirely.

        """
        # Length of everything in the result that is not the basename.
        fixed_len = len(fmtstr.format(basename='', ext=self.ext))
        # Clamp at zero: a negative slice bound would count from the end of
        # the basename and keep most of it instead of none of it.
        basename_len = max(0, self.MAX_FILENAME_LENGTH - fixed_len)
        return fmtstr.format(basename=self.basename[:basename_len],
                             ext=self.ext)
76
77
78
class MediaProcessor(object):
    """A particular processor for this media type.

    While the ProcessingManager handles all types of MediaProcessing
    possible for a particular media type, a MediaProcessor can be
    thought of as a *particular* processing action for a media type.
    For example, you may have separate MediaProcessors for:

     - initial_processing: the initial processing of a media
     - gen_thumb: generate a thumbnail
     - resize: resize an image
     - transcode: transcode a video

    ... etc.

    Some information on producing a new MediaProcessor for your media type:

     - You *must* supply a name attribute.  This must be a class level
       attribute, and a string.  This will be used to determine the
       subcommand of your process
     - It's recommended that you supply a class level description
       attribute.
     - Supply a media_is_eligible classmethod.  This will be used to
       determine whether or not a media entry is eligible to use this
       processor type.  See the method documentation for details.
     - To give "./bin/gmg reprocess run" abilities to this media type,
       supply both generate_parser and parser_to_request classmethods.
     - The process method will be what actually processes your media.
    """
    # You MUST override this in the child MediaProcessor!
    name = None

    # Optional, but will be used in various places to describe the
    # action this MediaProcessor provides
    description = None

    def __init__(self, manager):
        self.manager = manager

        # Should be initialized at time of processing, at least
        self.workbench = None

    # @with_workbench
    def process(self, **kwargs):
        """
        Actually process this media entry.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def media_is_eligible(cls, media_entry):
        """
        Tell whether media_entry may be processed by this processor.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def media_is_eligibile(cls, media_entry):
        """
        Misspelled legacy name for media_is_eligible.

        Kept so existing callers of the old spelling keep working; it
        simply delegates to media_is_eligible.
        """
        return cls.media_is_eligible(media_entry)

    ###############################
    # Command line interface things
    ###############################

    @classmethod
    def generate_parser(cls):
        """
        Build the command line parser for this processor.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def parser_to_request(cls, parser):
        """
        Convert parsed command line input into a processing request.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    ##########################################
    # THE FUTURE: web interface things here :)
    ##########################################
147
148
class ProcessingManager(object):
    """Registry of the processing actions available for one media type.

    Each action is a MediaProcessor subclass registered through
    add_processor(); registration order is preserved.
    """

    def __init__(self):
        # Processor classes of this media type, keyed by their ``name``.
        self.processors = OrderedDict()

    def add_processor(self, processor):
        """
        Register a processor class under its ``name`` attribute.

        Raises AttributeError when the class has no name set.
        """
        key = processor.name
        if key is not None:
            self.processors[key] = processor
            return

        raise AttributeError("Processor class's .name attribute not set")

    def list_eligible_processors(self, entry):
        """
        Return the registered processors whose media_is_eligible() accepts
        this media entry, in registration order.
        """
        eligible = []
        for candidate in self.processors.values():
            if candidate.media_is_eligible(entry):
                eligible.append(candidate)
        return eligible

    def list_all_processors(self):
        """Return every registered processor class."""
        registered = self.processors
        return registered.values()

    def gen_process_request_via_cli(self, subparser):
        """Placeholder: not implemented yet, does nothing."""
        # Got to figure out what actually goes here before I can write this
        # properly
        return None

    def process(self, entry, directive, request):
        """
        Process a media entry.

        Placeholder: not implemented yet, does nothing.
        """
        return None
191
192
class ProcessingState(object):
    """
    The first and only argument to the "processor" of a media type

    This could be thought of as a "request" to the processor
    function. It has the main info for the request (media entry)
    and a bunch of tools for the request on it.
    It can get more fancy without impacting old media types.
    """
    def __init__(self, entry):
        self.entry = entry
        # Set later through set_workbench(); needed by get_orig_filename().
        self.workbench = None
        # Cache for get_orig_filename(), filled on first use.
        self.orig_filename = None

    def set_workbench(self, wb):
        """Attach the workbench used to localize files for this request."""
        self.workbench = wb

    def get_orig_filename(self):
        """
        Get a filename for the original, on local storage

        If the media entry has a queued_media_file, use that, otherwise
        use the original.

        The result is cached, so the file is only localized once per
        ProcessingState.

        In the future, this will return the highest quality file available
        if neither the original or queued file are available
        """
        if self.orig_filename is not None:
            return self.orig_filename

        if self.entry.queued_media_file:
            orig_filepath = self.entry.queued_media_file
            storage = mgg.queue_store
        else:
            orig_filepath = self.entry.media_files['original']
            storage = mgg.public_store

        orig_filename = self.workbench.localized_file(
            storage, orig_filepath,
            'source')
        self.orig_filename = orig_filename
        return orig_filename

    def copy_original(self, target_name, keyname=u"original"):
        """Store the original file publicly as target_name under keyname."""
        self.store_public(keyname, self.get_orig_filename(), target_name)

    def store_public(self, keyname, local_file, target_name=None):
        """
        Copy local_file into the public store and record it on the entry.

        target_name defaults to the basename of local_file.  If keyname is
        already present in the entry's media_files, a warning is logged and
        the mapping is replaced.
        """
        if target_name is None:
            target_name = os.path.basename(local_file)
        target_filepath = create_pub_filepath(self.entry, target_name)
        if keyname in self.entry.media_files:
            # Logger.warn is a deprecated alias; warning() is the supported
            # spelling.
            _log.warning("store_public: keyname %r already used for file %r, "
                         "replacing with %r", keyname,
                         self.entry.media_files[keyname], target_filepath)
        mgg.public_store.copy_local_to_storage(local_file, target_filepath)
        self.entry.media_files[keyname] = target_filepath

    def delete_queue_file(self):
        """
        Remove the queued media file from storage and from the entry.

        Does nothing when the entry has no queued media file.
        """
        # Remove queued media file from storage and database.
        # queued_filepath is in the task_id directory which should
        # be removed too, but fail if the directory is not empty to be on
        # the super-safe side.
        queued_filepath = self.entry.queued_media_file
        # An entry without a queued file is a state get_orig_filename()
        # explicitly allows; there is nothing to delete then.
        if queued_filepath:
            mgg.queue_store.delete_file(queued_filepath)      # rm file
            mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
            self.entry.queued_media_file = []
257 mgg.queue_store.delete_dir(queued_filepath[:-1]) # rm dir
258 self.entry.queued_media_file = []
259
260
def mark_entry_failed(entry_id, exc):
    """
    Mark a media entry as having failed in its conversion.

    Uses the exception that was raised to mark more information.  If
    the exception is a derivative of BaseProcessingFail then we can
    store extra information that can be useful for users telling them
    why their media failed to process.

    Args:
     - entry_id: The id of the media entry
     - exc: The exception that made processing fail

    """
    # Was this a BaseProcessingFail?  In other words, was this a
    # type of error that we know how to handle?
    if isinstance(exc, BaseProcessingFail):
        # Looks like yes, so record information about that failure and any
        # metadata the user might have supplied.
        atomic_update(mgg.database.MediaEntry,
                      {'id': entry_id},
                      {u'state': u'failed',
                       u'fail_error': unicode(exc.exception_path),
                       u'fail_metadata': exc.metadata})
    else:
        # Logger.warn is a deprecated alias; warning() is the supported
        # spelling.
        _log.warning("No idea what happened here, but it failed: %r", exc)
        # Looks like no, so just mark it as failed and don't record a
        # failure_error (we'll assume it wasn't handled) and don't record
        # metadata (in fact overwrite it if somehow it had previous info
        # here)
        atomic_update(mgg.database.MediaEntry,
                      {'id': entry_id},
                      {u'state': u'failed',
                       u'fail_error': None,
                       u'fail_metadata': {}})
295
296
class BaseProcessingFail(Exception):
    """
    Common base class for every processing failure exception.

    Don't raise this class directly; subclass it and provide the
    general_message applicable to that error.  exception_path is
    derived from the subclass automatically.
    """
    general_message = u''

    def __init__(self, **metadata):
        # Keyword arguments become the failure metadata; with none given
        # this is an empty dict.
        self.metadata = metadata if metadata else {}

    @property
    def exception_path(self):
        """Return "<module>:<class name>" for the concrete exception class."""
        klass = self.__class__
        return u"%s:%s" % (klass.__module__, klass.__name__)
315
316
class BadMediaFail(BaseProcessingFail):
    """
    Raise this when the file handed in is not appropriate for the media
    type that was specified.
    """
    general_message = _(u'Invalid file given for media type.')