Updating to the point where we can allllmost run with the new reprocessing code
[mediagoblin.git] / mediagoblin / processing / __init__.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 from collections import OrderedDict
18 import logging
19 import os
20
21 from mediagoblin import mg_globals as mgg
22 from mediagoblin.db.util import atomic_update
23 from mediagoblin.db.models import MediaEntry
24 from mediagoblin.tools.pluginapi import hook_handle
25 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
26
27 _log = logging.getLogger(__name__)
28
29
class ProgressCallback(object):
    """Callable that records transcoding progress on a media entry."""

    def __init__(self, entry):
        # The entry whose ``transcoding_progress`` is updated on each call.
        self.entry = entry

    def __call__(self, progress):
        """Store ``progress`` on the entry and save the entry.

        Falsy values (``None``, ``0``, ...) are ignored: nothing is
        written and the entry is not saved.
        """
        if not progress:
            return

        target = self.entry
        target.transcoding_progress = progress
        target.save()
38
39
def create_pub_filepath(entry, filename):
    """Ask the public store for a unique filepath for one of entry's files.

    The requested path is ``['media_entries', <entry id>, filename]``;
    whatever ``mgg.public_store.get_unique_filepath`` returns for it is
    handed back unchanged.
    """
    path_components = ['media_entries', unicode(entry.id), filename]
    return mgg.public_store.get_unique_filepath(path_components)
45
46
class FilenameBuilder(object):
    """Easily slice and dice filenames.

    Initialize this class with an original file path, then use the fill()
    method to create new filenames based on the original.

    """
    MAX_FILENAME_LENGTH = 255  # VFAT's maximum filename length

    def __init__(self, path):
        """Initialize a builder from an original file path.

        Sets three attributes:
          dirpath -- everything before the final path component
          basename -- the final path component, extension removed
          ext -- the extension (including the dot), lowercased
        """
        self.dirpath, filename = os.path.split(path)
        self.basename, extension = os.path.splitext(filename)
        self.ext = extension.lower()

    def fill(self, fmtstr):
        """Build a new filename based on the original.

        The fmtstr argument can include the following:
        {basename} -- the original basename, with the extension removed
        {ext} -- the original extension, always lowercase

        If necessary, {basename} will be truncated so the filename does not
        exceed this class' MAX_FILENAME_LENGTH in length.  If the rest of
        the filename already uses up that length on its own, {basename} is
        dropped entirely (the result may then still be over the limit).

        """
        # Length of everything in the result that is *not* the basename.
        fixed_len = len(fmtstr.format(basename='', ext=self.ext))
        # Clamp at zero: a negative slice bound would not empty the
        # basename but merely chop that many characters off its end.
        basename_len = max(0, self.MAX_FILENAME_LENGTH - fixed_len)
        return fmtstr.format(basename=self.basename[:basename_len],
                             ext=self.ext)
77
78
79
class MediaProcessor(object):
    """A particular processor for this media type.

    While the ProcessingManager handles all types of MediaProcessing
    possible for a particular media type, a MediaProcessor can be
    thought of as a *particular* processing action for a media type.
    For example, you may have separate MediaProcessors for:

     - initial_processing: the initial processing of a media
     - gen_thumb: generate a thumbnail
     - resize: resize an image
     - transcode: transcode a video

    ... etc.

    Some information on producing a new MediaProcessor for your media type:

     - You *must* supply a name attribute.  This must be a class level
       attribute, and a string.  This will be used to determine the
       subcommand of your process
     - It's recommended that you supply a class level description
       attribute.
     - Supply a media_is_eligible classmethod.  This will be used to
       determine whether or not a media entry is eligible to use this
       processor type.  See the method documentation for details.
     - To give "./bin/gmg reprocess run" abilities to this media type,
       supply both generate_parser and args_to_request classmethods.
     - The process method will be what actually processes your media.
    """
    # You MUST override this in the child MediaProcessor!
    name = None

    # Optional, but will be used in various places to describe the
    # action this MediaProcessor provides
    description = None

    def __init__(self, manager, media_entry):
        self.manager = manager
        self.media_entry = media_entry

        # Should be initialized at time of processing, at least
        self.workbench = None

    # @with_workbench
    def process(self, **kwargs):
        """
        Actually process this media entry.

        Must be overridden by subclasses; the base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def media_is_eligible(cls, media_entry):
        """
        Say whether media_entry may be processed by this processor.

        The ProcessingManager evaluates the return value in a boolean
        context, so return something truthy for eligible entries.

        Must be overridden by subclasses; the base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError

    ###############################
    # Command line interface things
    ###############################

    @classmethod
    def generate_parser(cls):
        """
        Build the command line parser for this processor.

        Must be overridden by subclasses; the base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError

    @classmethod
    def args_to_request(cls, args):
        """
        Turn parsed command line args into a processing request.

        NOTE(review): presumably the module-level request_from_args()
        helper is meant to be used here -- confirm.

        Must be overridden by subclasses; the base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError

    ##########################################
    # THE FUTURE: web interface things here :)
    ##########################################
149
150
class ProcessingKeyError(Exception):
    """Base class for errors about looking up a processor by key."""


class ProcessorDoesNotExist(ProcessingKeyError):
    """No processor is registered under the requested key."""


class ProcessorNotEligible(ProcessingKeyError):
    """The processor exists, but the given entry is not eligible for it."""
154
155
class ProcessingManager(object):
    """Registry of the processing actions available for one media type.

    Each action is a MediaProcessor subclass, registered under its
    ``name`` via add_processor().
    """
    def __init__(self):
        # Processor name -> processor class, kept in registration order
        self.processors = OrderedDict()

    def add_processor(self, processor):
        """
        Register a processor class under its ``name`` attribute.

        Raises AttributeError if the class left ``name`` as None.  A
        processor registered under an already-used name replaces the
        earlier one.
        """
        key = processor.name
        if key is None:
            raise AttributeError("Processor class's .name attribute not set")

        self.processors[key] = processor

    def list_eligible_processors(self, entry):
        """
        Return, in registration order, the registered processors whose
        media_is_eligible() accepts this entry.
        """
        eligible = []
        for candidate in self.processors.values():
            if candidate.media_is_eligible(entry):
                eligible.append(candidate)
        return eligible

    def list_all_processors(self):
        """Return every registered processor class."""
        return self.processors.values()

    def gen_process_request_via_cli(self, subparser):
        """Placeholder; currently does nothing."""
        # Got to figure out what actually goes here before I can write
        # this properly
        pass

    def get_processor(self, key, entry=None):
        """
        Look up the processor registered under ``key``.

        When a (truthy) entry is also supplied, the processor is only
        returned if the entry is eligible for it.

        Raises:
         - ProcessorDoesNotExist: nothing is registered under ``key``
         - ProcessorNotEligible: the entry was rejected by the
           processor's media_is_eligible()
        """
        if key not in self.processors:
            raise ProcessorDoesNotExist(
                "'%s' processor does not exist for this media type" % key)

        found = self.processors[key]

        if not entry or found.media_is_eligible(entry):
            return found

        raise ProcessorNotEligible(
            "This entry is not eligible for processor with name '%s'" % key)

    def process_from_args(self, entry, reprocess_command, request):
        """
        Process a media entry.

        Placeholder; currently does nothing.
        """
        pass
217
218
def request_from_args(args, which_args):
    """
    Build a request dict out of selected argparse results.

    Args:
     - args: the parsed args object (anything with attributes works)
     - which_args: iterable of attribute names to copy off of args

    Returns a dict mapping each name in which_args to the value of
    that attribute on args.  Raises AttributeError if args lacks one
    of the names.
    """
    return {name: getattr(args, name) for name in which_args}
228
229
class MediaEntryNotFound(Exception):
    """No media entry could be found for the requested id."""
231
232
def get_manager_for_type(media_type):
    """
    Instantiate the manager registered for this media type.

    The manager class is whatever the ``('reprocess_manager',
    media_type)`` plugin hook returns; it is called with no arguments
    and the resulting instance is returned.
    """
    hook_name = ('reprocess_manager', media_type)
    return hook_handle(hook_name)()
241
242
def get_entry_and_manager(media_id):
    """
    Get a MediaEntry and the manager for its media type in one go.

    Args:
     - media_id: the id of the MediaEntry to look up

    Returns a tuple of: `(entry, manager)`

    Raises MediaEntryNotFound if no MediaEntry has this id.
    """
    entry = MediaEntry.query.filter_by(id=media_id).first()
    if entry is None:
        raise MediaEntryNotFound("Can't find media with id '%s'" % media_id)

    manager = get_manager_for_type(entry.media_type)

    return entry, manager
256
257
258 ################################################
259 # TODO: This ProcessingState is OUTDATED,
260 # and needs to be refactored into other tools!
261 ################################################
262
class ProcessingState(object):
    """
    The first and only argument to the "processor" of a media type

    Think of it as the "request" handed to the processor function: it
    carries the media entry being processed plus a handful of helper
    tools that operate on it.  More tools can be added here without
    impacting old media types.
    """
    def __init__(self, entry):
        self.entry = entry
        # Both are filled in later: the workbench through
        # set_workbench(), the filename lazily by get_orig_filename().
        self.workbench = None
        self.orig_filename = None

    def set_workbench(self, wb):
        """Attach the workbench used for localizing files."""
        self.workbench = wb

    def get_orig_filename(self):
        """
        Get a filename for the original, on local storage

        The entry's queued_media_file is used if it has one, otherwise
        its 'original' media file.  The result is remembered, so later
        calls return the same filename without localizing again.

        In the future, this will return the highest quality file available
        if neither the original or queued file are available
        """
        if self.orig_filename is None:
            if self.entry.queued_media_file:
                source_path = self.entry.queued_media_file
                source_store = mgg.queue_store
            else:
                source_path = self.entry.media_files['original']
                source_store = mgg.public_store

            self.orig_filename = self.workbench.localized_file(
                source_store, source_path, 'source')

        return self.orig_filename

    def copy_original(self, target_name, keyname=u"original"):
        """Store the original file publicly as target_name under keyname."""
        local_original = self.get_orig_filename()
        self.store_public(keyname, local_original, target_name)

    def store_public(self, keyname, local_file, target_name=None):
        """
        Copy local_file into public storage and record it on the entry.

        target_name defaults to the basename of local_file.  If keyname
        is already present in the entry's media_files a warning is
        logged and the mapping is replaced.
        """
        if target_name is None:
            target_name = os.path.basename(local_file)

        destination = create_pub_filepath(self.entry, target_name)

        if keyname in self.entry.media_files:
            _log.warn("store_public: keyname %r already used for file %r, "
                      "replacing with %r", keyname,
                      self.entry.media_files[keyname], destination)

        mgg.public_store.copy_local_to_storage(local_file, destination)
        self.entry.media_files[keyname] = destination

    def delete_queue_file(self):
        """Remove the queued media file from storage and from the entry."""
        # The queued file lives in the task_id directory, which should
        # go away as well -- but delete_dir is expected to fail if that
        # directory is not empty, to be on the super-safe side.
        queued = self.entry.queued_media_file
        mgg.queue_store.delete_file(queued)  # rm file
        mgg.queue_store.delete_dir(queued[:-1])  # rm dir
        self.entry.queued_media_file = []
329
330
def mark_entry_failed(entry_id, exc):
    """
    Record in the database that processing a media entry failed.

    If exc is a BaseProcessingFail the failure is one we know how to
    describe: its exception_path and metadata are stored with the
    entry, which can be useful for telling users why their media
    failed to process.  Any other exception is logged and stored as a
    bare failure, overwriting whatever fail_error / fail_metadata might
    have been recorded earlier.

    Args:
     - entry_id: The id of the media entry
     - exc: The exception that was raised during processing
    """
    if isinstance(exc, BaseProcessingFail):
        # A failure type we know how to handle: keep its details.
        failure_fields = {
            u'state': u'failed',
            u'fail_error': unicode(exc.exception_path),
            u'fail_metadata': exc.metadata}
    else:
        _log.warn("No idea what happened here, but it failed: %r", exc)
        # Unhandled failure: no fail_error, and blank out any metadata
        # that might be left over from before.
        failure_fields = {
            u'state': u'failed',
            u'fail_error': None,
            u'fail_metadata': {}}

    atomic_update(mgg.database.MediaEntry,
                  {'id': entry_id},
                  failure_fields)
365
366
class BaseProcessingFail(Exception):
    """
    Common ancestor of every processing failure exception.

    Don't raise this class directly; subclass it and provide the
    general_message that applies to that kind of error.  The
    exception_path is derived from the subclass automatically.
    """
    general_message = u''

    def __init__(self, **metadata):
        # Arbitrary keyword arguments become the failure's metadata.
        self.metadata = metadata if metadata else {}

    @property
    def exception_path(self):
        """The "<module>:<class name>" identifier of this failure type."""
        failure_class = self.__class__
        return u"%s:%s" % (failure_class.__module__, failure_class.__name__)
385
386
class BadMediaFail(BaseProcessingFail):
    """
    Raise this when the file that was supplied is not appropriate for
    the media type it was submitted as.
    """
    general_message = _(u'Invalid file given for media type.')