Factored the get_orig_filename from processing state and put it to use.
mediagoblin/processing/__init__.py
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from collections import OrderedDict
import logging
import os

from mediagoblin import mg_globals as mgg
from mediagoblin.db.util import atomic_update
from mediagoblin.db.models import MediaEntry
from mediagoblin.tools.pluginapi import hook_handle
from mediagoblin.tools.translate import lazy_pass_to_ugettext as _

_log = logging.getLogger(__name__)


class ProgressCallback(object):
    def __init__(self, entry):
        self.entry = entry

    def __call__(self, progress):
        if progress:
            self.entry.transcoding_progress = progress
            self.entry.save()


def create_pub_filepath(entry, filename):
    return mgg.public_store.get_unique_filepath(
        ['media_entries',
         unicode(entry.id),
         filename])


class FilenameBuilder(object):
    """Easily slice and dice filenames.

    Initialize this class with an original file path, then use the fill()
    method to create new filenames based on the original.

    """
    MAX_FILENAME_LENGTH = 255  # VFAT's maximum filename length

    def __init__(self, path):
        """Initialize a builder from an original file path."""
        self.dirpath, self.basename = os.path.split(path)
        self.basename, self.ext = os.path.splitext(self.basename)
        self.ext = self.ext.lower()

    def fill(self, fmtstr):
        """Build a new filename based on the original.

        The fmtstr argument can include the following:
        {basename} -- the original basename, with the extension removed
        {ext} -- the original extension, always lowercase

        If necessary, {basename} will be truncated so the filename does not
        exceed this class' MAX_FILENAME_LENGTH in length.

        """
        basename_len = (self.MAX_FILENAME_LENGTH -
                        len(fmtstr.format(basename='', ext=self.ext)))
        return fmtstr.format(basename=self.basename[:basename_len],
                             ext=self.ext)
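

# Illustrative usage sketch of FilenameBuilder. The path and format string
# below are made-up examples, and this helper function exists only as a
# sketch; it is not used by MediaGoblin itself.
def _example_filename_builder_usage():
    builder = FilenameBuilder('/queue/mg/Fancy Photo.JPG')
    # {basename} is truncated if needed so the result stays within
    # MAX_FILENAME_LENGTH; {ext} is always lowercased.
    return builder.fill('{basename}.medium{ext}')  # 'Fancy Photo.medium.jpg'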


class MediaProcessor(object):
    """A particular processor for this media type.

    While the ProcessingManager handles all types of MediaProcessing
    possible for a particular media type, a MediaProcessor can be
    thought of as a *particular* processing action for a media type.
    For example, you may have separate MediaProcessors for:

    - initial_processing: the initial processing of a media entry
    - gen_thumb: generate a thumbnail
    - resize: resize an image
    - transcode: transcode a video

    ... etc.

    Some information on producing a new MediaProcessor for your media type:

    - You *must* supply a name attribute. This must be a class level
      attribute, and a string. This will be used to determine the
      subcommand for your processor.
    - It's recommended that you supply a class level description
      attribute.
    - Supply a media_is_eligible classmethod. This will be used to
      determine whether or not a media entry is eligible to use this
      processor type. See the method documentation for details.
    - To give "./bin/gmg reprocess run" abilities to this media type,
      supply both generate_parser and args_to_request classmethods.
    - The process method will be what actually processes your media.

    A minimal illustrative subclass is sketched just after this class
    definition.
    """
    # You MUST override this in the child MediaProcessor!
    name = None

    # Optional, but will be used in various places to describe the
    # action this MediaProcessor provides
    description = None

    def __init__(self, manager, media_entry):
        self.manager = manager
        self.media_entry = media_entry
        self.entry_orig_state = media_entry.state

        # Should be initialized at time of processing, at least
        self.workbench = None

    def __enter__(self):
        self.workbench = mgg.workbench_manager.create()

    def __exit__(self, *args):
        self.workbench.destroy()
        self.workbench = None

    # @with_workbench
    def process(self, **kwargs):
        """
        Actually process this media entry.
        """
        raise NotImplementedError

    @classmethod
    def media_is_eligible(cls, media_entry):
        raise NotImplementedError

    ###############################
    # Command line interface things
    ###############################

    @classmethod
    def generate_parser(cls):
        raise NotImplementedError

    @classmethod
    def args_to_request(cls, args):
        raise NotImplementedError

    ##########################################
    # THE FUTURE: web interface things here :)
    ##########################################


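# A minimal sketch of what a concrete MediaProcessor subclass looks like,
# following the checklist in the class docstring above. The name
# "_ExampleResizer", its '--size' option, and the eligibility check are all
# hypothetical; real processors live in the individual media type plugins.
class _ExampleResizer(MediaProcessor):
    """Illustrative only: resize an already-processed image entry."""
    name = 'resize'
    description = 'Resize an image (illustrative sketch)'

    @classmethod
    def media_is_eligible(cls, media_entry):
        # A real check would inspect the entry's state and stored files.
        return media_entry.state == u'processed'

    @classmethod
    def generate_parser(cls):
        import argparse  # local import, only because this is a sketch
        parser = argparse.ArgumentParser(
            description=cls.description, prog=cls.name)
        parser.add_argument('--size', nargs=2, metavar=('width', 'height'))
        return parser

    @classmethod
    def args_to_request(cls, args):
        return request_from_args(args, ['size'])

    def process(self, size=None):
        # A real implementation would localize the original file via
        # self.workbench, resize it, and publish the result.
        pass

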
class ProcessingKeyError(Exception): pass
class ProcessorDoesNotExist(ProcessingKeyError): pass
class ProcessorNotEligible(ProcessingKeyError): pass


class ProcessingManager(object):
    """Manages all the processing actions available for a media type

    Specific processing actions, MediaProcessor subclasses, are added
    to the ProcessingManager. (A sketch of how a media type typically
    wires this up follows this class definition.)
    """
    def __init__(self):
        # Dict of all MediaProcessors of this media type
        self.processors = OrderedDict()

    def add_processor(self, processor):
        """
        Add a processor class to this media type
        """
        name = processor.name
        if name is None:
            raise AttributeError("Processor class's .name attribute not set")

        self.processors[name] = processor

    def list_eligible_processors(self, entry):
        """
        List all processors for which this media entry is eligible.
        """
        return [
            processor
            for processor in self.processors.values()
            if processor.media_is_eligible(entry)]

    def list_all_processors(self):
        return self.processors.values()

    def gen_process_request_via_cli(self, subparser):
        # Got to figure out what actually goes here before I can write this properly
        pass

    def get_processor(self, key, entry=None):
        """
        Get the processor with this key.

        If an entry is supplied, make sure it is actually compatible;
        otherwise raise an error.
        """
        try:
            processor = self.processors[key]
        except KeyError:
            raise ProcessorDoesNotExist(
                "'%s' processor does not exist for this media type" % key)

        if entry and not processor.media_is_eligible(entry):
            raise ProcessorNotEligible(
                "This entry is not eligible for processor with name '%s'" % key)

        return processor

    def process_from_args(self, entry, reprocess_command, request):
        """
        Process a media entry.
        """
        pass


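# An illustrative sketch of how a media type plugin typically assembles its
# own manager. "_ExampleProcessingManager" is hypothetical and only registers
# the illustrative processor sketched earlier; real plugins expose their
# manager through the ('reprocess_manager', media_type) hook handled further
# below.
class _ExampleProcessingManager(ProcessingManager):
    def __init__(self):
        super(_ExampleProcessingManager, self).__init__()
        self.add_processor(_ExampleResizer)

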
def request_from_args(args, which_args):
    """
    Generate a request from the values of some argparse parsed args
    """
    request = {}
    for arg in which_args:
        request[arg] = getattr(args, arg)

    return request


class MediaEntryNotFound(Exception): pass


def get_processing_manager_for_type(media_type):
    """
    Get the appropriate processing manager for this media type
    """
    manager_class = hook_handle(('reprocess_manager', media_type))
    manager = manager_class()

    return manager


def get_entry_and_processing_manager(media_id):
    """
    Get a MediaEntry and the processing manager for its media type in one go.

    Returns a tuple of: `(entry, manager)`
    """
    entry = MediaEntry.query.filter_by(id=media_id).first()
    if entry is None:
        raise MediaEntryNotFound("Can't find media with id '%s'" % media_id)

    manager = get_processing_manager_for_type(entry.media_type)

    return entry, manager


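# An illustrative sketch of how the pieces above fit together when a task
# runner (re)processes an entry. The 'resize' processor key and the
# reprocess_info dict are hypothetical, and a real caller would also update
# the entry's state and record failures via mark_entry_failed().
def _example_run_processing(media_id, reprocess_info=None):
    entry, manager = get_entry_and_processing_manager(media_id)
    processor_class = manager.get_processor('resize', entry)
    processor = processor_class(manager, entry)
    with processor:  # __enter__ sets up a workbench, __exit__ destroys it
        processor.process(**(reprocess_info or {}))

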
################################################
# TODO: This ProcessingState is OUTDATED,
# and needs to be refactored into other tools!
################################################

class ProcessingState(object):
    """
    The first and only argument to the "processor" of a media type

    This could be thought of as a "request" to the processor
    function. It has the main info for the request (media entry)
    and a bunch of tools for the request on it.
    It can get more fancy without impacting old media types.
    """
    def __init__(self, entry):
        self.entry = entry
        self.workbench = None
        self.orig_filename = None

    def set_workbench(self, wb):
        self.workbench = wb

    def get_orig_filename(self):
        """
        Get a filename for the original, on local storage

        If the media entry has a queued_media_file, use that, otherwise
        use the original.

        In the future, this will return the highest quality file available
        if neither the original nor the queued file is available.
        """
        if self.orig_filename is not None:
            return self.orig_filename

        if self.entry.queued_media_file:
            orig_filepath = self.entry.queued_media_file
            storage = mgg.queue_store
        else:
            orig_filepath = self.entry.media_files['original']
            storage = mgg.public_store

        orig_filename = self.workbench.localized_file(
            storage, orig_filepath,
            'source')
        self.orig_filename = orig_filename
        return orig_filename

    def copy_original(self, target_name, keyname=u"original"):
        self.store_public(keyname, self.get_orig_filename(), target_name)

    def store_public(self, keyname, local_file, target_name=None):
        if target_name is None:
            target_name = os.path.basename(local_file)
        target_filepath = create_pub_filepath(self.entry, target_name)
        if keyname in self.entry.media_files:
            _log.warn("store_public: keyname %r already used for file %r, "
                      "replacing with %r", keyname,
                      self.entry.media_files[keyname], target_filepath)
        mgg.public_store.copy_local_to_storage(local_file, target_filepath)
        self.entry.media_files[keyname] = target_filepath

    def delete_queue_file(self):
        # Remove queued media file from storage and database.
        # queued_filepath is in the task_id directory which should
        # be removed too, but fail if the directory is not empty to be on
        # the super-safe side.
        queued_filepath = self.entry.queued_media_file
        mgg.queue_store.delete_file(queued_filepath)      # rm file
        mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
        self.entry.queued_media_file = []


def mark_entry_failed(entry_id, exc):
    """
    Mark a media entry as having failed in its conversion.

    Uses the exception that was raised to record more information. If
    the exception is a derivative of BaseProcessingFail then we can
    store extra information that can be useful for users telling them
    why their media failed to process.

    Args:
     - entry_id: The id of the media entry
     - exc: The exception that was raised while processing the entry

    """
    # Was this a BaseProcessingFail? In other words, was this a
    # type of error that we know how to handle?
    if isinstance(exc, BaseProcessingFail):
        # Looks like yes, so record information about that failure and any
        # metadata the user might have supplied.
        atomic_update(mgg.database.MediaEntry,
                      {'id': entry_id},
                      {u'state': u'failed',
                       u'fail_error': unicode(exc.exception_path),
                       u'fail_metadata': exc.metadata})
    else:
        _log.warn("No idea what happened here, but it failed: %r", exc)
        # Looks like no, so just mark it as failed and don't record a
        # fail_error (we'll assume it wasn't handled) and don't record
        # metadata (in fact overwrite it if somehow it had previous info
        # here)
        atomic_update(mgg.database.MediaEntry,
                      {'id': entry_id},
                      {u'state': u'failed',
                       u'fail_error': None,
                       u'fail_metadata': {}})


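# An illustrative sketch of the usual call site for mark_entry_failed():
# whatever drives processing (e.g. a task runner) catches the exception,
# records the failure against the entry, and re-raises. The function below
# and its arguments are hypothetical.
def _example_handle_failure(entry, processor):
    try:
        processor.process()
    except Exception as exc:
        mark_entry_failed(entry.id, exc)
        raise

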
###############################################################################
# refactoring procstate stuff here


def get_orig_filename(entry, workbench):
    """
    Get a filename for the original, on local storage

    If the media entry has a queued_media_file, use that, otherwise
    use the original.

    In the future, this will return the highest quality file available
    if neither the original nor the queued file is available, by checking
    some ordered list of preferred keys.
    """
    if entry.queued_media_file:
        orig_filepath = entry.queued_media_file
        storage = mgg.queue_store
    else:
        orig_filepath = entry.media_files['original']
        storage = mgg.public_store

    orig_filename = workbench.localized_file(
        storage, orig_filepath,
        'source')

    return orig_filename


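# An illustrative sketch of the intended call site for the helper above. The
# function below exists only as a sketch: `processor` stands in for a
# MediaProcessor that is already inside its workbench context (between
# __enter__ and __exit__), as at the start of a process() implementation.
def _example_fetch_local_source(processor):
    return get_orig_filename(processor.media_entry, processor.workbench)

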
# end refactoring
###############################################################################


class BaseProcessingFail(Exception):
    """
    Base exception that all other processing failure messages should
    subclass from.

    You shouldn't raise this itself; instead you should subclass it
    and provide the exception_path and general_message applicable to
    this error.
    """
    general_message = u''

    @property
    def exception_path(self):
        return u"%s:%s" % (
            self.__class__.__module__, self.__class__.__name__)

    def __init__(self, **metadata):
        self.metadata = metadata or {}


428
429 class BadMediaFail(BaseProcessingFail):
430 """
431 Error that should be raised when an inappropriate file was given
432 for the media type specified.
433 """
434 general_message = _(u'Invalid file given for media type.')