A couple of fixes to stupid things I did while coding this. And it WORKS!
[mediagoblin.git] / mediagoblin / media_types / image / processing.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 try:
18 from PIL import Image
19 except ImportError:
20 import Image
21 import os
22 import logging
23 import argparse
24
25 from mediagoblin import mg_globals as mgg
26 from mediagoblin.db.models import MediaEntry
27 from mediagoblin.processing import (
28 BadMediaFail, FilenameBuilder,
29 MediaProcessor, ProcessingManager,
30 request_from_args, get_orig_filename,
31 store_public, copy_original)
32 from mediagoblin.submit.lib import run_process_media
33 from mediagoblin.tools.exif import exif_fix_image_orientation, \
34 extract_exif, clean_exif, get_gps_data, get_useful, \
35 exif_image_needs_rotation
36 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
37
# Logger for this module, named after the module itself.
_log = logging.getLogger(__name__)

# Lookup from resize filter name to the PIL constant of the same name.
PIL_FILTERS = dict(
    (filter_name, getattr(Image, filter_name))
    for filter_name in ('NEAREST', 'BILINEAR', 'BICUBIC', 'ANTIALIAS'))

# Media type identifier handed back by sniff_handler().
MEDIA_TYPE = 'mediagoblin.media_types.image'
47
48
def resize_image(entry, resized, keyname, target_name, new_size,
                 exif_tags, workdir):
    """
    Store a resized version of an image.

    The image is passed through exif_fix_image_orientation(), shrunk with
    Image.thumbnail(), saved below workdir and finally handed to
    store_public().  Nothing is returned.

    Arguments:
    entry -- forwarded unchanged to store_public()
    resized -- an image from Image.open() of the original image being resized
    keyname -- Under what key to save in the db.
    target_name -- public file path for the new resized image
    new_size -- 2-tuple size for the resized image
    exif_tags -- EXIF data for the original image
    workdir -- directory path for storing converted image files

    Raises:
    Exception -- if the configured 'resize_filter' is not a key of
                 PIL_FILTERS
    """
    config = mgg.global_config['media_type:mediagoblin.media_types.image']

    resized = exif_fix_image_orientation(resized, exif_tags)  # Fix orientation

    filter_config = config['resize_filter']
    try:
        resize_filter = PIL_FILTERS[filter_config.upper()]
    except KeyError:
        raise Exception('Filter "{0}" not found, choose one of {1}'.format(
            unicode(filter_config),
            u', '.join(PIL_FILTERS.keys())))

    resized.thumbnail(new_size, resize_filter)

    # Copy the new file to the conversion subdir, then remotely.
    tmp_resized_filename = os.path.join(workdir, target_name)
    # Encoded image data is binary, so the file must be opened in 'wb' mode;
    # text mode would translate newline bytes on some platforms.
    with open(tmp_resized_filename, 'wb') as resized_file:
        resized.save(resized_file, quality=config['quality'])
    store_public(entry, keyname, tmp_resized_filename, target_name)
82
83
def resize_tool(entry,
                force, keyname, target_name,
                conversions_subdir, exif_tags, new_size=None,
                orig_filename=None):
    """
    Create a resized copy of an image when one is needed.

    A resized file is produced (via resize_image()) if force is true, if the
    opened image is wider or taller than new_size, or if
    exif_image_needs_rotation() reports that the image must be rotated.
    Otherwise nothing is written.

    Arguments:
    entry -- forwarded unchanged to resize_image()
    force -- when true, always create the resized file
    keyname -- key of the resized version; also selects the
               'media:<keyname>' config section holding the default size
    target_name -- name of the resized file to create
    conversions_subdir -- directory in which the resized file is written
    exif_tags -- EXIF data for the original image
    new_size -- optional 2-tuple (max_width, max_height); when empty the
                configured defaults are used
    orig_filename -- optional path of the image to open.  When omitted,
                     target_name is opened, as this function always did.

    Raises:
    BadMediaFail -- if the image cannot be opened
    """
    # target_name used to double as the path of the image to read; keep that
    # behaviour unless the caller names the source file explicitly.
    filename = target_name if orig_filename is None else orig_filename

    # Use the default size if new_size was not given
    if not new_size:
        size_config = mgg.global_config['media:' + keyname]
        new_size = (size_config['max_width'], size_config['max_height'])

    # If the size of the original file exceeds the specified size for the
    # resized file, a target_name file is created and later associated with
    # the media entry.
    # Also created if the file needs rotation, or if forced.
    try:
        im = Image.open(filename)
    except IOError:
        raise BadMediaFail()

    if force \
            or im.size[0] > new_size[0] \
            or im.size[1] > new_size[1] \
            or exif_image_needs_rotation(exif_tags):
        resize_image(
            entry, im, unicode(keyname), target_name,
            new_size,
            exif_tags, conversions_subdir)
112
113
SUPPORTED_FILETYPES = ['png', 'gif', 'jpg', 'jpeg', 'tiff']


def sniff_handler(media_file, **kw):
    """
    Decide from the file extension whether this media type applies.

    Only the 'media' keyword argument is inspected: the extension of its
    ``filename`` attribute is lower-cased and looked up in
    SUPPORTED_FILETYPES.  media_file itself is not read.

    Returns MEDIA_TYPE on a match, None otherwise (including when no
    'media' keyword argument was supplied).
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))

    media = kw.get('media')
    if media is None:
        _log.warning('Need additional information (keyword argument \'media\')'
                     ' to be able to handle sniffing')
        return None

    extension = os.path.splitext(media.filename)[1]
    # Drop the leading dot and normalise the case before comparing
    clean_ext = extension[1:].lower()

    if clean_ext not in SUPPORTED_FILETYPES:
        _log.debug('Media present, extension not found in {0}'.format(
            SUPPORTED_FILETYPES))
        return None

    _log.info('Found file extension in supported filetypes')
    return MEDIA_TYPE
134
135
class ProcessImage(object):
    """Code to process an image. Will be run by celery.

    A Workbench() represents a local temporary dir. It is automatically
    cleaned up when this function exits.
    """
    # File name templates (filled in by the FilenameBuilder) for each
    # resize key accepted by --resize.
    _TARGET_TEMPLATES = {
        'thumb': '{basename}.thumbnail{ext}',
        'medium': '{basename}.medium{ext}'}

    def __init__(self, proc_state=None):
        """
        Set up the working state when a processing state is given.

        With proc_state left empty no attribute is initialised; only the
        methods that do not touch instance state are usable then.
        """
        if proc_state:
            self.proc_state = proc_state
            self.entry = proc_state.entry
            self.workbench = proc_state.workbench

            # Conversions subdirectory to avoid collisions
            self.conversions_subdir = os.path.join(
                self.workbench.dir, 'convirsions')

            self.orig_filename = proc_state.get_orig_filename()
            self.name_builder = FilenameBuilder(self.orig_filename)

            # Exif extraction
            self.exif_tags = extract_exif(self.orig_filename)

            os.mkdir(self.conversions_subdir)

    def reprocess_action(self, args):
        """
        List the available actions for media in a given state

        Prints the help text and returns True when args[0].state is
        'processed'; returns None for every other state.
        """
        if args[0].state == 'processed':
            # The leading space of the last fragment keeps "defaults to" and
            # "config specs" from running together in the printed text.
            print(_('\n Available reprocessing actions for processed images:'
                    '\n \t --resize: thumb or medium'
                    '\n Options:'
                    '\n \t --size: max_width max_height (defaults to'
                    ' config specs)'))
            return True

    def _parser(self, args):
        """
        Parses the unknown args from the gmg parser

        args[1] is the list of command line tokens to parse.  Returns the
        argparse namespace with `resize`, `size` and `initial_processing`.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '--resize',
            choices=['thumb', 'medium'])
        parser.add_argument(
            '--size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)
        parser.add_argument(
            '--initial_processing',
            action='store_true')

        return parser.parse_args(args[1])

    def _check_eligible(self, entry_args, reprocess_args):
        """
        Check to see if we can actually process the given media as requested

        Raises Exception when the requested action does not fit
        entry_args.state.
        """

        if entry_args.state == 'processed':
            if reprocess_args.initial_processing:
                raise Exception(_('You can not run --initial_processing on'
                                  ' media that has already been processed.'))

        if entry_args.state == 'failed':
            if reprocess_args.resize:
                raise Exception(_('You can not run --resize on media that has'
                                  ' not been processed.'))
            if reprocess_args.size:
                _log.warning('With --initial_processing, the --size flag will'
                             ' be ignored.')

        if entry_args.state == 'processing':
            raise Exception(_('We currently do not support reprocessing on'
                              ' media that is in the "processing" state.'))

    def initial_processing(self):
        """
        Create the thumbnail and (possibly) medium versions, store the
        original, drop the queued file and record EXIF / GPS data.
        """
        # Is there any GPS data
        gps_data = get_gps_data(self.exif_tags)

        # NOTE(review): both calls below pass seven positional arguments with
        # self.orig_filename in the fourth slot, whereas resize_tool() as
        # defined in this module expects target_name there and new_size as
        # its seventh positional parameter.  Left as found; verify against
        # resize_tool() before relying on it.

        # Always create a small thumbnail
        resize_tool(self.proc_state, True, 'thumb', self.orig_filename,
                    self.name_builder.fill(self._TARGET_TEMPLATES['thumb']),
                    self.conversions_subdir, self.exif_tags)

        # Possibly create a medium
        resize_tool(self.proc_state, False, 'medium', self.orig_filename,
                    self.name_builder.fill(self._TARGET_TEMPLATES['medium']),
                    self.conversions_subdir, self.exif_tags)

        # Copy our queued local workbench to its final destination
        self.proc_state.copy_original(
            self.name_builder.fill('{basename}{ext}'))

        # Remove queued media file from storage and database
        self.proc_state.delete_queue_file()

        # Insert exif data into database
        exif_all = clean_exif(self.exif_tags)

        if exif_all:
            self.entry.media_data_init(exif_all=exif_all)

        if gps_data:
            # Stored under 'gps_'-prefixed keyword names
            prefixed_gps = dict(
                ('gps_' + key, value) for key, value in gps_data.items())
            self.entry.media_data_init(**prefixed_gps)

    def reprocess(self, reprocess_info):
        """
        This function actually does the reprocessing when called by
        ProcessMedia in gmg/processing/task.py

        reprocess_info is the dict built by _get_reprocess_info().
        """
        new_size = None

        # Did they specify a size? They must specify either both or none, so
        # we only need to check if one is present
        if reprocess_info.get('max_width'):
            new_size = (reprocess_info['max_width'],
                        reprocess_info['max_height'])

        keyname = reprocess_info['resize']
        # Name the output after the version being regenerated instead of
        # always using the medium name; anything unexpected keeps the
        # previous (medium) naming.
        target_template = self._TARGET_TEMPLATES.get(
            keyname, self._TARGET_TEMPLATES['medium'])

        resize_tool(self.proc_state, False, keyname,
                    self.name_builder.fill(target_template),
                    self.conversions_subdir, self.exif_tags, new_size)

    def media_reprocess(self, args):
        """
        This function handles the all of the reprocessing logic, before calling
        gmg/submit/lib/run_process_media

        args[0] carries the entry arguments (state, media_id) and args[1] the
        unparsed command line tokens.
        """
        reprocess_args = self._parser(args)
        entry_args = args[0]

        # Can we actually process the given media as requested?
        self._check_eligible(entry_args, reprocess_args)

        # Do we want to re-try initial processing?
        if reprocess_args.initial_processing:
            for media_id in entry_args.media_id:
                entry = MediaEntry.query.filter_by(id=media_id).first()
                run_process_media(entry)

        # Are we wanting to resize the thumbnail or medium?
        elif reprocess_args.resize:
            # Identical for every entry, so build it only once
            reprocess_info = self._get_reprocess_info(reprocess_args)

            # reprocess all given media entries
            for media_id in entry_args.media_id:
                entry = MediaEntry.query.filter_by(id=media_id).first()

                # For now we can only reprocess with the original file
                if not entry.media_files.get('original'):
                    raise Exception(_('The original file for this media entry'
                                      ' does not exist.'))

                run_process_media(entry, reprocess_info=reprocess_info)

        # If we are here, they forgot to tell us how to reprocess
        else:
            _log.warning('You must set either --resize or --initial_processing'
                         ' flag to reprocess an image.')

    def _get_reprocess_info(self, args):
        """ Returns a dict with the info needed for reprocessing"""
        reprocess_info = {'resize': args.resize}

        if args.size:
            reprocess_info['max_width'] = args.size[0]
            reprocess_info['max_height'] = args.size[1]

        return reprocess_info
309
310
class CommonImageProcessor(MediaProcessor):
    """
    Provides a base for various media processing steps
    """
    # Common resizing step
    def resize_step(self):
        """Placeholder; currently does nothing."""
        pass

    @classmethod
    def _add_width_height_args(cls, parser):
        """
        Register the optional --width and --height options on parser.

        Both are parsed as integers, like the other size options defined in
        this module.
        """
        parser.add_argument(
            "--width", default=None, type=int,
            help=(
                "Width of the resized image (if not using defaults)"))
        parser.add_argument(
            "--height", default=None, type=int,
            help=(
                "Height of the resized image (if not using defaults)"))

    def common_setup(self):
        """
        Set up the workbench directory and pull down the original file
        """
        ## @@: Should this be two functions?
        # Conversions subdirectory to avoid collisions
        self.conversions_subdir = os.path.join(
            self.workbench.dir, 'convirsions')
        os.mkdir(self.conversions_subdir)

        # Pull down and set up the original file
        self.orig_filename = get_orig_filename(
            self.entry, self.workbench)
        self.name_builder = FilenameBuilder(self.orig_filename)

        # Exif extraction
        self.exif_tags = extract_exif(self.orig_filename)

    # NOTE(review): the two generate_* methods below accept `size` but never
    # forward it, and they pass seven positional arguments with
    # self.orig_filename in the fourth slot, whereas resize_tool() as defined
    # in this module expects target_name there and new_size as its seventh
    # positional parameter.  Left as found; verify against resize_tool().

    def generate_medium_if_applicable(self, size=None):
        """Ask resize_tool() (not forced) for the 'medium' version."""
        resize_tool(self.entry, False, 'medium', self.orig_filename,
                    self.name_builder.fill('{basename}.medium{ext}'),
                    self.conversions_subdir, self.exif_tags)

    def generate_thumb(self, size=None):
        """Ask resize_tool() (forced) for the 'thumb' version."""
        resize_tool(self.entry, True, 'thumb', self.orig_filename,
                    self.name_builder.fill('{basename}.thumbnail{ext}'),
                    self.conversions_subdir, self.exif_tags)

    def copy_original(self):
        """Hand the original file to the module-level copy_original()."""
        copy_original(
            self.entry, self.orig_filename,
            self.name_builder.fill('{basename}{ext}'))

    def extract_metadata(self):
        """Record cleaned EXIF data and GPS data on the entry, if any."""
        # Is there any GPS data
        gps_data = get_gps_data(self.exif_tags)

        # Insert exif data into database
        exif_all = clean_exif(self.exif_tags)

        if exif_all:
            self.entry.media_data_init(exif_all=exif_all)

        if gps_data:
            # Stored under 'gps_'-prefixed keyword names
            prefixed_gps = dict(
                ('gps_' + key, value) for key, value in gps_data.items())
            self.entry.media_data_init(**prefixed_gps)
378
379
class InitialProcessor(CommonImageProcessor):
    """
    Processor that runs the first processing pass on a new image
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry):
        """
        Tell whether the entry's state is one this processor accepts
        """
        eligible_states = ("unprocessed", "failed")
        return entry.state in eligible_states

    ###############################
    # Command line interface things
    ###############################

    @classmethod
    def generate_parser(cls):
        """
        Build the argument parser offering --size and --thumb-size, each
        taking two integers
        """
        # Settings shared by both size options
        pair_of_ints = dict(nargs=2, type=int)

        parser = argparse.ArgumentParser(
            prog=cls.name,
            description=cls.description)
        parser.add_argument(
            '--size',
            metavar=('max_width', 'max_height'),
            **pair_of_ints)
        parser.add_argument(
            '--thumb-size',
            **pair_of_ints)
        return parser

    @classmethod
    def args_to_request(cls, args):
        """
        Turn parsed arguments into a request via request_from_args()
        """
        request_keys = ['size', 'thumb_size']
        return request_from_args(args, request_keys)

    def process(self, size=None, thumb_size=None):
        """
        Run the processing steps in order: setup, medium, thumbnail,
        metadata
        """
        # NOTE(review): the inherited copy_original() is not part of this
        # sequence -- confirm that is intended.
        self.common_setup()
        self.generate_medium_if_applicable(size=size)
        self.generate_thumb(size=thumb_size)
        self.extract_metadata()
427 self.generate_thumb(size=thumb_size)
428 self.extract_metadata()
429
430
class ImageProcessingManager(ProcessingManager):
    """
    Processing manager for images; registers InitialProcessor on creation
    """
    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass when this class is subclassed, so a subclass calling
        # this __init__ would recurse forever.
        super(ImageProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
435
436
# Debugging aid: pretty-print the cleaned EXIF data of the image file named
# as the first command line argument.
if __name__ == '__main__':
    import sys
    import pprint

    if len(sys.argv) < 2:
        # Exit with a usage message rather than an IndexError traceback
        sys.exit('Usage: {0} <image file>'.format(sys.argv[0]))

    pp = pprint.PrettyPrinter()

    result = extract_exif(sys.argv[1])
    # gps and useful are computed but not displayed
    gps = get_gps_data(result)
    clean = clean_exif(result)
    useful = get_useful(clean)

    # pprint() writes to stdout itself and returns None, so wrapping it in a
    # print statement only added a stray "None" line to the output.
    pp.pprint(clean)