"initial" reprocessing subcommand now works!
[mediagoblin.git] / mediagoblin / media_types / image / processing.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 try:
18 from PIL import Image
19 except ImportError:
20 import Image
21 import os
22 import logging
23 import argparse
24
25 from mediagoblin import mg_globals as mgg
26 from mediagoblin.db.models import MediaEntry
27 from mediagoblin.processing import (
28 BadMediaFail, FilenameBuilder,
29 MediaProcessor, ProcessingManager)
30 from mediagoblin.submit.lib import run_process_media
31 from mediagoblin.tools.exif import exif_fix_image_orientation, \
32 extract_exif, clean_exif, get_gps_data, get_useful, \
33 exif_image_needs_rotation
34 from mediagoblin.tools.translate import lazy_pass_to_ugettext as _
35
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# Lookup table from the filter name accepted in the configuration
# (upper-cased) to the matching PIL resampling constant.
PIL_FILTERS = dict(
    (filter_name, getattr(Image, filter_name))
    for filter_name in ('NEAREST', 'BILINEAR', 'BICUBIC', 'ANTIALIAS'))

# Identifier of the media type handled by this module.
MEDIA_TYPE = 'mediagoblin.media_types.image'
45
46
def resize_image(proc_state, resized, keyname, target_name, new_size,
                 exif_tags, workdir):
    """
    Store a resized version of an image in public storage.

    The image is orientation-corrected from its EXIF data, shrunk to fit
    within new_size, written to a temporary file inside workdir and then
    handed to proc_state.store_public(). Nothing is returned.

    Arguments:
    proc_state -- the processing state for the image to resize
    resized -- an image from Image.open() of the original image being resized
    keyname -- Under what key to save in the db.
    target_name -- public file path for the new resized image
    new_size -- 2-tuple size for the resized image
    exif_tags -- EXIF data for the original image
    workdir -- directory path for storing converted image files

    Raises:
    Exception -- if the configured 'resize_filter' is not a key of
                 PIL_FILTERS
    """
    config = mgg.global_config['media_type:mediagoblin.media_types.image']

    resized = exif_fix_image_orientation(resized, exif_tags)  # Fix orientation

    filter_config = config['resize_filter']
    try:
        resize_filter = PIL_FILTERS[filter_config.upper()]
    except KeyError:
        raise Exception('Filter "{0}" not found, choose one of {1}'.format(
            unicode(filter_config),
            u', '.join(PIL_FILTERS.keys())))

    # thumbnail() resizes the image object in place, keeping aspect ratio.
    resized.thumbnail(new_size, resize_filter)

    # Copy the new file to the conversion subdir, then remotely.
    tmp_resized_filename = os.path.join(workdir, target_name)
    # Image data is binary: the file must be opened in 'wb' mode, otherwise
    # newline translation can corrupt the output on some platforms.
    with open(tmp_resized_filename, 'wb') as resized_file:
        resized.save(resized_file, quality=config['quality'])
    proc_state.store_public(keyname, tmp_resized_filename, target_name)
80
81
def resize_tool(proc_state, force, keyname, target_name,
                conversions_subdir, exif_tags, new_size=None):
    """
    Produce a resized copy of the original image, but only when needed.

    A copy is generated (via resize_image) when any of these holds:
    force is true, the original is wider or taller than new_size, or the
    EXIF data says the image needs rotating. Otherwise nothing is done.

    Arguments:
    proc_state -- the processing state; supplies the original filename
    force -- create the resized file even if the original already fits
    keyname -- key to store the file under; also selects the
               'media:<keyname>' config section for the default size
    target_name -- public file name for the resized image
    conversions_subdir -- directory in which the converted file is written
    exif_tags -- EXIF data for the original image
    new_size -- optional 2-tuple (max_width, max_height); when omitted the
                configured defaults for keyname are used

    Raises:
    BadMediaFail -- if the original file cannot be opened as an image
    """
    source_path = proc_state.get_orig_filename()

    # Fall back to the configured maximum dimensions for this key.
    if not new_size:
        size_limits = mgg.global_config['media:' + keyname]
        new_size = (size_limits['max_width'], size_limits['max_height'])

    try:
        image = Image.open(source_path)
    except IOError:
        raise BadMediaFail()

    # Evaluated left to right with short-circuiting, so the EXIF rotation
    # check only runs when none of the cheaper conditions already applies.
    needs_new_file = (
        force
        or image.size[0] > new_size[0]
        or image.size[1] > new_size[1]
        or exif_image_needs_rotation(exif_tags))
    if not needs_new_file:
        return

    resize_image(
        proc_state, image, unicode(keyname), target_name,
        new_size,
        exif_tags, conversions_subdir)
109
110
# File extensions (lower case, without the dot) accepted by sniff_handler.
SUPPORTED_FILETYPES = ['png', 'gif', 'jpg', 'jpeg', 'tiff']


def sniff_handler(media_file, **kw):
    """
    Decide from the file extension whether this module handles the upload.

    Arguments:
    media_file -- the uploaded file (not inspected here)
    kw -- must contain 'media', an object with a `filename` attribute

    Returns MEDIA_TYPE when the extension of kw['media'].filename is listed
    in SUPPORTED_FILETYPES, otherwise None.
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))

    media = kw.get('media')
    if media is None:
        # Without the 'media' keyword there is no filename to look at.
        _log.warning("Need additional information (keyword argument 'media')"
                     " to be able to handle sniffing")
        return None

    # Drop the leading dot and compare case-insensitively.
    extension = os.path.splitext(media.filename)[1]
    normalized_ext = extension[1:].lower()

    if normalized_ext not in SUPPORTED_FILETYPES:
        _log.debug('Media present, extension not found in {0}'.format(
            SUPPORTED_FILETYPES))
        return None

    _log.info('Found file extension in supported filetypes')
    return MEDIA_TYPE
131
132
class ProcessImage(object):
    """Code to process an image. Will be run by celery.

    When constructed with a processing state, the instance keeps references
    to its entry and workbench, extracts the EXIF tags of the original file
    and creates a conversions subdirectory inside the workbench directory.
    When constructed without one, only the command-line helpers
    (reprocess_action, media_reprocess and the private parsing helpers) are
    usable, since none of the per-entry attributes are set.
    """

    # FilenameBuilder templates for the file produced for each resizable key.
    _RESIZE_TARGET_TEMPLATES = {
        'thumb': '{basename}.thumbnail{ext}',
        'medium': '{basename}.medium{ext}'}

    def __init__(self, proc_state=None):
        if proc_state:
            self.proc_state = proc_state
            self.entry = proc_state.entry
            self.workbench = proc_state.workbench

            # Conversions subdirectory to avoid collisions
            self.conversions_subdir = os.path.join(
                self.workbench.dir, 'convirsions')

            self.orig_filename = proc_state.get_orig_filename()
            self.name_builder = FilenameBuilder(self.orig_filename)

            # Exif extraction
            self.exif_tags = extract_exif(self.orig_filename)

            os.mkdir(self.conversions_subdir)

    def reprocess_action(self, args):
        """
        List the available actions for media in a given state

        args[0] must expose a `state` attribute. Returns True after printing
        the help text for 'processed' media, and None for any other state.
        """
        if args[0].state == 'processed':
            # Single-argument call form prints identically under the
            # Python 2 print statement.
            print(_('\n Available reprocessing actions for processed images:'
                    '\n \t --resize: thumb or medium'
                    '\n Options:'
                    '\n \t --size: max_width max_height (defaults to'
                    ' config specs)'))
            return True

    def _parser(self, args):
        """
        Parses the unknown args from the gmg parser

        args[1] is the list of argument strings to parse. Returns the
        argparse namespace with `resize`, `size` and `initial_processing`.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument(
            '--resize',
            choices=['thumb', 'medium'])
        parser.add_argument(
            '--size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)
        parser.add_argument(
            '--initial_processing',
            action='store_true')

        return parser.parse_args(args[1])

    def _check_eligible(self, entry_args, reprocess_args):
        """
        Check to see if we can actually process the given media as requested

        Raises Exception when the requested action does not fit the media
        state given by entry_args.state.
        """
        state = entry_args.state

        if state == 'processed':
            if reprocess_args.initial_processing:
                raise Exception(_('You can not run --initial_processing on'
                                  ' media that has already been processed.'))

        if state == 'failed':
            if reprocess_args.resize:
                raise Exception(_('You can not run --resize on media that has'
                                  ' not been processed.'))
            if reprocess_args.size:
                _log.warning('With --initial_processing, the --size flag will'
                             ' be ignored.')

        if state == 'processing':
            raise Exception(_('We currently do not support reprocessing on'
                              ' media that is in the "processing" state.'))

    def initial_processing(self):
        """
        Run the first processing pass for a newly submitted image.

        Creates the thumbnail (always) and the medium version (only when
        resize_tool decides one is needed), copies the original to its final
        place, deletes the queued file and stores EXIF / GPS data on the
        entry.
        """
        # Is there any GPS data
        gps_data = get_gps_data(self.exif_tags)

        # Always create a small thumbnail.
        # resize_tool looks up the original filename from proc_state itself,
        # so only the target name is passed here.
        resize_tool(self.proc_state, True, 'thumb',
                    self.name_builder.fill(
                        self._RESIZE_TARGET_TEMPLATES['thumb']),
                    self.conversions_subdir, self.exif_tags)

        # Possibly create a medium
        resize_tool(self.proc_state, False, 'medium',
                    self.name_builder.fill(
                        self._RESIZE_TARGET_TEMPLATES['medium']),
                    self.conversions_subdir, self.exif_tags)

        # Copy our queued local workbench to its final destination
        self.proc_state.copy_original(
            self.name_builder.fill('{basename}{ext}'))

        # Remove queued media file from storage and database
        self.proc_state.delete_queue_file()

        # Insert exif data into database
        exif_all = clean_exif(self.exif_tags)

        if exif_all:
            self.entry.media_data_init(exif_all=exif_all)

        if gps_data:
            # Stored under 'gps_'-prefixed keyword names.
            prefixed_gps = dict(
                ('gps_' + key, value) for key, value in gps_data.items())
            self.entry.media_data_init(**prefixed_gps)

    def reprocess(self, reprocess_info):
        """
        This function actually does the reprocessing when called by
        ProcessMedia in gmg/processing/task.py

        reprocess_info is a dict with a 'resize' key ('thumb' or 'medium')
        and, optionally, both 'max_width' and 'max_height'.
        """
        new_size = None

        # Did they specify a size? They must specify either both or none, so
        # we only need to check if one is present
        if reprocess_info.get('max_width'):
            new_size = (reprocess_info['max_width'],
                        reprocess_info['max_height'])

        keyname = reprocess_info['resize']
        # Name the output after the key being regenerated, so a thumbnail is
        # not stored under the medium's file name.
        target_name = self.name_builder.fill(
            self._RESIZE_TARGET_TEMPLATES[keyname])

        resize_tool(self.proc_state, False, keyname, target_name,
                    self.conversions_subdir, self.exif_tags, new_size)

    def _get_entry(self, media_id):
        """Return the MediaEntry with the given id, or raise Exception."""
        entry = MediaEntry.query.filter_by(id=media_id).first()
        if entry is None:
            raise Exception(
                'No media entry found with id {0}.'.format(media_id))
        return entry

    def media_reprocess(self, args):
        """
        This function handles the all of the reprocessing logic, before calling
        gmg/submit/lib/run_process_media

        args[0] carries the entry arguments (`state`, `media_id`) and args[1]
        the remaining command-line strings handled by _parser.
        """
        reprocess_args = self._parser(args)
        entry_args = args[0]

        # Can we actually process the given media as requested?
        self._check_eligible(entry_args, reprocess_args)

        # Do we want to re-try initial processing?
        if reprocess_args.initial_processing:
            for media_id in entry_args.media_id:
                run_process_media(self._get_entry(media_id))

        # Are we wanting to resize the thumbnail or medium?
        elif reprocess_args.resize:

            # reprocess all given media entries
            for media_id in entry_args.media_id:
                entry = self._get_entry(media_id)

                # For now we can only reprocess with the original file
                if not entry.media_files.get('original'):
                    raise Exception(_('The original file for this media entry'
                                      ' does not exist.'))

                reprocess_info = self._get_reprocess_info(reprocess_args)
                run_process_media(entry, reprocess_info=reprocess_info)

        # If we are here, they forgot to tell us how to reprocess
        else:
            _log.warning('You must set either --resize or'
                         ' --initial_processing flag to reprocess an image.')

    def _get_reprocess_info(self, args):
        """ Returns a dict with the info needed for reprocessing"""
        reprocess_info = {'resize': args.resize}

        if args.size:
            reprocess_info['max_width'] = args.size[0]
            reprocess_info['max_height'] = args.size[1]

        return reprocess_info
306
307
class CommonImageProcessor(MediaProcessor):
    """
    Provides a base for various media processing steps
    """
    # Common resizing step
    def resize_step(self):
        # Placeholder: no resizing logic is implemented here yet.
        pass

    @classmethod
    def _add_width_height_args(cls, parser):
        """
        Add the optional --width and --height arguments to `parser`.

        This is a classmethod because subclasses build their parsers from
        classmethods (no instance exists at that point); calling it on an
        instance keeps working as well. Both options default to None.
        """
        parser.add_argument(
            "--width", default=None,
            help=(
                "Width of the resized image (if not using defaults)"))
        parser.add_argument(
            "--height", default=None,
            help=(
                "Height of the resized image (if not using defaults)"))
325
326
class InitialProcessor(CommonImageProcessor):
    """
    Initial processing step for new images
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligibile(cls, media_entry):
        """
        Tell whether `media_entry` may go through initial processing.

        Returns True when the entry's state is "unprocessed" or "failed".
        """
        eligible_states = ("unprocessed", "failed")
        return media_entry.state in eligible_states

    ###############################
    # Command line interface things
    ###############################

    @classmethod
    def generate_parser(cls):
        """
        Build the argparse parser for this processor's command-line options.

        The parser is described by `description` and gets the shared
        width/height arguments added to it.
        """
        cli_parser = argparse.ArgumentParser(description=cls.description)
        # NOTE(review): invoked on the class, so the inherited helper has to
        # be callable without an instance.
        cls._add_width_height_args(cli_parser)
        return cli_parser

    @classmethod
    def args_to_request(cls, args):
        """Not implemented yet; always raises NotImplementedError."""
        raise NotImplementedError
358
359
360
class ImageProcessingManager(ProcessingManager):
    """Processing manager for images; registers InitialProcessor."""

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) would
        # recurse forever as soon as this class is subclassed.
        super(ImageProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
365
366
if __name__ == '__main__':
    # Debugging aid: pretty-print the cleaned EXIF data of the image whose
    # path is given as the first command-line argument.
    import sys
    import pprint

    pp = pprint.PrettyPrinter()

    result = extract_exif(sys.argv[1])
    # gps and useful are computed but not displayed; the calls are kept so
    # that errors raised by those helpers still surface when run by hand.
    gps = get_gps_data(result)
    clean = clean_exif(result)
    useful = get_useful(clean)

    # pprint() writes to stdout itself and returns None, so wrapping it in
    # print would emit a spurious "None" line after the data.
    pp.pprint(clean)