Add priority to the celery tasks
[mediagoblin.git] / mediagoblin / media_types / ascii / processing.py
CommitLineData
a246ccca 1# GNU MediaGoblin -- federated, autonomous media hosting
cf29e8a8 2# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
a246ccca
JW
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
35d6a950 16import argparse
a246ccca
JW
17import chardet
18import os
d0e9f843
AL
19try:
20 from PIL import Image
21except ImportError:
22 import Image
010d28b4 23import logging
a246ccca 24
e49b7e02
BP
25import six
26
a246ccca 27from mediagoblin import mg_globals as mgg
35d6a950
RE
28from mediagoblin.processing import (
29 create_pub_filepath, FilenameBuilder,
30 MediaProcessor, ProcessingManager,
1cefccc7 31 get_process_filename, copy_original,
35d6a950 32 store_public, request_from_args)
c2dfe1dd 33from mediagoblin.media_types.ascii import asciitoimage
a246ccca 34
010d28b4 35_log = logging.getLogger(__name__)
a246ccca 36
10085b77 37SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']
22930812 38MEDIA_TYPE = 'mediagoblin.media_types.ascii'
10085b77 39
c56d4b55 40
def sniff_handler(media_file, filename):
    """
    Decide from the file name whether this media type handles the upload.

    Only the extension of ``filename`` is examined; ``media_file`` is not
    read.  Returns MEDIA_TYPE when the lower-cased extension is listed in
    SUPPORTED_EXTENSIONS, otherwise None.
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))

    # splitext() keeps the leading dot on the extension; strip it before
    # comparing against the dot-less entries in SUPPORTED_EXTENSIONS.
    extension = os.path.splitext(filename)[1][1:].lower()

    return MEDIA_TYPE if extension in SUPPORTED_EXTENSIONS else None
ec4261a4 51
c56d4b55 52
35d6a950
RE
class CommonAsciiProcessor(MediaProcessor):
    """
    Provides a base for various ascii processing steps

    common_setup() must run before the other helpers: it sets the
    attributes they read (ascii_config, conversions_subdir,
    process_filename, name_builder and charset).
    """
    # Keys passed to get_process_filename() when choosing the file to process.
    acceptable_files = ['original', 'unicode']

    def common_setup(self):
        """
        Load the plugin configuration and prepare the workbench.
        """
        self.ascii_config = mgg.global_config['plugins'][
            'mediagoblin.media_types.ascii']

        # Conversions subdirectory to avoid collisions
        self.conversions_subdir = os.path.join(
            self.workbench.dir, 'conversions')
        os.mkdir(self.conversions_subdir)

        # Pull down and set up the processing file
        self.process_filename = get_process_filename(
            self.entry, self.workbench, self.acceptable_files)
        self.name_builder = FilenameBuilder(self.process_filename)

        # Set by _detect_charset()
        self.charset = None

    def copy_original(self):
        """
        Hand the processing file to the module-level copy_original()
        helper under the name '{basename}{ext}'.
        """
        copy_original(
            self.entry, self.process_filename,
            self.name_builder.fill('{basename}{ext}'))

    def _detect_charset(self, orig_file):
        """
        Guess the encoding of ``orig_file`` and store it in self.charset.

        ``orig_file`` must be opened in binary mode.  The whole file is
        read for detection and the file position is reset to the start
        afterwards so the caller can read it again.
        """
        d_charset = chardet.detect(orig_file.read())

        # Only select a non-utf-8 charset if chardet is *really* sure
        # Tested with "Feli\x0109an superjaron", which was detected
        if d_charset['confidence'] < 0.9:
            self.charset = 'utf-8'
        else:
            self.charset = d_charset['encoding']

        _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
            d_charset,
            self.charset))

        # Rewind the file
        orig_file.seek(0)

    def store_unicode_file(self):
        """
        Write an ASCII-only copy of the processing file to public storage
        and record it as the entry's 'unicode' media file.
        """
        with open(self.process_filename, 'rb') as orig_file:
            self._detect_charset(orig_file)
            unicode_filepath = create_pub_filepath(self.entry,
                                                   'ascii-portable.txt')

            with mgg.public_store.get_file(unicode_filepath, 'wb') \
                    as unicode_file:
                # Decode the original file from its detected charset (or
                # UTF-8), then encode the text to ASCII, replacing every
                # non-ASCII character with a numeric character reference
                # (&#NNN;).
                unicode_file.write(
                    six.text_type(orig_file.read().decode(
                        self.charset)).encode(
                            'ascii',
                            'xmlcharrefreplace'))

        self.entry.media_files['unicode'] = unicode_filepath

    def generate_thumb(self, font=None, thumb_size=None):
        """
        Render the processing file to an image and publish it as 'thumb'.

        :param font: font passed to the AsciiToImage converter.  When
            falsy, the plugin's 'thumbnail_font' setting is used; when
            that is unset too, the converter is created without a font
            argument.
        :param thumb_size: (max_width, max_height).  When falsy, the
            values from the 'media:thumb' config section are used.

        Nothing is generated when the stored 'thumb' metadata already
        matches the requested font and size.
        """
        with open(self.process_filename, 'rb') as orig_file:
            # If no font kwarg, check config
            if not font:
                font = self.ascii_config.get('thumbnail_font', None)
            if not thumb_size:
                thumb_size = (mgg.global_config['media:thumb']['max_width'],
                              mgg.global_config['media:thumb']['max_height'])

            if self._skip_resizing(font, thumb_size):
                return

            tmp_thumb = os.path.join(
                self.conversions_subdir,
                self.name_builder.fill('{basename}.thumbnail.png'))

            ascii_converter_args = {}

            # Use the resolved font (kwarg first, then config).  Passing
            # the config value unconditionally would ignore an explicit
            # font and raise KeyError when the config has none.
            if font:
                ascii_converter_args.update({'font': font})

            converter = asciitoimage.AsciiToImage(
                **ascii_converter_args)

            thumb = converter._create_image(
                orig_file.read())

            thumb.thumbnail(thumb_size, Image.ANTIALIAS)
            thumb.save(tmp_thumb)

            thumb_info = {'font': font,
                          'width': thumb_size[0],
                          'height': thumb_size[1]}

            # NOTE(review): the temporary file is named '.png' but it is
            # published under a '.jpg' name -- confirm this is intended.
            _log.debug('Copying local file to public storage')
            store_public(self.entry, 'thumb', tmp_thumb,
                         self.name_builder.fill('{basename}.thumbnail.jpg'))

            # Remember the settings so _skip_resizing() can detect a no-op.
            self.entry.set_file_metadata('thumb', **thumb_info)

    def _skip_resizing(self, font, thumb_size):
        """
        Return True when the stored 'thumb' metadata has the same font,
        width and height as requested, i.e. regenerating would be a no-op.
        Return False when there is no stored metadata.
        """
        thumb_info = self.entry.get_file_metadata('thumb')

        if not thumb_info:
            return False

        return (thumb_info.get('font') == font
                and thumb_info.get('width') == thumb_size[0]
                and thumb_info.get('height') == thumb_size[1])
fd9e4af2 176
35d6a950
RE
177
class InitialProcessor(CommonAsciiProcessor):
    """
    Initial processing step for new ascii media
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Return True when the media is in the "unprocessed" or "failed"
        state.  When ``state`` is falsy, ``entry.state`` is used.
        """
        if not state:
            state = entry.state
        return state in (
            "unprocessed", "failed")

    @classmethod
    def generate_parser(cls):
        """
        Build the argparse parser for this processor's options.
        """
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        # The second value is the height; the help output must say so.
        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--font',
            help='the thumbnail font')

        return parser

    @classmethod
    def args_to_request(cls, args):
        """
        Convert parsed arguments into a request via request_from_args(),
        keeping the 'thumb_size' and 'font' values.
        """
        return request_from_args(
            args, ['thumb_size', 'font'])

    def process(self, thumb_size=None, font=None):
        """
        Run the full initial pipeline: set up, store the ASCII-safe copy,
        generate the thumbnail, copy the original and delete the queue
        file.
        """
        self.common_setup()
        self.store_unicode_file()
        self.generate_thumb(thumb_size=thumb_size, font=font)
        self.copy_original()
        self.delete_queue_file()
221
222
698c7a8b
RE
class Resizer(CommonAsciiProcessor):
    """
    Resizing process steps for processed media
    """
    name = 'resize'
    description = 'Resize thumbnail'
    thumb_size = 'thumb_size'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Determine if this media type is eligible for processing

        Only media in the 'processed' state qualifies.  When ``state`` is
        falsy, ``entry.state`` is used.
        """
        if not state:
            state = entry.state
        # Equality, not ``in``: ``state in 'processed'`` is a substring
        # test and would also accept values such as 'process' or 'ed'.
        return state == 'processed'

    @classmethod
    def generate_parser(cls):
        """
        Build the argparse parser for this processor's options.
        """
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        # Needed for gmg reprocess thumbs to work
        parser.add_argument(
            'file',
            nargs='?',
            default='thumb',
            choices=['thumb'])

        return parser

    @classmethod
    def args_to_request(cls, args):
        """
        Convert parsed arguments into a request via request_from_args(),
        keeping the 'thumb_size' and 'file' values.
        """
        return request_from_args(
            args, ['thumb_size', 'file'])

    def process(self, thumb_size=None, file=None):
        """
        Regenerate the thumbnail at ``thumb_size``.

        ``file`` is accepted to match the parser but is not used here;
        'thumb' is its only allowed value.
        """
        self.common_setup()
        self.generate_thumb(thumb_size=thumb_size)
269
270
35d6a950
RE
class AsciiProcessingManager(ProcessingManager):
    """
    Processing manager for ascii media; registers the InitialProcessor
    and Resizer processors.
    """

    def __init__(self):
        super(AsciiProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
        self.add_processor(Resizer)

    def workflow(self, entry, manager, feed_url, reprocess_action,
                 reprocess_info=None):
        """
        Queue a ProcessMedia task for ``entry``.

        The task receives ``[entry.id, feed_url, reprocess_action,
        reprocess_info]`` as positional arguments and uses
        ``entry.queued_task_id`` as its task id.  ``manager`` is part of
        the signature but is not used here.
        """
        # Imported here because this module does not import ProcessMedia
        # at the top; without this the call below raises NameError.
        # NOTE(review): module path assumed from the project layout --
        # confirm ProcessMedia lives in mediagoblin.processing.task.
        from mediagoblin.processing.task import ProcessMedia

        ProcessMedia().apply_async(
            [entry.id, feed_url, reprocess_action, reprocess_info], {},
            task_id=entry.queued_task_id)