Commit | Line | Data |
---|---|---|
a246ccca | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
cf29e8a8 | 2 | # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. |
a246ccca JW |
3 | # |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
35d6a950 | 16 | import argparse |
a246ccca JW |
17 | import chardet |
18 | import os | |
d0e9f843 AL |
19 | try: |
20 | from PIL import Image | |
21 | except ImportError: | |
22 | import Image | |
010d28b4 | 23 | import logging |
a246ccca | 24 | |
e49b7e02 BP |
25 | import six |
26 | ||
a246ccca | 27 | from mediagoblin import mg_globals as mgg |
35d6a950 RE |
28 | from mediagoblin.processing import ( |
29 | create_pub_filepath, FilenameBuilder, | |
30 | MediaProcessor, ProcessingManager, | |
1cefccc7 | 31 | get_process_filename, copy_original, |
35d6a950 | 32 | store_public, request_from_args) |
c2dfe1dd | 33 | from mediagoblin.media_types.ascii import asciitoimage |
a246ccca | 34 | |
# Module-level logger, named after this module
_log = logging.getLogger(__name__)

# File extensions (lower case, without the leading dot) that
# sniff_handler() accepts as ascii media
SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']

# Identifier returned by sniff_handler() for a matching file
MEDIA_TYPE = 'mediagoblin.media_types.ascii'
10085b77 | 39 | |
c56d4b55 | 40 | |
def sniff_handler(media_file, filename):
    """
    Decide from the file name whether an upload is ascii media.

    Only the extension of ``filename`` is examined (case-insensitively);
    ``media_file`` is not read here.  Returns ``MEDIA_TYPE`` when the
    extension is listed in ``SUPPORTED_EXTENSIONS``, otherwise ``None``.
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))

    extension = os.path.splitext(filename)[1]

    # Drop the leading dot and normalise case before the lookup
    if extension[1:].lower() not in SUPPORTED_EXTENSIONS:
        return None

    return MEDIA_TYPE
ec4261a4 | 51 | |
c56d4b55 | 52 | |
35d6a950 RE |
class CommonAsciiProcessor(MediaProcessor):
    """
    Provides a base for various ascii processing steps

    Subclasses are expected to call ``common_setup()`` first; the other
    methods rely on the attributes it sets (``ascii_config``,
    ``conversions_subdir``, ``process_filename``, ``name_builder``).
    """
    # Keys handed to get_process_filename() when picking the file to
    # process
    acceptable_files = ['original', 'unicode']

    def common_setup(self):
        """
        Set up the state shared by all processing steps.

        Reads the plugin configuration, creates a ``conversions``
        directory inside the workbench, fetches the file to process and
        builds the filename helper.  ``os.mkdir`` raises if the
        conversions directory already exists.
        """
        self.ascii_config = mgg.global_config['plugins'][
            'mediagoblin.media_types.ascii']

        # Conversions subdirectory to avoid collisions
        self.conversions_subdir = os.path.join(
            self.workbench.dir, 'conversions')
        os.mkdir(self.conversions_subdir)

        # Pull down and set up the processing file
        self.process_filename = get_process_filename(
            self.entry, self.workbench, self.acceptable_files)
        self.name_builder = FilenameBuilder(self.process_filename)

        # Filled in by _detect_charset()
        self.charset = None

    def copy_original(self):
        """
        Hand the processed file to the module-level ``copy_original``
        helper under the name ``{basename}{ext}``.
        """
        # Inside a method body this name resolves to the imported
        # module-level function, not to this method.
        copy_original(
            self.entry, self.process_filename,
            self.name_builder.fill('{basename}{ext}'))

    def _detect_charset(self, orig_file):
        """
        Guess the charset of ``orig_file`` and store it in
        ``self.charset``.

        ``orig_file`` must be a binary file object; it is read completely
        and rewound to the start afterwards.
        """
        d_charset = chardet.detect(orig_file.read())

        # Only select a non-utf-8 charset if chardet is *really* sure.
        # (Original note: tested with "Feli\x0109an superjaron", which
        # chardet detected -- presumably as something other than utf-8.)
        if d_charset['confidence'] < 0.9:
            self.charset = 'utf-8'
        else:
            self.charset = d_charset['encoding']

        _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
            d_charset,
            self.charset))

        # Rewind the file
        orig_file.seek(0)

    def store_unicode_file(self):
        """
        Write an ASCII-only copy of the file to public storage.

        The file is decoded using the detected charset (or UTF-8) and
        re-encoded as ASCII, with every non-ASCII character replaced by
        an XML/HTML numeric character reference.  The resulting path is
        recorded as ``self.entry.media_files['unicode']``.

        Raises ``UnicodeDecodeError`` if the content cannot be decoded
        with the chosen charset.
        """
        with open(self.process_filename, 'rb') as orig_file:
            self._detect_charset(orig_file)
            unicode_filepath = create_pub_filepath(self.entry,
                                                   'ascii-portable.txt')

            with mgg.public_store.get_file(unicode_filepath, 'wb') \
                    as unicode_file:
                # bytes.decode() already yields a text object, so no
                # extra text-type conversion is needed before encoding.
                text = orig_file.read().decode(self.charset)
                unicode_file.write(
                    text.encode('ascii', 'xmlcharrefreplace'))

        self.entry.media_files['unicode'] = unicode_filepath

    def generate_thumb(self, font=None, thumb_size=None):
        """
        Render the text as an image thumbnail and store it publicly.

        :param font: font passed to the ascii-to-image converter.  When
            not given, the ``thumbnail_font`` plugin setting is used (if
            any); when neither is set the converter's default applies.
        :param thumb_size: ``(max_width, max_height)``.  Defaults to the
            ``media:thumb`` values of the global config.

        Nothing is done when the stored 'thumb' metadata already matches
        the requested font and size.
        """
        # If no font kwarg, check config
        if not font:
            font = self.ascii_config.get('thumbnail_font', None)
        if not thumb_size:
            thumb_size = (mgg.global_config['media:thumb']['max_width'],
                          mgg.global_config['media:thumb']['max_height'])

        if self._skip_resizing(font, thumb_size):
            return

        tmp_thumb = os.path.join(
            self.conversions_subdir,
            self.name_builder.fill('{basename}.thumbnail.png'))

        ascii_converter_args = {}

        # Pass on whichever font was resolved above (kwarg first, then
        # config), so an explicitly requested font is actually used.
        if font:
            ascii_converter_args['font'] = font

        converter = asciitoimage.AsciiToImage(
            **ascii_converter_args)

        with open(self.process_filename, 'rb') as orig_file:
            thumb = converter._create_image(
                orig_file.read())

        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
        # filter under its current name.  Fall back for very old PIL.
        resample = getattr(Image, 'LANCZOS', None)
        if resample is None:
            resample = Image.ANTIALIAS

        thumb.thumbnail(thumb_size, resample)
        thumb.save(tmp_thumb)

        thumb_info = {'font': font,
                      'width': thumb_size[0],
                      'height': thumb_size[1]}

        _log.debug('Copying local file to public storage')
        # NOTE(review): the temporary file is saved as .png but published
        # under a .jpg name; left unchanged to keep stored paths stable.
        store_public(self.entry, 'thumb', tmp_thumb,
                     self.name_builder.fill('{basename}.thumbnail.jpg'))

        self.entry.set_file_metadata('thumb', **thumb_info)

    def _skip_resizing(self, font, thumb_size):
        """
        Return True when the stored 'thumb' metadata already records the
        given ``font`` and ``thumb_size`` (width, height), i.e. the
        thumbnail does not need to be regenerated.
        """
        thumb_info = self.entry.get_file_metadata('thumb')

        # No metadata recorded yet: always generate
        if not thumb_info:
            return False

        return (thumb_info.get('font') == font
                and thumb_info.get('width') == thumb_size[0]
                and thumb_info.get('height') == thumb_size[1])
fd9e4af2 | 176 | |
35d6a950 RE |
177 | |
class InitialProcessor(CommonAsciiProcessor):
    """
    Initial processing step for new ascii media
    """
    name = "initial"
    description = "Initial processing"

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Determine if this media entry is eligible for processing

        An explicit ``state`` takes precedence; otherwise ``entry.state``
        is used.  Eligible states are "unprocessed" and "failed".
        """
        if not state:
            state = entry.state
        return state in (
            "unprocessed", "failed")

    @classmethod
    def generate_parser(cls):
        """
        Build the argparse parser for this processor's options
        (``--thumb_size`` and ``--font``).
        """
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            # Two integers: width first, then height
            metavar=('max_width', 'max_height'),
            type=int)

        parser.add_argument(
            '--font',
            help='the thumbnail font')

        return parser

    @classmethod
    def args_to_request(cls, args):
        """
        Convert parsed arguments into a request via
        ``request_from_args``, keeping 'thumb_size' and 'font'.
        """
        return request_from_args(
            args, ['thumb_size', 'font'])

    def process(self, thumb_size=None, font=None):
        """
        Run the full initial pipeline: setup, ASCII-portable copy,
        thumbnail, copy of the original, then removal of the queued file.
        """
        self.common_setup()
        self.store_unicode_file()
        self.generate_thumb(thumb_size=thumb_size, font=font)
        self.copy_original()
        self.delete_queue_file()
221 | ||
222 | ||
698c7a8b RE |
class Resizer(CommonAsciiProcessor):
    """
    Resizing process steps for processed media
    """
    name = 'resize'
    description = 'Resize thumbnail'
    thumb_size = 'thumb_size'

    @classmethod
    def media_is_eligible(cls, entry=None, state=None):
        """
        Determine if this media type is eligible for processing

        An explicit ``state`` takes precedence; otherwise ``entry.state``
        is used.  Only the exact state 'processed' is eligible.
        """
        if not state:
            state = entry.state
        # Compare for equality: ``state in 'processed'`` would be a
        # substring test and accept e.g. 'process' or 'ed'.
        return state == 'processed'

    @classmethod
    def generate_parser(cls):
        """
        Build the argparse parser for this processor's options
        (``--thumb_size`` and the optional positional ``file``).
        """
        parser = argparse.ArgumentParser(
            description=cls.description,
            prog=cls.name)

        parser.add_argument(
            '--thumb_size',
            nargs=2,
            metavar=('max_width', 'max_height'),
            type=int)

        # Needed for gmg reprocess thumbs to work
        parser.add_argument(
            'file',
            nargs='?',
            default='thumb',
            choices=['thumb'])

        return parser

    @classmethod
    def args_to_request(cls, args):
        """
        Convert parsed arguments into a request via
        ``request_from_args``, keeping 'thumb_size' and 'file'.
        """
        return request_from_args(
            args, ['thumb_size', 'file'])

    def process(self, thumb_size=None, file=None):
        """
        Regenerate the thumbnail at ``thumb_size``.

        ``file`` is accepted for interface compatibility and is not used
        here; 'thumb' is the only choice the parser allows.
        """
        self.common_setup()
        self.generate_thumb(thumb_size=thumb_size)
269 | ||
270 | ||
35d6a950 RE |
class AsciiProcessingManager(ProcessingManager):
    """
    Processing manager for ascii media; registers the initial processor
    and the thumbnail resizer.
    """
    def __init__(self):
        super(AsciiProcessingManager, self).__init__()
        self.add_processor(InitialProcessor)
        self.add_processor(Resizer)

    def workflow(self, entry, manager, feed_url, reprocess_action,
                 reprocess_info=None):
        """
        Queue a ``ProcessMedia`` task for ``entry``.

        The task receives ``entry.id``, ``feed_url``,
        ``reprocess_action`` and ``reprocess_info`` as positional
        arguments and is scheduled under ``entry.queued_task_id``.
        ``manager`` is accepted but not used here.
        """
        # ProcessMedia is not imported at module level in this file;
        # import it here so the call does not fail with a NameError.
        # NOTE(review): assumes the task lives in
        # mediagoblin.processing.task -- confirm against the project.
        from mediagoblin.processing.task import ProcessMedia

        ProcessMedia().apply_async(
            [entry.id, feed_url, reprocess_action, reprocess_info], {},
            task_id=entry.queued_task_id)