Media processing, transcoding, display fixes
[mediagoblin.git] / mediagoblin / media_types / ascii / processing.py
CommitLineData
a246ccca 1# GNU MediaGoblin -- federated, autonomous media hosting
cf29e8a8 2# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
a246ccca
JW
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU Affero General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU Affero General Public License for more details.
13#
14# You should have received a copy of the GNU Affero General Public License
15# along with this program. If not, see <http://www.gnu.org/licenses/>.
a246ccca
JW
16import chardet
17import os
18import Image
010d28b4 19import logging
a246ccca
JW
20
21from mediagoblin import mg_globals as mgg
c56d4b55 22from mediagoblin.processing import create_pub_filepath
c2dfe1dd 23from mediagoblin.media_types.ascii import asciitoimage
a246ccca 24
# Module-level logger, named after this module's import path.
_log = logging.getLogger(__name__)

# File extensions (lower-case, without the leading dot) that sniff_handler
# accepts as text media for this media type.
SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']
28
c56d4b55 29
def sniff_handler(media_file, **kw):
    '''
    Tell whether an upload should be handled by the ASCII media type.

    The decision is based only on the extension of ``kw['media'].filename``:
    it is compared case-insensitively, without the leading dot, against
    SUPPORTED_EXTENSIONS.  ``media_file`` itself is not inspected.

    Returns True for a supported extension, False otherwise (including when
    no ``media`` keyword argument was supplied or it is None).
    '''
    media = kw.get('media')
    if media is None:
        # Nothing to derive a filename from.
        return False

    extension = os.path.splitext(media.filename)[1]

    # splitext keeps the leading dot; drop it before comparing.
    return extension[1:].lower() in SUPPORTED_EXTENSIONS
39
c56d4b55 40
a246ccca
JW
def process_ascii(entry):
    '''
    Process a queued text file into its public media files.

    Steps, in order:

    1. Localize the queued file into a fresh workbench and read it once.
    2. Guess its charset with chardet; anything detected with a confidence
       below 0.9 is interpreted as UTF-8 instead.
    3. Render the text to an image, shrink it to the configured thumbnail
       size and copy it to public storage as ``thumbnail.png``.
    4. Store the untouched original bytes in public storage.
    5. Store an ASCII-only rendition (``ascii-portable.txt``) in which every
       non-ASCII character is replaced by an XML character reference.
    6. Delete the queued file and record the three public file paths under
       ``entry['media_files']`` (keys ``thumb``, ``unicode``, ``original``),
       then save the entry.

    :param entry: the media entry being processed; its ``queued_media_file``
        attribute is read and reset, and ``save()`` is called on it.

    Decoding uses the codec's default (strict) error handling, so content
    that is not valid in the interpreted charset raises UnicodeDecodeError
    after the thumbnail and original have already been stored.
    '''
    ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']
    workbench = mgg.workbench_manager.create_workbench()

    # Conversions subdirectory to avoid collisions
    conversions_subdir = os.path.join(workbench.dir, 'conversions')
    os.mkdir(conversions_subdir)

    queued_filepath = entry.queued_media_file
    queued_filename = workbench.localized_file(
        mgg.queue_store, queued_filepath, 'source')

    # Read the source exactly once; every later step works on this buffer,
    # so the file handle can be released immediately and no rewinding
    # (seek(0)) is needed between steps.
    with open(queued_filename, 'rb') as queued_file:
        raw_content = queued_file.read()

    queued_file_charset = chardet.detect(raw_content)

    # Only select a non-utf-8 charset if chardet is *really* sure;
    # low-confidence guesses are treated as UTF-8.
    if queued_file_charset['confidence'] < 0.9:
        interpreted_charset = 'utf-8'
    else:
        interpreted_charset = queued_file_charset['encoding']

    _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
        queued_file_charset,
        interpreted_charset))

    thumb_filepath = create_pub_filepath(entry, 'thumbnail.png')

    # The last component of the public path doubles as the temporary
    # file name inside the workbench.
    tmp_thumb_filename = os.path.join(
        conversions_subdir, thumb_filepath[-1])

    # Only pass a font to the converter when one is configured, so the
    # converter's own default applies otherwise.
    ascii_converter_args = {}
    if ascii_config['thumbnail_font']:
        ascii_converter_args['font'] = ascii_config['thumbnail_font']

    converter = asciitoimage.AsciiToImage(**ascii_converter_args)

    thumb = converter._create_image(raw_content)
    thumb.thumbnail(
        (mgg.global_config['media:thumb']['max_width'],
         mgg.global_config['media:thumb']['max_height']),
        Image.ANTIALIAS)

    # PNG data is binary: the file must be opened in 'wb', not text mode,
    # or newline translation can corrupt the image on some platforms.
    with open(tmp_thumb_filename, 'wb') as thumb_file:
        thumb.save(thumb_file)

    _log.debug('Copying local file to public storage')
    mgg.public_store.copy_local_to_storage(
        tmp_thumb_filename, thumb_filepath)

    # Publish the original bytes unchanged.
    original_filepath = create_pub_filepath(entry, queued_filepath[-1])

    with mgg.public_store.get_file(original_filepath, 'wb') \
            as original_file:
        original_file.write(raw_content)

    unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')

    with mgg.public_store.get_file(unicode_filepath, 'wb') \
            as unicode_file:
        # Decode the original content from its detected charset (or UTF-8),
        # then encode it to ASCII, replacing every non-ASCII character
        # with an XML numeric character reference (&#NNN;).
        unicode_file.write(
            raw_content.decode(interpreted_charset).encode(
                'ascii',
                'xmlcharrefreplace'))

    # Everything is published; drop the queued copy and point the entry
    # at the public files.
    mgg.queue_store.delete_file(queued_filepath)
    entry.queued_media_file = []
    media_files_dict = entry.setdefault('media_files', {})
    media_files_dict['thumb'] = thumb_filepath
    media_files_dict['unicode'] = unicode_filepath
    media_files_dict['original'] = original_filepath

    entry.save()