Merge branch 'persona_resquash'
[mediagoblin.git] / mediagoblin / media_types / ascii / processing.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 import chardet
17 import os
18 try:
19 from PIL import Image
20 except ImportError:
21 import Image
22 import logging
23
24 from mediagoblin import mg_globals as mgg
25 from mediagoblin.processing import create_pub_filepath
26 from mediagoblin.media_types.ascii import asciitoimage
27
# Module-level logger, named after this module.
_log = logging.getLogger(__name__)

# Identifier that sniff_handler returns when it recognises a file.
MEDIA_TYPE = 'mediagoblin.media_types.ascii'
# Lower-case filename extensions (without the leading dot) that
# sniff_handler accepts.
SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']
32
33
def sniff_handler(media_file, **kw):
    """Decide, from the upload's filename extension, whether this is ASCII media.

    `media_file` is accepted but not inspected. The decision is based
    only on the ``filename`` attribute of the optional ``media`` keyword
    argument.

    Returns MEDIA_TYPE when the extension (case-insensitive, without the
    dot) is listed in SUPPORTED_EXTENSIONS, otherwise None.
    """
    _log.info('Sniffing {0}'.format(MEDIA_TYPE))

    media = kw.get('media')
    if media is None:
        # No upload object to read a filename from; cannot decide.
        return None

    # splitext() yields ('name', '.ext'); drop the dot and normalise case.
    extension = os.path.splitext(media.filename)[1][1:].lower()
    if extension not in SUPPORTED_EXTENSIONS:
        return None

    return MEDIA_TYPE
44
45
def process_ascii(proc_state):
    """Code to process a txt file. Will be run by celery.

    Reads the queued upload, renders a PNG thumbnail of its text, and
    stores three files in public storage: the thumbnail, a byte-for-byte
    copy of the original, and an ASCII-only "portable" rendition in which
    every non-ASCII character is replaced by a numeric character
    reference. Afterwards the queued file and its directory are deleted
    and the entry's ``media_files`` mapping is updated and saved.

    A Workbench() represents a local temporary dir. It is automatically
    cleaned up when this function exits.

    :param proc_state: processing state; this function uses its
        ``entry`` and ``workbench`` attributes.
    """
    entry = proc_state.entry
    workbench = proc_state.workbench
    ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']

    # Conversions subdirectory to avoid collisions
    conversions_subdir = os.path.join(workbench.dir, 'conversions')
    os.mkdir(conversions_subdir)

    queued_filepath = entry.queued_media_file
    queued_filename = workbench.localized_file(
        mgg.queue_store, queued_filepath,
        'source')

    # Read the upload once and close the handle right away; every later
    # step works on this in-memory copy instead of rewinding the file.
    with open(queued_filename, 'rb') as queued_file:
        queued_data = queued_file.read()

    queued_file_charset = chardet.detect(queued_data)

    # Only select a non-utf-8 charset if chardet is *really* sure
    # (the original note mentions a test with "Feli\x0109an superjaron",
    # presumably an input chardet got wrong).  Also fall back to utf-8
    # when chardet reports no encoding at all.
    if (queued_file_charset['confidence'] < 0.9
            or not queued_file_charset['encoding']):
        interpreted_charset = 'utf-8'
    else:
        interpreted_charset = queued_file_charset['encoding']

    _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
        queued_file_charset,
        interpreted_charset))

    # --- Thumbnail -------------------------------------------------------
    thumb_filepath = create_pub_filepath(entry, 'thumbnail.png')
    tmp_thumb_filename = os.path.join(
        conversions_subdir, thumb_filepath[-1])

    ascii_converter_args = {}
    if ascii_config['thumbnail_font']:
        ascii_converter_args['font'] = ascii_config['thumbnail_font']

    converter = asciitoimage.AsciiToImage(**ascii_converter_args)
    thumb = converter._create_image(queued_data)

    # PNG data is binary, so the temporary file must be opened in 'wb'.
    with open(tmp_thumb_filename, 'wb') as thumb_file:
        thumb.thumbnail(
            (mgg.global_config['media:thumb']['max_width'],
             mgg.global_config['media:thumb']['max_height']),
            Image.ANTIALIAS)
        thumb.save(thumb_file)

    _log.debug('Copying local file to public storage')
    mgg.public_store.copy_local_to_storage(
        tmp_thumb_filename, thumb_filepath)

    # --- Original --------------------------------------------------------
    original_filepath = create_pub_filepath(entry, queued_filepath[-1])

    with mgg.public_store.get_file(original_filepath, 'wb') \
            as original_file:
        original_file.write(queued_data)

    # --- Portable ASCII rendition ----------------------------------------
    unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')

    with mgg.public_store.get_file(unicode_filepath, 'wb') \
            as unicode_file:
        # Decode the original file from its detected charset (or UTF8),
        # then encode to ASCII, replacing any non-ASCII character with a
        # numeric character reference (&#NNN;).
        unicode_file.write(
            queued_data.decode(interpreted_charset).encode(
                'ascii',
                'xmlcharrefreplace'))

    # Remove queued media file from storage and database.
    # queued_filepath is in the task_id directory which should
    # be removed too, but fail if the directory is not empty to be on
    # the super-safe side.
    mgg.queue_store.delete_file(queued_filepath)       # rm file
    mgg.queue_store.delete_dir(queued_filepath[:-1])   # rm dir
    entry.queued_media_file = []

    media_files_dict = entry.setdefault('media_files', {})
    media_files_dict['thumb'] = thumb_filepath
    media_files_dict['unicode'] = unicode_filepath
    media_files_dict['original'] = original_filepath

    entry.save()