Commit | Line | Data |
---|---|---|
a246ccca | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
cf29e8a8 | 2 | # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. |
a246ccca JW |
3 | # |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
a246ccca JW |
16 | import chardet |
17 | import os | |
18 | import Image | |
010d28b4 | 19 | import logging |
a246ccca JW |
20 | |
21 | from mediagoblin import mg_globals as mgg | |
c56d4b55 | 22 | from mediagoblin.processing import create_pub_filepath |
c2dfe1dd | 23 | from mediagoblin.media_types.ascii import asciitoimage |
a246ccca | 24 | |
010d28b4 | 25 | _log = logging.getLogger(__name__) |
a246ccca | 26 | |
10085b77 JW |
27 | SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo'] |
28 | ||
c56d4b55 | 29 | |
def sniff_handler(media_file, **kw):
    """Report whether an upload should be handled as ASCII media.

    The decision is based only on the extension of ``kw['media'].filename``
    (compared case-insensitively against SUPPORTED_EXTENSIONS);
    ``media_file`` itself is not inspected.

    Returns True for a supported extension, and False otherwise or when no
    ``media`` keyword argument was supplied.
    """
    media = kw.get('media')
    if media is None:
        return False

    # splitext() keeps the leading dot on the extension; strip it off
    # before comparing.
    extension = os.path.splitext(media.filename)[1]
    return extension[1:].lower() in SUPPORTED_EXTENSIONS
39 | ||
c56d4b55 | 40 | |
def process_ascii(proc_state):
    """Code to process a txt file. Will be run by celery.

    Steps performed:

    1. Fetch the queued upload into the workbench and read it once.
    2. Guess its character set with chardet (falling back to UTF-8 unless
       chardet is at least 90% confident).
    3. Render the text to an image, shrink it to the configured thumbnail
       size and publish it as ``thumbnail.png``.
    4. Publish a byte-for-byte copy of the original file.
    5. Publish ``ascii-portable.txt``: the decoded text re-encoded as pure
       ASCII, with every non-ASCII character replaced by a numeric
       character reference.
    6. Delete the queued file, record the three public paths under the
       entry's ``media_files`` and save the entry.

    :param proc_state: processing state object; its ``entry`` and
        ``workbench`` attributes are used.  The workbench provides a local
        temporary directory (``workbench.dir``).
    """
    entry = proc_state.entry
    workbench = proc_state.workbench
    ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']

    # Conversions subdirectory to avoid collisions
    conversions_subdir = os.path.join(workbench.dir, 'conversions')
    os.mkdir(conversions_subdir)

    queued_filepath = entry.queued_media_file
    queued_filename = workbench.localized_file(
        mgg.queue_store, queued_filepath,
        'source')

    # Read the upload a single time; every later step works on this
    # in-memory copy instead of rewinding and re-reading the file.
    with open(queued_filename, 'rb') as queued_file:
        queued_data = queued_file.read()

    queued_file_charset = chardet.detect(queued_data)

    # Only select a non-utf-8 charset if chardet is *really* sure.
    # The 0.9 threshold was tested with "Feli\x0109an superjaron".
    if queued_file_charset['confidence'] < 0.9:
        interpreted_charset = 'utf-8'
    else:
        interpreted_charset = queued_file_charset['encoding']

    _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
        queued_file_charset,
        interpreted_charset))

    thumb_filepath = create_pub_filepath(
        entry, 'thumbnail.png')

    tmp_thumb_filename = os.path.join(
        conversions_subdir, thumb_filepath[-1])

    # Only override the converter's font when one is configured.
    ascii_converter_args = {}

    if ascii_config['thumbnail_font']:
        ascii_converter_args['font'] = ascii_config['thumbnail_font']

    converter = asciitoimage.AsciiToImage(
        **ascii_converter_args)

    # NOTE(review): this relies on the converter's private _create_image().
    thumb = converter._create_image(queued_data)

    # thumbnail() resizes the image in place.
    thumb.thumbnail(
        (mgg.global_config['media:thumb']['max_width'],
         mgg.global_config['media:thumb']['max_height']),
        Image.ANTIALIAS)

    # PNG data is binary, so the handle must be opened in binary mode;
    # text mode can corrupt the image where newlines are translated.
    with open(tmp_thumb_filename, 'wb') as thumb_file:
        thumb.save(thumb_file)

    _log.debug('Copying local file to public storage')
    mgg.public_store.copy_local_to_storage(
        tmp_thumb_filename, thumb_filepath)

    original_filepath = create_pub_filepath(entry, queued_filepath[-1])

    with mgg.public_store.get_file(original_filepath, 'wb') \
            as original_file:
        original_file.write(queued_data)

    unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')

    with mgg.public_store.get_file(unicode_filepath, 'wb') \
            as unicode_file:
        # Decode the original file from its detected charset (or UTF8).
        # Encode the unicode instance to ASCII and replace any non-ASCII
        # character with a numeric character reference (&#NNNN;).
        unicode_file.write(
            queued_data.decode(interpreted_charset).encode(
                'ascii',
                'xmlcharrefreplace'))

    # Everything is published: drop the queued upload and record the
    # public locations on the entry.
    mgg.queue_store.delete_file(queued_filepath)
    entry.queued_media_file = []
    media_files_dict = entry.setdefault('media_files', {})
    media_files_dict['thumb'] = thumb_filepath
    media_files_dict['unicode'] = unicode_filepath
    media_files_dict['original'] = original_filepath

    entry.save()