Commit | Line | Data |
---|---|---|
a246ccca | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
cf29e8a8 | 2 | # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. |
a246ccca JW |
3 | # |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
a246ccca JW |
16 | import chardet |
17 | import os | |
d0e9f843 AL |
18 | try: |
19 | from PIL import Image | |
20 | except ImportError: | |
21 | import Image | |
010d28b4 | 22 | import logging |
a246ccca JW |
23 | |
24 | from mediagoblin import mg_globals as mgg | |
c56d4b55 | 25 | from mediagoblin.processing import create_pub_filepath |
c2dfe1dd | 26 | from mediagoblin.media_types.ascii import asciitoimage |
a246ccca | 27 | |
_log = logging.getLogger(__name__)

# File extensions (lower-case, without the leading dot) that this media
# type claims.
SUPPORTED_EXTENSIONS = ['txt', 'asc', 'nfo']
MEDIA_TYPE = 'mediagoblin.media_types.ascii'


def sniff_handler(media_file, **kw):
    """Decide from the upload's file name whether this media type applies.

    Only the extension of ``kw['media'].filename`` is inspected;
    ``media_file`` itself is not read.

    Args:
        media_file: The uploaded file object (unused here).
        **kw: Optional keyword arguments. ``media`` is expected to be an
            object exposing a ``filename`` attribute.

    Returns:
        ``MEDIA_TYPE`` when the extension (case-insensitive) is listed in
        ``SUPPORTED_EXTENSIONS``, otherwise ``None``.
    """
    _log.info('Sniffing %s', MEDIA_TYPE)

    media = kw.get('media')
    if media is None:
        return None

    filename = media.filename
    if not filename:
        # An upload without a file name cannot be matched by extension;
        # os.path.splitext(None) would raise instead of declining.
        return None

    # splitext() keeps the leading dot on the extension; strip it.
    clean_ext = os.path.splitext(filename)[1][1:].lower()

    if clean_ext in SUPPORTED_EXTENSIONS:
        return MEDIA_TYPE

    return None
ec4261a4 | 44 | |
c56d4b55 | 45 | |
def _choose_charset(detection):
    """Return the charset to decode with, given a ``chardet.detect`` result."""
    # Only select a non-utf-8 charset if chardet is *really* sure
    # (confidence of at least 0.9); otherwise assume UTF-8.
    if detection['confidence'] < 0.9:
        return 'utf-8'
    return detection['encoding']


def _to_portable_ascii(raw_content, charset):
    """Decode ``raw_content`` from ``charset`` and re-encode it as pure ASCII.

    Any non-ASCII character is replaced by its numeric XML/HTML character
    reference (``&#NNN;``).
    """
    return raw_content.decode(charset).encode('ascii', 'xmlcharrefreplace')


def process_ascii(proc_state):
    """Code to process a txt file. Will be run by celery.

    Produces three public files for the entry -- a PNG thumbnail rendered
    from the text, a copy of the original upload, and an ASCII-only
    ``ascii-portable.txt`` transcoding -- then removes the queued upload
    and records the new file paths on the entry.

    A Workbench() represents a local temporary dir. It is automatically
    cleaned up when this function exits.

    Args:
        proc_state: Processing state object; its ``entry`` (the media
            entry being processed) and ``workbench`` attributes are used.
    """
    entry = proc_state.entry
    workbench = proc_state.workbench
    ascii_config = mgg.global_config['media_type:mediagoblin.media_types.ascii']

    # Conversions subdirectory to avoid collisions
    conversions_subdir = os.path.join(workbench.dir, 'conversions')
    os.mkdir(conversions_subdir)

    queued_filepath = entry.queued_media_file
    queued_filename = workbench.localized_file(
        mgg.queue_store, queued_filepath,
        'source')

    # Read the upload once and reuse the bytes for every output below,
    # instead of keeping the file open and rewinding it before each step.
    with open(queued_filename, 'rb') as queued_file:
        raw_content = queued_file.read()

    queued_file_charset = chardet.detect(raw_content)
    interpreted_charset = _choose_charset(queued_file_charset)

    _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
        queued_file_charset,
        interpreted_charset))

    # --- Thumbnail -------------------------------------------------------
    thumb_filepath = create_pub_filepath(
        entry, 'thumbnail.png')

    tmp_thumb_filename = os.path.join(
        conversions_subdir, thumb_filepath[-1])

    ascii_converter_args = {}

    # Only override the converter's font when one is configured.
    if ascii_config['thumbnail_font']:
        ascii_converter_args['font'] = ascii_config['thumbnail_font']

    converter = asciitoimage.AsciiToImage(
        **ascii_converter_args)

    thumb = converter._create_image(raw_content)
    thumb.thumbnail(
        (mgg.global_config['media:thumb']['max_width'],
         mgg.global_config['media:thumb']['max_height']),
        Image.ANTIALIAS)

    # PNG data is binary: the handle must be opened in 'wb', not 'w'.
    with open(tmp_thumb_filename, 'wb') as thumb_file:
        thumb.save(thumb_file)

    _log.debug('Copying local file to public storage')
    mgg.public_store.copy_local_to_storage(
        tmp_thumb_filename, thumb_filepath)

    # --- Original upload, stored unchanged -------------------------------
    original_filepath = create_pub_filepath(entry, queued_filepath[-1])

    with mgg.public_store.get_file(original_filepath, 'wb') \
            as original_file:
        original_file.write(raw_content)

    # --- Portable ASCII rendition ----------------------------------------
    unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')

    with mgg.public_store.get_file(unicode_filepath, 'wb') \
            as unicode_file:
        # Decode the original file from its detected charset (or UTF8),
        # then encode to ASCII, replacing any non-ASCII character with a
        # numeric character reference (&#NNN;).
        unicode_file.write(
            _to_portable_ascii(raw_content, interpreted_charset))

    # Remove queued media file from storage and database.
    # queued_filepath is in the task_id directory which should
    # be removed too, but fail if the directory is not empty to be on
    # the super-safe side.
    mgg.queue_store.delete_file(queued_filepath)      # rm file
    mgg.queue_store.delete_dir(queued_filepath[:-1])  # rm dir
    entry.queued_media_file = []

    media_files_dict = entry.setdefault('media_files', {})
    media_files_dict['thumb'] = thumb_filepath
    media_files_dict['unicode'] = unicode_filepath
    media_files_dict['original'] = original_filepath

    entry.save()