Commit | Line | Data |
---|---|---|
a246ccca | 1 | # GNU MediaGoblin -- federated, autonomous media hosting |
cf29e8a8 | 2 | # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS. |
a246ccca JW |
3 | # |
4 | # This program is free software: you can redistribute it and/or modify | |
5 | # it under the terms of the GNU Affero General Public License as published by | |
6 | # the Free Software Foundation, either version 3 of the License, or | |
7 | # (at your option) any later version. | |
8 | # | |
9 | # This program is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | # GNU Affero General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Affero General Public License | |
15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
a246ccca JW |
16 | import chardet |
17 | import os | |
18 | import Image | |
010d28b4 | 19 | import logging |
a246ccca JW |
20 | |
21 | from mediagoblin import mg_globals as mgg | |
22 | from mediagoblin.processing import create_pub_filepath, THUMB_SIZE | |
c2dfe1dd | 23 | from mediagoblin.media_types.ascii import asciitoimage |
a246ccca | 24 | |
010d28b4 | 25 | _log = logging.getLogger(__name__) |
a246ccca | 26 | |
ec4261a4 JW |
def sniff_handler(media_file, **kw):
    '''
    Report whether this media type claims ``media_file``.

    This implementation never inspects ``media_file`` or the extra keyword
    arguments; it unconditionally answers False.
    '''
    # NOTE(review): presumably the caller reads False as "not handled by the
    # ascii media type" -- confirm against the sniffing code.
    return False
29 | ||
a246ccca JW |
def process_ascii(entry):
    '''
    Code to process a txt file

    Reads the queued file referenced by ``entry['queued_media_file']`` and
    publishes three files to ``mgg.public_store``:

    - ``thumbnail.png``: an image rendering of the text, shrunk to
      ``THUMB_SIZE``.
    - the original file, byte for byte, under its queued file name.
    - ``ascii-portable.txt``: the text re-encoded as pure ASCII, with every
      non-ASCII character replaced by a numeric character reference.

    Afterwards the queued file is deleted from ``mgg.queue_store``,
    ``entry['queued_media_file']`` is reset to ``[]``, the published paths
    are recorded under ``entry['media_files']`` (keys ``thumb``, ``unicode``
    and ``original``) and ``entry.save()`` is called.

    ``entry`` must support item access, ``setdefault`` and ``save()``.

    Raises ``UnicodeDecodeError`` if the file content cannot be decoded with
    the charset chosen below.
    '''
    workbench = mgg.workbench_manager.create_workbench()
    # Conversions subdirectory to avoid collisions
    conversions_subdir = os.path.join(
        workbench.dir, 'conversions')
    os.mkdir(conversions_subdir)

    queued_filepath = entry['queued_media_file']
    queued_filename = workbench.localized_file(
        mgg.queue_store, queued_filepath,
        'source')

    # Read the source exactly once; every later step works on this buffer,
    # so the handle does not have to stay open (or be rewound) while we talk
    # to the storage backends.
    with open(queued_filename, 'rb') as queued_file:
        queued_data = queued_file.read()

    queued_file_charset = chardet.detect(queued_data)

    # Only select a non-utf-8 charset if chardet is *really* sure; below
    # 0.9 confidence we fall back to UTF-8.
    # (Sample named in the original note: "Feli\x0109an superjaron".)
    if queued_file_charset['confidence'] < 0.9:
        interpreted_charset = 'utf-8'
    else:
        interpreted_charset = queued_file_charset['encoding']

    _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
        queued_file_charset,
        interpreted_charset))

    thumb_filepath = create_pub_filepath(
        entry, 'thumbnail.png')

    tmp_thumb_filename = os.path.join(
        conversions_subdir, thumb_filepath[-1])

    converter = asciitoimage.AsciiToImage()

    thumb = converter._create_image(queued_data)
    thumb.thumbnail(THUMB_SIZE, Image.ANTIALIAS)

    # PNG is binary data: the file must be opened in binary mode, otherwise
    # platforms that translate newlines corrupt the image.
    with open(tmp_thumb_filename, 'wb') as thumb_file:
        thumb.save(thumb_file)

    _log.debug('Copying local file to public storage')
    mgg.public_store.copy_local_to_storage(
        tmp_thumb_filename, thumb_filepath)

    original_filepath = create_pub_filepath(entry, queued_filepath[-1])

    with mgg.public_store.get_file(original_filepath, 'wb') \
            as original_file:
        original_file.write(queued_data)

    unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')

    with mgg.public_store.get_file(unicode_filepath, 'wb') \
            as unicode_file:
        # Decode the original file from its detected charset (or UTF8)
        # Encode the unicode instance to ASCII and replace any non-ASCII
        # with a numeric character reference (&#NNNN;)
        unicode_file.write(
            queued_data.decode(interpreted_charset).encode(
                'ascii',
                'xmlcharrefreplace'))

    mgg.queue_store.delete_file(queued_filepath)
    entry['queued_media_file'] = []
    media_files_dict = entry.setdefault('media_files', {})
    media_files_dict['thumb'] = thumb_filepath
    media_files_dict['unicode'] = unicode_filepath
    media_files_dict['original'] = original_filepath

    entry.save()