self._if = ImageFont.truetype(
self._font,
- self._font_size)
+ self._font_size,
+ encoding='unic')
# ,-,-^-'-^'^-^'^-'^-.
# ( I am a wall socket )Oo, ___
- Character set detection and decoding,
http://pypi.python.org/pypi/chardet
'''
+ # Convert the input from str to unicode
+ text = text.decode('utf-8')
+
# TODO: Account for alternative line endings
lines = text.split('\n')
px_pos = self._px_pos(char_pos)
- _log.debug('Writing character "{0}" at {1} (px pos {2}'.format(
+ _log.debug('Writing character "{0}" at {1} (px pos {2})'.format(
char,
char_pos,
px_pos))
px_pos[index] = char_pos[index] * self._if_dims[index]
return px_pos
-
-
-if __name__ == "__main__":
- import urllib
- txt = urllib.urlopen('file:///home/joar/Dropbox/ascii/install-all-the-dependencies.txt')
-
- _log.setLevel(logging.DEBUG)
- logging.basicConfig()
-
- converter = AsciiToImage()
-
- converter.convert(txt.read(), '/tmp/test.png')
-
- '''
- im, x, y, duration = renderImage(h, 10)
- print "Rendered image in %.5f seconds" % duration
- im.save('tldr.png', "PNG")
- '''
import chardet
import os
import Image
+import logging
from mediagoblin import mg_globals as mgg
from mediagoblin.processing import create_pub_filepath, THUMB_SIZE
+_log = logging.getLogger(__name__)
def process_ascii(entry):
'''
with queued_file:
queued_file_charset = chardet.detect(queued_file.read())
+ # Only select a non-utf-8 charset if chardet is *really* sure
+ # Tested with "Feli\x0109an superjaron", which was detected as a non-UTF-8 charset (presumably with confidence below 0.9 -- TODO confirm)
+ if queued_file_charset['confidence'] < 0.9:
+ interpreted_charset = 'utf-8'
+ else:
+ interpreted_charset = queued_file_charset['encoding']
+
+ _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
+ queued_file_charset,
+ interpreted_charset))
+
queued_file.seek(0) # Rewind the queued file
thumb_filepath = create_pub_filepath(
queued_file.seek(0) # Rewind *again*
- unicode_filepath = create_pub_filepath(entry, 'unicode.txt')
+ unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
with mgg.public_store.get_file(unicode_filepath, 'wb') \
as unicode_file:
+ # Decode the original file from its detected charset (or UTF8)
+ # Encode the unicode instance to ASCII and replace any non-ASCII
+ # with a numeric HTML character reference (&#NNNN;)
unicode_file.write(
- unicode(queued_file.read().decode(
- queued_file_charset['encoding'])).encode(
+ unicode(queued_file.read().decode(
+ interpreted_charset)).encode(
'ascii',
'xmlcharrefreplace'))