ASCII art support - Fixes

author Joar Wandborg <git@wandborg.com>

Thu, 2 Feb 2012 20:28:21 +0000 (21:28 +0100)

committer Joar Wandborg <git@wandborg.com>

Thu, 2 Feb 2012 21:29:40 +0000 (22:29 +0100)
author Joar Wandborg <git@wandborg.com>
Thu, 2 Feb 2012 20:28:21 +0000 (21:28 +0100)
committer Joar Wandborg <git@wandborg.com>
Thu, 2 Feb 2012 21:29:40 +0000 (22:29 +0100)
diff --git a/mediagoblin/media_types/ascii/asciitoimage.py b/mediagoblin/media_types/ascii/asciitoimage.py

index da1a3bccb44d824776e11c810ced0c7a35b76026..186d80660fd3146b6feff2a857ac4233cb968381 100644 (file)
--- a/mediagoblin/media_types/ascii/asciitoimage.py
+++ b/mediagoblin/media_types/ascii/asciitoimage.py
@@ -65,7 +65,8 @@ class AsciiToImage(object):
  
          self._if = ImageFont.truetype(
              self._font,
-            self._font_size)
+            self._font_size,
+            encoding='unic')
  
          #      ,-,-^-'-^'^-^'^-'^-.
          #     ( I am a wall socket )Oo,  ___
@@ -91,6 +92,9 @@ class AsciiToImage(object):
          - Character set detection and decoding,
            http://pypi.python.org/pypi/chardet
          '''
+        # Convert the input from str to unicode
+        text = text.decode('utf-8')
+
          # TODO: Account for alternative line endings
          lines = text.split('\n')
  
@@ -123,7 +127,7 @@ class AsciiToImage(object):
  
                  px_pos = self._px_pos(char_pos)
  
-                _log.debug('Writing character "{0}" at {1} (px pos {2}'.format(
+                _log.debug('Writing character "{0}" at {1} (px pos {2})'.format(
                          char,
                          char_pos,
                          px_pos))
@@ -152,21 +156,3 @@ class AsciiToImage(object):
                  px_pos[index] = char_pos[index] * self._if_dims[index]
  
          return px_pos
-
-
-if __name__ == "__main__":
-    import urllib
-    txt = urllib.urlopen('file:///home/joar/Dropbox/ascii/install-all-the-dependencies.txt')
-
-    _log.setLevel(logging.DEBUG)
-    logging.basicConfig()
-
-    converter = AsciiToImage()
-
-    converter.convert(txt.read(), '/tmp/test.png')
-
-    '''
-    im, x, y, duration = renderImage(h, 10)
-    print "Rendered image in %.5f seconds" % duration
-    im.save('tldr.png', "PNG")
-    '''
diff --git a/mediagoblin/media_types/ascii/processing.py b/mediagoblin/media_types/ascii/processing.py

index ec530df62a234285ac5d0258f5132f556024014c..96dfce807f8c138cfd1a28a8339160b0366ec6bf 100644 (file)
--- a/mediagoblin/media_types/ascii/processing.py
+++ b/mediagoblin/media_types/ascii/processing.py
@@ -17,10 +17,12 @@ import asciitoimage
  import chardet
  import os
  import Image
+import logging
  
  from mediagoblin import mg_globals as mgg
  from mediagoblin.processing import create_pub_filepath, THUMB_SIZE
  
+_log = logging.getLogger(__name__)
  
  def process_ascii(entry):
      '''
@@ -42,6 +44,17 @@ def process_ascii(entry):
      with queued_file:
          queued_file_charset = chardet.detect(queued_file.read())
  
+        # Only select a non-utf-8 charset if chardet is *really* sure
+        # Tested with "Feli\x0109an superjaron", which was detected
+        if queued_file_charset['confidence'] < 0.9:
+            interpreted_charset = 'utf-8'
+        else:
+            interpreted_charset = queued_file_charset['encoding']
+
+        _log.info('Charset detected: {0}\nWill interpret as: {1}'.format(
+                queued_file_charset,
+                interpreted_charset))
+
          queued_file.seek(0)  # Rewind the queued file
  
          thumb_filepath = create_pub_filepath(
@@ -73,13 +86,16 @@ def process_ascii(entry):
  
          queued_file.seek(0)  # Rewind *again*
  
-        unicode_filepath = create_pub_filepath(entry, 'unicode.txt')
+        unicode_filepath = create_pub_filepath(entry, 'ascii-portable.txt')
  
          with mgg.public_store.get_file(unicode_filepath, 'wb') \
                  as unicode_file:
+            # Decode the original file from its detected charset (or UTF8)
+            # Encode the unicode instance to ASCII and replace any non-ASCII
+            # with an HTML entity (&#NNNN;)
              unicode_file.write(
-                    unicode(queued_file.read().decode(
-                        queued_file_charset['encoding'])).encode(
+                unicode(queued_file.read().decode(
+                        interpreted_charset)).encode(
                      'ascii',
                      'xmlcharrefreplace'))
author	Joar Wandborg <git@wandborg.com>
	Thu, 2 Feb 2012 20:28:21 +0000 (21:28 +0100)
committer	Joar Wandborg <git@wandborg.com>
	Thu, 2 Feb 2012 21:29:40 +0000 (22:29 +0100)
mediagoblin/media_types/ascii/asciitoimage.py		patch \| blob \| blame \| history
mediagoblin/media_types/ascii/processing.py		patch \| blob \| blame \| history