From b2cde062c247a38dd71ae3cc732ad8acef2444b5 Mon Sep 17 00:00:00 2001
From: Benjamin Rubin
Date: Sat, 19 Jul 2014 20:00:58 -0400
Subject: [PATCH] Unescape HTML entities in tweets and messages

---
 rainbowstream/draw.py     | 11 ++++++++---
 rainbowstream/py3patch.py |  7 +++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/rainbowstream/draw.py b/rainbowstream/draw.py
index 96d2657..66c2483 100644
--- a/rainbowstream/draw.py
+++ b/rainbowstream/draw.py
@@ -20,6 +20,11 @@ db = RainbowDB()
 g = {}
 
 
+def unescape(s):
+    p = HTMLParser()
+    return p.unescape(s)
+
+
 def init_cycle():
     """
     Init the cycle
@@ -147,7 +152,7 @@ def draw(t, iot=False, keyword=None, check_semaphore=False, fil=[], ig=[]):
     check_theme()
     # Retrieve tweet
     tid = t['id']
-    text = t['text']
+    text = unescape(t['text'])
     screen_name = t['user']['screen_name']
     name = t['user']['name']
     created_at = t['created_at']
@@ -267,7 +272,7 @@ def print_message(m):
     """
     sender_screen_name = '@' + m['sender_screen_name']
     sender_name = m['sender']['name']
-    text = m['text']
+    text = unescape(m['text'])
     recipient_screen_name = '@' + m['recipient_screen_name']
     recipient_name = m['recipient']['name']
     mid = m['id']
@@ -453,4 +458,4 @@ def print_list(group):
 
 
 # Start the color cycle
-start_cycle()
\ No newline at end of file
+start_cycle()
diff --git a/rainbowstream/py3patch.py b/rainbowstream/py3patch.py
index 05fe7ff..6042984 100644
--- a/rainbowstream/py3patch.py
+++ b/rainbowstream/py3patch.py
@@ -9,6 +9,13 @@ try:
 except:
     from io import StringIO, BytesIO
 
+# HTMLParser module
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
+
 # raw_input and map functiion behaviour
 if sys.version[0] == "3":
     raw_input = input
-- 
2.25.1