fix: don't decode stream bytes until json.decode
author Geoffrey Chan <gc@qsensei.com>
Sun, 10 May 2015 00:53:07 +0000 (20:53 -0400)
committer Geoffrey Chan <gc@qsensei.com>
Sun, 10 May 2015 01:28:47 +0000 (21:28 -0400)
tests/test_streaming.py
tweepy/streaming.py

index a2c0f2acdb7d5d94721091d63a38de689519791d..8b7abf87fa6791a65e8d16c5169237f85b9fb46d 100644 (file)
@@ -174,6 +174,16 @@ class TweepyStreamReadBuffer(unittest.TestCase):
         # The mocked function not have been called at all since the stream looks closed
         self.assertEqual(mock_read.call_count, 0)
 
+    def test_read_unicode_tweet(self):
+        stream = '11\n{id:12345}\n\n23\n{id:23456, test:"\xe3\x81\x93"}\n\n'
+        for length in [1, 2, 5, 10, 20, 50]:
+            buf = ReadBuffer(six.StringIO(stream), length)
+            self.assertEqual('11\n', buf.read_line())
+            self.assertEqual('{id:12345}\n', buf.read_len(11))
+            self.assertEqual('\n', buf.read_line())
+            self.assertEqual('23\n', buf.read_line())
+            self.assertEqual('{id:23456, test:"\xe3\x81\x93"}\n', buf.read_len(23))
+
 
 class TweepyStreamBackoffTests(unittest.TestCase):
     def setUp(self):
index 9b246bda820c5f2e5b31abcd3bf8b6570a7b2f93..6ce3e5c827b729a81eeed559c02db1810d685768 100644 (file)
@@ -150,7 +150,7 @@ class ReadBuffer(object):
 
     def __init__(self, stream, chunk_size):
         self._stream = stream
-        self._buffer = u""
+        self._buffer = ''
         self._chunk_size = chunk_size
 
     def read_len(self, length):
@@ -158,7 +158,7 @@ class ReadBuffer(object):
             if len(self._buffer) >= length:
                 return self._pop(length)
             read_len = max(self._chunk_size, length - len(self._buffer))
-            self._buffer += self._stream.read(read_len).decode("ascii")
+            self._buffer += self._stream.read(read_len)
 
     def read_line(self, sep='\n'):
         start = 0
@@ -168,7 +168,7 @@ class ReadBuffer(object):
                 return self._pop(loc + len(sep))
             else:
                 start = len(self._buffer)
-            self._buffer += self._stream.read(self._chunk_size).decode("ascii")
+            self._buffer += self._stream.read(self._chunk_size)
 
     def _pop(self, length):
         r = self._buffer[:length]