Many changes in post model and in generic stream
authorMarek Marecki <marekjm@taistelu.com>
Fri, 23 Aug 2013 08:48:33 +0000 (10:48 +0200)
committerMarek Marecki <marekjm@taistelu.com>
Fri, 23 Aug 2013 08:48:33 +0000 (10:48 +0200)
Changelog.markdown
diaspy/__init__.py
diaspy/models.py
diaspy/settings.py
diaspy/streams.py

index 790cc0c9b9d0c950b53ff0547b21b6604405b217..aa3af8300ee18678370b6c03fa989e1f41e9409c 100644 (file)
@@ -24,11 +24,17 @@ up-to-date than manual and if conflicts appear they should follow the order:
 #### Version `0.4.1` (2013-08-):
 
 * __new__:  `__getitem__()` in `diaspy.models.Post`,
+* __new__:  `__dict__()` in `diaspy.models.Post`,
+* __new__:  `guid` argument in `diaspy.models.Post.__init__()`,
 * __new__:  `json()` method in `diaspy.streams.Generic` adds the possibility to export streams to JSON,
 * __new__:  `full()` method in `diaspy.streams.Generic` will try to fetch full stream (containing all posts),
 * __new__:  `setEmail()` method in `diaspy.settings.Settings`,
 * __new__:  `setLanguage()` method in `diaspy.settings.Settings`,
 * __new__:  `downloadPhotos()` method in `diaspy.settings.Settings`,
+* __new__:  `backtime` argument in `more()` method in `diaspy.streams.Generic`,
+
+* __upd__:  if `Post()` is created with fetched comments, data will also be fetched as a dependency,
+* __upd__:  `id` argument type is now `int` (`diaspy.models.Post.__init__()`),
 
 * __fix__:  fixed some bugs in regular expressions used by `diaspy` internals
             (html tag removal, so you get nicer notifications),
index 7e12ab610dcd0c046d5722efa57471ccf188f759..30380c71f8096bec3434f0c998cd08c9c5c62393 100644 (file)
@@ -7,4 +7,4 @@ import diaspy.notifications as notifications
 import diaspy.settings as settings
 
 
-__version__ = '0.4.0.1'
+__version__ = '0.4.1'
index a832b0d4ae3af62a414679541c60b08b696da526..bcf7daeb6b461bb5ad2469a69d438798ee28e86e 100644 (file)
@@ -315,23 +315,29 @@ class Post():
     .. note::
         Remember that you need to have access to the post.
     """
-    def __init__(self, connection, id, fetch=True, comments=True):
+    def __init__(self, connection, id=0, guid='', fetch=True, comments=True):
         """
-        :param id: id or guid of the post
-        :type id: str
+        :param id: id of the post (GUID is recommended)
+        :type id: int
+        :param guid: GUID of the post
+        :type guid: str
         :param connection: connection object used to authenticate
         :type connection: connection.Connection
         :param fetch: defines whether to fetch post's data or not
         :type fetch: bool
-        :param comments: defines whether to fetch post's comments or not
+        :param comments: defines whether to fetch post's comments or not (if True also data will be fetched)
         :type comments: bool
         """
+        if not (guid or id): raise TypeError('guid and/or id missing')
         self._connection = connection
         self.id = id
+        self.guid = guid
         self.data = {}
         self.comments = []
         if fetch: self._fetchdata()
-        if comments: self._fetchcomments()
+        if comments:
+            if not self.data: self._fetchdata()
+            self._fetchcomments()
 
     def __repr__(self):
         """Returns string containing more information then str().
@@ -346,23 +352,36 @@ class Post():
     def __getitem__(self, key):
         return self.data[key]
 
+    def __dict__(self):
+        """Returns dictionary of post's data.
+        """
+        return self.data
+
     def _fetchdata(self):
         """This function retrieves data of the post.
+
+        :returns: guid of post whose data was fetched
         """
-        request = self._connection.get('posts/{0}.json'.format(self.id))
+        if self.id: id = self.id
+        if self.guid: id = self.guid
+        request = self._connection.get('posts/{0}.json'.format(id))
         if request.status_code != 200:
-            raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, self.id))
+            raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, id))
         else:
             self.data = request.json()
+        return self['guid']
 
     def _fetchcomments(self):
-        """Retireves comments for this post.
-        """
-        request = self._connection.get('posts/{0}/comments.json'.format(self.id))
-        if request.status_code != 200:
-            raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, self.id))
-        else:
-            self.comments = [Comment(c) for c in request.json()]
+        """Retrieves comments for this post.
+        """
+        if self.id: id = self.id
+        if self.guid: id = self.guid
+        if self['interactions']['comments_count']:
+            request = self._connection.get('posts/{0}/comments.json'.format(id))
+            if request.status_code != 200:
+                raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, id))
+            else:
+                self.comments = [Comment(c) for c in request.json()]
 
     def update(self):
         """Updates post data.
index 245d3b4879b73484512d201ff7e8a7a23ad7fc76..2cb9276247fb0411d31315c29156d74935967614 100644 (file)
@@ -24,10 +24,10 @@ class Settings():
         request = self._connection.get('user/export')
         return request.text
 
-    def downloadPhotos(self, size='large', path='.', _critical=False, _stream=None):
+    def downloadPhotos(self, size='large', path='.', mark_nsfw=True, _critical=False, _stream=None):
         """Downloads photos into the current working directory.
         Sizes are: large, medium, small.
-        Filename is: {photo_guid}.{extension}
+        Filename is: {post_guid}_{photo_guid}.{extension}
 
         Normally, this method will catch urllib-generated errors and
         just issue warnings about photos that couldn't be downloaded.
@@ -38,20 +38,25 @@ class Settings():
         :type size: str
         :param path: path to download (defaults to current working directory
         :type path: str
+        :param mark_nsfw: will append '-nsfw' to images from posts marked as nsfw,
+        :type mark_nsfw: bool
         :param _stream: diaspy.streams.Generic-like object (only for testing)
         :param _critical: if True urllib errors will be reraised after generating a warning (may be removed)
 
         :returns: integer, number of photos downloaded
         """
         photos = 0
-        if _stream is not None: stream = _stream
-        else: stream = streams.Activity
-        stream = stream(self._connection)
-        stream.full()
+        if _stream is None:
+            stream = streams.Activity(self._connection)
+            stream.full()
+        else:
+            stream = _stream
         for i, post in enumerate(stream):
+            if post['nsfw'] is not False: nsfw = '-nsfw'
+            else: nsfw = ''
             if post['photos']:
                 for n, photo in enumerate(post['photos']):
-                    name = '{0}.{1}'.format(photo['guid'], photo['sizes'][size].split('.')[-1])
+                    name = '{0}_{1}{2}.{3}'.format(post['guid'], photo['guid'], nsfw, photo['sizes'][size].split('.')[-1])
                     filename = os.path.join(path, name)
                     try:
                         urllib.request.urlretrieve(url=photo['sizes'][size], filename=filename)
index 1d06a50a1ea0b5702ef4fd0cf4ebdf4cf56e0dd6..24ab5ba38b790f8a7d09c333e9d31f27617ee017 100644 (file)
@@ -57,7 +57,7 @@ class Generic():
         params = {}
         if max_time:
             params['max_time'] = max_time
-            params['_'] = self.max_time
+            params['_'] = int(time.time() * 1000)
         request = self._connection.get(self._location, params=params)
         if request.status_code != 200:
             raise errors.StreamError('wrong status code: {0}'.format(request.status_code))
@@ -116,28 +116,62 @@ class Generic():
         """
         self._stream = self._obtain()
 
-    def more(self, max_time=0):
+    def more(self, max_time=0, backtime=84600):
         """Tries to download more (older ones) Posts from Stream.
 
+        :param backtime: how many seconds to subtract each time (defaults to one day)
+        :type backtime: int
         :param max_time: seconds since epoch (optional, diaspy'll figure everything on its own)
         :type max_time: int
         """
-        if not max_time: max_time = self.max_time - 3000000
+        if not max_time: max_time = self.max_time - backtime
         self.max_time = max_time
         new_stream = self._obtain(max_time=max_time)
         self._expand(new_stream)
 
-    def full(self):
+    def full(self, backtime=84600, retry=42, callback=None):
         """Fetches full stream - containing all posts.
-        WARNING: this can be a **VERY** time consuming function on slow connections of massive streams.
-
+        WARNING: this is a **VERY** long running function.
+        Use callback parameter to access information about the stream during its
+        run.
+
+Default backtime is one day. But sometimes a user might not have any activity for a longer
+period (in the beginning I posted once a month or so).
+The role of retry is to handle such situations by trying to go further back in time.
+If a post is found the counter is restored.
+
+        :param backtime: how many seconds to subtract each time
+        :type backtime: int
+        :param retry: how many times the function should look deeper than your last post
+        :type retry: int
+        :param callback: callable taking diaspy.streams.Generic as an argument
         :returns: integer, lenght of the stream
         """
         oldstream = self.copy()
         self.more()
-        while len(oldstream) != len(self):
+        while len(oldstream) < len(self):
             oldstream = self.copy()
-            self.more()
+            if callback is not None: callback(self)
+            self.more(backtime=backtime)
+            if len(oldstream) < len(self): continue
+            # but if no posts were found start retrying...
+            print('retrying... {0}'.format(retry))
+            n = retry
+            while n > 0:
+                print('\t', n, self.max_time)
+                # try to get even more posts...
+                self.more(backtime=backtime)
+                print('\t', len(oldstream), len(self))
+                # check if it was a success...
+                if len(oldstream) < len(self):
+                    # and if so restore normal order of execution by
+                    # going one loop higher
+                    break
+                oldstream = self.copy()
+                # if it was not a success subtract one day, keep calm and
+                # try going further back in time...
+                n -= 1
+            #if len(oldstream) == len(self): break
         return len(self)
 
     def copy(self):