From: Marek Marecki Date: Fri, 23 Aug 2013 08:48:33 +0000 (+0200) Subject: Many changes in post model and in generic stream X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=fe783229cc4d4ee394f3eeb044d93a1f09051bf8;p=diaspy.git Many changes in post model and in generic stream --- diff --git a/Changelog.markdown b/Changelog.markdown index 790cc0c..aa3af83 100644 --- a/Changelog.markdown +++ b/Changelog.markdown @@ -24,11 +24,17 @@ up-to-date than manual and if conflicts appear they should follow the order: #### Version `0.4.1` (2013-08-): * __new__: `__getitem__()` in `diaspy.models.Post`, +* __new__: `__dict__()` in `diaspy.models.Post`, +* __new__: `guid` argument in `diaspy.models.Post.__init__()`, * __new__: `json()` method in `diaspy.streams.Generic` adds the possibility to export streams to JSON, * __new__: `full()` method in `diaspy.streams.Generic` will try to fetch full stream (containing all posts), * __new__: `setEmail()` method in `diaspy.settings.Settings`, * __new__: `setLanguage()` method in `diaspy.settings.Settings`, * __new__: `downloadPhotos()` method in `diaspy.settings.Settings`, +* __new__: `backtime` argument in `more()` method in `diaspy.streams.Generic`, + +* __upd__: if `Post()` is created with fetched comments, data will also be fetched as a dependency, +* __upd__: `id` argument type is now `int` (`diaspy.models.Post.__init__()`), * __fix__: fixed some bugs in regular expressions used by `diaspy` internals (html tag removal, so you get nicer notifications), diff --git a/diaspy/__init__.py b/diaspy/__init__.py index 7e12ab6..30380c7 100644 --- a/diaspy/__init__.py +++ b/diaspy/__init__.py @@ -7,4 +7,4 @@ import diaspy.notifications as notifications import diaspy.settings as settings -__version__ = '0.4.0.1' +__version__ = '0.4.1' diff --git a/diaspy/models.py b/diaspy/models.py index a832b0d..bcf7dae 100644 --- a/diaspy/models.py +++ b/diaspy/models.py @@ -315,23 +315,29 @@ class Post(): .. 
note:: Remember that you need to have access to the post. """ - def __init__(self, connection, id, fetch=True, comments=True): + def __init__(self, connection, id=0, guid='', fetch=True, comments=True): """ - :param id: id or guid of the post - :type id: str + :param id: id of the post (GUID is recommended) + :type id: int + :param guid: GUID of the post + :type guid: str :param connection: connection object used to authenticate :type connection: connection.Connection :param fetch: defines whether to fetch post's data or not :type fetch: bool - :param comments: defines whether to fetch post's comments or not + :param comments: defines whether to fetch post's comments or not (if True also data will be fetched) :type comments: bool """ + if not (guid or id): raise TypeError('guid and/or id missing') self._connection = connection self.id = id + self.guid = guid self.data = {} self.comments = [] if fetch: self._fetchdata() - if comments: self._fetchcomments() + if comments: + if not self.data: self._fetchdata() + self._fetchcomments() def __repr__(self): """Returns string containing more information then str(). @@ -346,23 +352,36 @@ class Post(): def __getitem__(self, key): return self.data[key] + def __dict__(self): + """Returns dictionary of posts data. + """ + return self.data + def _fetchdata(self): """This function retrieves data of the post. + + :returns: guid of post whose data was fetched """ - request = self._connection.get('posts/{0}.json'.format(self.id)) + if self.id: id = self.id + if self.guid: id = self.guid + request = self._connection.get('posts/{0}.json'.format(id)) if request.status_code != 200: - raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, self.id)) + raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, id)) else: self.data = request.json() + return self['guid'] def _fetchcomments(self): - """Retireves comments for this post. 
- """ - request = self._connection.get('posts/{0}/comments.json'.format(self.id)) - if request.status_code != 200: - raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, self.id)) - else: - self.comments = [Comment(c) for c in request.json()] + """Retreives comments for this post. + """ + if self.id: id = self.id + if self.guid: id = self.guid + if self['interactions']['comments_count']: + request = self._connection.get('posts/{0}/comments.json'.format(id)) + if request.status_code != 200: + raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, id)) + else: + self.comments = [Comment(c) for c in request.json()] def update(self): """Updates post data. diff --git a/diaspy/settings.py b/diaspy/settings.py index 245d3b4..2cb9276 100644 --- a/diaspy/settings.py +++ b/diaspy/settings.py @@ -24,10 +24,10 @@ class Settings(): request = self._connection.get('user/export') return request.text - def downloadPhotos(self, size='large', path='.', _critical=False, _stream=None): + def downloadPhotos(self, size='large', path='.', mark_nsfw=True, _critical=False, _stream=None): """Downloads photos into the current working directory. Sizes are: large, medium, small. - Filename is: {photo_guid}.{extension} + Filename is: {post_guid}_{photo_guid}.{extension} Normally, this method will catch urllib-generated errors and just issue warnings about photos that couldn't be downloaded. 
@@ -38,20 +38,25 @@ class Settings(): :type size: str :param path: path to download (defaults to current working directory :type path: str + :param mark_nsfw: will append '-nsfw' to images from posts marked as nsfw, + :type mark_nsfw: bool :param _stream: diaspy.streams.Generic-like object (only for testing) :param _critical: if True urllib errors will be reraised after generating a warning (may be removed) :returns: integer, number of photos downloaded """ photos = 0 - if _stream is not None: stream = _stream - else: stream = streams.Activity - stream = stream(self._connection) - stream.full() + if _stream is None: + stream = streams.Activity(self._connection) + stream.full() + else: + stream = _stream for i, post in enumerate(stream): + if post['nsfw'] is not False: nsfw = '-nsfw' + else: nsfw = '' if post['photos']: for n, photo in enumerate(post['photos']): - name = '{0}.{1}'.format(photo['guid'], photo['sizes'][size].split('.')[-1]) + name = '{0}_{1}{2}.{3}'.format(post['guid'], photo['guid'], nsfw, photo['sizes'][size].split('.')[-1]) filename = os.path.join(path, name) try: urllib.request.urlretrieve(url=photo['sizes'][size], filename=filename) diff --git a/diaspy/streams.py b/diaspy/streams.py index 1d06a50..24ab5ba 100644 --- a/diaspy/streams.py +++ b/diaspy/streams.py @@ -57,7 +57,7 @@ class Generic(): params = {} if max_time: params['max_time'] = max_time - params['_'] = self.max_time + params['_'] = int(time.time() * 1000) request = self._connection.get(self._location, params=params) if request.status_code != 200: raise errors.StreamError('wrong status code: {0}'.format(request.status_code)) @@ -116,28 +116,62 @@ class Generic(): """ self._stream = self._obtain() - def more(self, max_time=0): + def more(self, max_time=0, backtime=84600): """Tries to download more (older ones) Posts from Stream. 
+ :param backtime: how many seconds to subtract each time (defaults to one day) + :type backtime: int :param max_time: seconds since epoch (optional, diaspy'll figure everything on its own) :type max_time: int """ - if not max_time: max_time = self.max_time - 3000000 + if not max_time: max_time = self.max_time - backtime self.max_time = max_time new_stream = self._obtain(max_time=max_time) self._expand(new_stream) - def full(self): + def full(self, backtime=84600, retry=42, callback=None): """Fetches full stream - containing all posts. - WARNING: this can be a **VERY** time consuming function on slow connections of massive streams. - + WARNING: this is a **VERY** long running function. + Use callback parameter to access information about the stream during its + run. + + Default backtime is one day. But sometimes user might not have any activity for longer + period (on the beginning I posted once a month or so). + The role of retry is to handle such situations by trying to go further back in time. + If a post is found the counter is restored. + + :param backtime: how many seconds to subtract each time + :type backtime: int + :param retry: how many times the function should look deeper than your last post + :type retry: int + :param callback: callable taking diaspy.streams.Generic as an argument :returns: integer, lenght of the stream """ oldstream = self.copy() self.more() - while len(oldstream) != len(self): + while len(oldstream) < len(self): oldstream = self.copy() - self.more() + if callback is not None: callback(self) + self.more(backtime=backtime) + if len(oldstream) < len(self): continue + # but if no posts were found start retrying... + print('retrying... {0}'.format(retry)) + n = retry + while n > 0: + print('\t', n, self.max_time) + # try to get even more posts... + self.more(backtime=backtime) + print('\t', len(oldstream), len(self)) + # check if it was a success... 
+ if len(oldstream) < len(self): + # and if so restore normal order of execution by + # going one loop higher + break + oldstream = self.copy() + # if it was not a success subtract one day, keep calm and + # try going further back in time... + n -= 1 + #if len(oldstream) == len(self): break return len(self) return len(self) def copy(self):