#### Version `0.4.1` (2013-08-):
* __new__: `__getitem__()` in `diaspy.models.Post`,
+* __new__: `__dict__()` in `diaspy.models.Post`,
+* __new__: `guid` argument in `diaspy.models.Post.__init__()`,
* __new__: `json()` method in `diaspy.streams.Generic` adds the possibility to export streams to JSON,
* __new__: `full()` method in `diaspy.streams.Generic` will try to fetch full stream (containing all posts),
* __new__: `setEmail()` method in `diaspy.settings.Settings`,
* __new__: `setLanguage()` method in `diaspy.settings.Settings`,
* __new__: `downloadPhotos()` method in `diaspy.settings.Settings`,
+* __new__: `backtime` argument in `more()` method in `diaspy.streams.Generic`,
+
+* __upd__: if `Post()` is created with fetched comments, data will also be fetched as a dependency,
+* __upd__: `id` argument type is now `int` (`diaspy.models.Post.__init__()`),
* __fix__: fixed some bugs in regular expressions used by `diaspy` internals
(html tag removal, so you get nicer notifications),
import diaspy.settings as settings
-__version__ = '0.4.0.1'
+__version__ = '0.4.1'
.. note::
Remember that you need to have access to the post.
"""
- def __init__(self, connection, id, fetch=True, comments=True):
+ def __init__(self, connection, id=0, guid='', fetch=True, comments=True):
"""
- :param id: id or guid of the post
- :type id: str
+ :param id: id of the post (GUID is recommended)
+ :type id: int
+ :param guid: GUID of the post
+ :type guid: str
:param connection: connection object used to authenticate
:type connection: connection.Connection
:param fetch: defines whether to fetch post's data or not
:type fetch: bool
- :param comments: defines whether to fetch post's comments or not
+ :param comments: defines whether to fetch post's comments or not (if True also data will be fetched)
:type comments: bool
"""
+ if not (guid or id): raise TypeError('guid and/or id missing')
self._connection = connection
self.id = id
+ self.guid = guid
self.data = {}
self.comments = []
if fetch: self._fetchdata()
- if comments: self._fetchcomments()
+ if comments:
+ if not self.data: self._fetchdata()
+ self._fetchcomments()
def __repr__(self):
"""Returns string containing more information then str().
def __getitem__(self, key):
return self.data[key]
+ def __dict__(self):
+ """Returns dictionary of posts data.
+ """
+ return self.data
+
def _fetchdata(self):
"""This function retrieves data of the post.
+
+ :returns: guid of post whose data was fetched
"""
- request = self._connection.get('posts/{0}.json'.format(self.id))
+ if self.id: id = self.id
+ if self.guid: id = self.guid
+ request = self._connection.get('posts/{0}.json'.format(id))
if request.status_code != 200:
- raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, self.id))
+ raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, id))
else:
self.data = request.json()
+ return self['guid']
def _fetchcomments(self):
- """Retireves comments for this post.
- """
- request = self._connection.get('posts/{0}/comments.json'.format(self.id))
- if request.status_code != 200:
- raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, self.id))
- else:
- self.comments = [Comment(c) for c in request.json()]
+ """Retrieves comments for this post.
+ """
+ if self.id: id = self.id
+ if self.guid: id = self.guid
+ if self['interactions']['comments_count']:
+ request = self._connection.get('posts/{0}/comments.json'.format(id))
+ if request.status_code != 200:
+ raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, id))
+ else:
+ self.comments = [Comment(c) for c in request.json()]
def update(self):
"""Updates post data.
request = self._connection.get('user/export')
return request.text
- def downloadPhotos(self, size='large', path='.', _critical=False, _stream=None):
+ def downloadPhotos(self, size='large', path='.', mark_nsfw=True, _critical=False, _stream=None):
"""Downloads photos into the current working directory.
Sizes are: large, medium, small.
- Filename is: {photo_guid}.{extension}
+ Filename is: {post_guid}_{photo_guid}.{extension}
Normally, this method will catch urllib-generated errors and
just issue warnings about photos that couldn't be downloaded.
:type size: str
:param path: path to download (defaults to current working directory
:type path: str
+ :param mark_nsfw: will append '-nsfw' to images from posts marked as nsfw,
+ :type mark_nsfw: bool
:param _stream: diaspy.streams.Generic-like object (only for testing)
:param _critical: if True urllib errors will be reraised after generating a warning (may be removed)
:returns: integer, number of photos downloaded
"""
photos = 0
- if _stream is not None: stream = _stream
- else: stream = streams.Activity
- stream = stream(self._connection)
- stream.full()
+ if _stream is None:
+ stream = streams.Activity(self._connection)
+ stream.full()
+ else:
+ stream = _stream
for i, post in enumerate(stream):
+ if post['nsfw'] is not False: nsfw = '-nsfw'
+ else: nsfw = ''
if post['photos']:
for n, photo in enumerate(post['photos']):
- name = '{0}.{1}'.format(photo['guid'], photo['sizes'][size].split('.')[-1])
+ name = '{0}_{1}{2}.{3}'.format(post['guid'], photo['guid'], nsfw, photo['sizes'][size].split('.')[-1])
filename = os.path.join(path, name)
try:
urllib.request.urlretrieve(url=photo['sizes'][size], filename=filename)
params = {}
if max_time:
params['max_time'] = max_time
- params['_'] = self.max_time
+ params['_'] = int(time.time() * 1000)
request = self._connection.get(self._location, params=params)
if request.status_code != 200:
raise errors.StreamError('wrong status code: {0}'.format(request.status_code))
"""
self._stream = self._obtain()
- def more(self, max_time=0):
+ def more(self, max_time=0, backtime=86400):
 """Tries to download more (older ones) Posts from Stream.
+ :param backtime: how many seconds to subtract each time (defaults to one day, i.e. 86400 seconds)
+ :type backtime: int
:param max_time: seconds since epoch (optional, diaspy'll figure everything on its own)
:type max_time: int
"""
- if not max_time: max_time = self.max_time - 3000000
+ if not max_time: max_time = self.max_time - backtime
self.max_time = max_time
new_stream = self._obtain(max_time=max_time)
self._expand(new_stream)
- def full(self):
+ def full(self, backtime=86400, retry=42, callback=None):
"""Fetches full stream - containing all posts.
- WARNING: this can be a **VERY** time consuming function on slow connections of massive streams.
-
+ WARNING: this is a **VERY** long running function.
+ Use callback parameter to access information about the stream during its
+ run.
+
+ Default backtime is one day. But sometimes a user might not have had any activity for a longer
+ period (in the beginning I posted once a month or so).
+ The role of retry is to handle such situations by trying to go further back in time.
+ If a post is found the counter is restored.
+
+ :param backtime: how many seconds to subtract each time
+ :type backtime: int
+ :param retry: how many times the function should look deeper than your last post
+ :type retry: int
+ :param callback: callable taking diaspy.streams.Generic as an argument
:returns: integer, lenght of the stream
"""
oldstream = self.copy()
self.more()
- while len(oldstream) != len(self):
+ while len(oldstream) < len(self):
oldstream = self.copy()
- self.more()
+ if callback is not None: callback(self)
+ self.more(backtime=backtime)
+ if len(oldstream) < len(self): continue
+ # but if no posts were found start retrying...
+ print('retrying... {0}'.format(retry))
+ n = retry
+ while n > 0:
+ print('\t', n, self.max_time)
+ # try to get even more posts...
+ self.more(backtime=backtime)
+ print('\t', len(oldstream), len(self))
+ # check if it was a success...
+ if len(oldstream) < len(self):
+ # and if so restore normal order of execution by
+ # going one loop higher
+ break
+ oldstream = self.copy()
# if it was not a success subtract one day, keep calm and
# try going further back in time...
+ n -= 1
+ #if len(oldstream) == len(self): break
return len(self)
def copy(self):