#### Version `0.4.1` (2013-08-):
* __new__: `__getitem__()` in `diaspy.models.Post`,
+* __new__: `__dict__()` in `diaspy.models.Post`,
+* __new__: `guid` argument in `diaspy.models.Post.__init__()`,
* __new__: `json()` method in `diaspy.streams.Generic` adds the possibility to export streams to JSON,
* __new__: `full()` method in `diaspy.streams.Generic` will try to fetch full stream (containing all posts),
* __new__: `setEmail()` method in `diaspy.settings.Settings`,
* __new__: `setLanguage()` method in `diaspy.settings.Settings`,
* __new__: `downloadPhotos()` method in `diaspy.settings.Settings`,
+* __new__: `backtime` argument in `more()` method in `diaspy.streams.Generic`,
+
+* __upd__: if `Post()` is created with fetched comments, data will also be fetched as a dependency,
+* __upd__: `id` argument type is now `int` (`diaspy.models.Post.__init__()`),
* __fix__: fixed some bugs in regular expressions used by `diaspy` internals
(html tag removal, so you get nicer notifications),
import diaspy.settings as settings
-__version__ = '0.4.0.1'
+__version__ = '0.4.1'
.. note::
Remember that you need to have access to the post.
"""
- def __init__(self, connection, id, fetch=True, comments=True):
+ def __init__(self, connection, id=0, guid='', fetch=True, comments=True):
"""
- :param id: id or guid of the post
- :type id: str
+ :param id: id of the post (GUID is recommended)
+ :type id: int
+ :param guid: GUID of the post
+ :type guid: str
:param connection: connection object used to authenticate
:type connection: connection.Connection
:param fetch: defines whether to fetch post's data or not
:type fetch: bool
- :param comments: defines whether to fetch post's comments or not
+ :param comments: defines whether to fetch post's comments or not (if True also data will be fetched)
:type comments: bool
"""
+ if not (guid or id): raise TypeError('guid and/or id missing')
self._connection = connection
self.id = id
+ self.guid = guid
self.data = {}
self.comments = []
if fetch: self._fetchdata()
- if comments: self._fetchcomments()
+ if comments:
+ if not self.data: self._fetchdata()
+ self._fetchcomments()
def __repr__(self):
"""Returns string containing more information then str().
def __getitem__(self, key):
return self.data[key]
+ def __dict__(self):
+ """Returns dictionary of posts data.
+ """
+ return self.data
+
def _fetchdata(self):
"""This function retrieves data of the post.
+
+ :returns: guid of post whose data was fetched
"""
- request = self._connection.get('posts/{0}.json'.format(self.id))
+ if self.id: id = self.id
+ if self.guid: id = self.guid
+ request = self._connection.get('posts/{0}.json'.format(id))
if request.status_code != 200:
- raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, self.id))
+ raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, id))
else:
self.data = request.json()
+ return self['guid']
def _fetchcomments(self):
- """Retireves comments for this post.
- """
- request = self._connection.get('posts/{0}/comments.json'.format(self.id))
- if request.status_code != 200:
- raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, self.id))
- else:
- self.comments = [Comment(c) for c in request.json()]
+ """Retrieves comments for this post.
+ """
+ if self.id: id = self.id
+ if self.guid: id = self.guid
+ if self['interactions']['comments_count']:
+ request = self._connection.get('posts/{0}/comments.json'.format(id))
+ if request.status_code != 200:
+ raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, id))
+ else:
+ self.comments = [Comment(c) for c in request.json()]
def update(self):
"""Updates post data.
request = self._connection.get('user/export')
return request.text
- def downloadPhotos(self, size='large', path='.', _critical=False, _stream=None):
+ def downloadPhotos(self, size='large', path='.', mark_nsfw=True, _critical=False, _stream=None):
"""Downloads photos into the current working directory.
Sizes are: large, medium, small.
- Filename is: {photo_guid}.{extension}
+ Filename is: {post_guid}_{photo_guid}.{extension}
Normally, this method will catch urllib-generated errors and
just issue warnings about photos that couldn't be downloaded.
:type size: str
:param path: path to download (defaults to current working directory
:type path: str
+ :param mark_nsfw: will append '-nsfw' to images from posts marked as nsfw,
+ :type mark_nsfw: bool
:param _stream: diaspy.streams.Generic-like object (only for testing)
:param _critical: if True urllib errors will be reraised after generating a warning (may be removed)
:returns: integer, number of photos downloaded
"""
photos = 0
- if _stream is not None: stream = _stream
- else: stream = streams.Activity
- stream = stream(self._connection)
- stream.full()
+ if _stream is None:
+ stream = streams.Activity(self._connection)
+ stream.full()
+ else:
+ stream = _stream
for i, post in enumerate(stream):
+ if post['nsfw'] is not False: nsfw = '-nsfw'
+ else: nsfw = ''
if post['photos']:
for n, photo in enumerate(post['photos']):
- name = '{0}.{1}'.format(photo['guid'], photo['sizes'][size].split('.')[-1])
+ name = '{0}_{1}{2}.{3}'.format(post['guid'], photo['guid'], nsfw, photo['sizes'][size].split('.')[-1])
filename = os.path.join(path, name)
try:
urllib.request.urlretrieve(url=photo['sizes'][size], filename=filename)
params = {}
if max_time:
params['max_time'] = max_time
- params['_'] = self.max_time
+ params['_'] = int(time.time() * 1000)
request = self._connection.get(self._location, params=params)
if request.status_code != 200:
raise errors.StreamError('wrong status code: {0}'.format(request.status_code))
"""
self._stream = self._obtain()
- def more(self, max_time=0):
+ def more(self, max_time=0, backtime=86400):
 """Tries to download more (older ones) Posts from Stream.
+ :param backtime: how many seconds to subtract each time (defaults to one day, i.e. 86400 seconds)
+ :type backtime: int
:param max_time: seconds since epoch (optional, diaspy'll figure everything on its own)
:type max_time: int
"""
- if not max_time: max_time = self.max_time - 3000000
+ if not max_time: max_time = self.max_time - backtime
self.max_time = max_time
new_stream = self._obtain(max_time=max_time)
self._expand(new_stream)
- def full(self):
+ def full(self, backtime=86400, retry=42, callback=None):
"""Fetches full stream - containing all posts.
- WARNING: this can be a **VERY** time consuming function on slow connections of massive streams.
-
+ WARNING: this is a **VERY** long running function.
+ Use callback parameter to access information about the stream during its
+ run.
+
+ Default backtime is one day. But sometimes a user might not have had any activity for a longer
+ period (in the beginning I posted once a month or so).
+ The role of retry is to handle such situations by trying to go further back in time.
+ If a post is found the counter is restored.
+
+ :param backtime: how many seconds to subtract each time
+ :type backtime: int
+ :param retry: how many times the function should look deeper than your last post
+ :type retry: int
+ :param callback: callable taking diaspy.streams.Generic as an argument
:returns: integer, lenght of the stream
"""
oldstream = self.copy()
self.more()
- while len(oldstream) != len(self):
+ while len(oldstream) < len(self):
oldstream = self.copy()
- self.more()
+ if callback is not None: callback(self)
+ self.more(backtime=backtime)
+ if len(oldstream) < len(self): continue
+ # but if no posts were found start retrying...
+ print('retrying... {0}'.format(retry))
+ n = retry
+ while n > 0:
+ print('\t', n, self.max_time)
+ # try to get even more posts...
+ self.more(backtime=backtime)
+ print('\t', len(oldstream), len(self))
+ # check if it was a success...
+ if len(oldstream) < len(self):
+ # and if so restore normal order of execution by
+ # going one loop higher
+ break
+ oldstream = self.copy()
# if it was not a success subtract one day, keep calm and
# try going further back in time...
+ n -= 1
+ #if len(oldstream) == len(self): break
return len(self)
def copy(self):