From: Marek Marecki Date: Fri, 23 Aug 2013 08:48:33 +0000 (+0200) Subject: Many changes in post model and in generic stream X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=fe783229cc4d4ee394f3eeb044d93a1f09051bf8;p=diaspy.git Many changes in post model and in generic stream --- diff --git a/Changelog.markdown b/Changelog.markdown index 790cc0c..aa3af83 100644 --- a/Changelog.markdown +++ b/Changelog.markdown @@ -24,11 +24,17 @@ up-to-date than manual and if conflicts appear they should follow the order: #### Version `0.4.1` (2013-08-): * __new__: `__getitem__()` in `diaspy.models.Post`, +* __new__: `__dict__()` in `diaspy.models.Post`, +* __new__: `guid` argument in `diaspy.models.Post.__init__()`, * __new__: `json()` method in `diaspy.streams.Generic` adds the possibility to export streams to JSON, * __new__: `full()` method in `diaspy.streams.Generic` will try to fetch full stream (containing all posts), * __new__: `setEmail()` method in `diaspy.settings.Settings`, * __new__: `setLanguage()` method in `diaspy.settings.Settings`, * __new__: `downloadPhotos()` method in `diaspy.settings.Settings`, +* __new__: `backtime` argument in `more()` method in `diaspy.streams.Generic`, + +* __upd__: if `Post()` is created with fetched comments, data will also be fetched as a dependency, +* __upd__: `id` argument type is now `int` (`diaspy.models.Post.__init__()`), * __fix__: fixed some bugs in regular expressions used by `diaspy` internals (html tag removal, so you get nicer notifications), diff --git a/diaspy/__init__.py b/diaspy/__init__.py index 7e12ab6..30380c7 100644 --- a/diaspy/__init__.py +++ b/diaspy/__init__.py @@ -7,4 +7,4 @@ import diaspy.notifications as notifications import diaspy.settings as settings -__version__ = '0.4.0.1' +__version__ = '0.4.1' diff --git a/diaspy/models.py b/diaspy/models.py index a832b0d..bcf7dae 100644 --- a/diaspy/models.py +++ b/diaspy/models.py @@ -315,23 +315,29 @@ class Post(): .. 
note:: Remember that you need to have access to the post. """ - def __init__(self, connection, id, fetch=True, comments=True): + def __init__(self, connection, id=0, guid='', fetch=True, comments=True): """ - :param id: id or guid of the post - :type id: str + :param id: id of the post (GUID is recommended) + :type id: int + :param guid: GUID of the post + :type guid: str :param connection: connection object used to authenticate :type connection: connection.Connection :param fetch: defines whether to fetch post's data or not :type fetch: bool - :param comments: defines whether to fetch post's comments or not + :param comments: defines whether to fetch post's comments or not (if True also data will be fetched) :type comments: bool """ + if not (guid or id): raise TypeError('guid and/or id missing') self._connection = connection self.id = id + self.guid = guid self.data = {} self.comments = [] if fetch: self._fetchdata() - if comments: self._fetchcomments() + if comments: + if not self.data: self._fetchdata() + self._fetchcomments() def __repr__(self): """Returns string containing more information then str(). @@ -346,23 +352,36 @@ class Post(): def __getitem__(self, key): return self.data[key] + def __dict__(self): + """Returns dictionary of posts data. + """ + return self.data + def _fetchdata(self): """This function retrieves data of the post. + + :returns: guid of post whose data was fetched """ - request = self._connection.get('posts/{0}.json'.format(self.id)) + if self.id: id = self.id + if self.guid: id = self.guid + request = self._connection.get('posts/{0}.json'.format(id)) if request.status_code != 200: - raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, self.id)) + raise errors.PostError('{0}: could not fetch data for post: {1}'.format(request.status_code, id)) else: self.data = request.json() + return self['guid'] def _fetchcomments(self): - """Retireves comments for this post. 
- """ - request = self._connection.get('posts/{0}/comments.json'.format(self.id)) - if request.status_code != 200: - raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, self.id)) - else: - self.comments = [Comment(c) for c in request.json()] + """Retreives comments for this post. + """ + if self.id: id = self.id + if self.guid: id = self.guid + if self['interactions']['comments_count']: + request = self._connection.get('posts/{0}/comments.json'.format(id)) + if request.status_code != 200: + raise errors.PostError('{0}: could not fetch comments for post: {1}'.format(request.status_code, id)) + else: + self.comments = [Comment(c) for c in request.json()] def update(self): """Updates post data. diff --git a/diaspy/settings.py b/diaspy/settings.py index 245d3b4..2cb9276 100644 --- a/diaspy/settings.py +++ b/diaspy/settings.py @@ -24,10 +24,10 @@ class Settings(): request = self._connection.get('user/export') return request.text - def downloadPhotos(self, size='large', path='.', _critical=False, _stream=None): + def downloadPhotos(self, size='large', path='.', mark_nsfw=True, _critical=False, _stream=None): """Downloads photos into the current working directory. Sizes are: large, medium, small. - Filename is: {photo_guid}.{extension} + Filename is: {post_guid}_{photo_guid}.{extension} Normally, this method will catch urllib-generated errors and just issue warnings about photos that couldn't be downloaded. 
@@ -38,20 +38,25 @@ class Settings(): :type size: str :param path: path to download (defaults to current working directory :type path: str + :param mark_nsfw: will append '-nsfw' to images from posts marked as nsfw, + :type mark_nsfw: bool :param _stream: diaspy.streams.Generic-like object (only for testing) :param _critical: if True urllib errors will be reraised after generating a warning (may be removed) :returns: integer, number of photos downloaded """ photos = 0 - if _stream is not None: stream = _stream - else: stream = streams.Activity - stream = stream(self._connection) - stream.full() + if _stream is None: + stream = streams.Activity(self._connection) + stream.full() + else: + stream = _stream for i, post in enumerate(stream): + if post['nsfw'] is not False: nsfw = '-nsfw' + else: nsfw = '' if post['photos']: for n, photo in enumerate(post['photos']): - name = '{0}.{1}'.format(photo['guid'], photo['sizes'][size].split('.')[-1]) + name = '{0}_{1}{2}.{3}'.format(post['guid'], photo['guid'], nsfw, photo['sizes'][size].split('.')[-1]) filename = os.path.join(path, name) try: urllib.request.urlretrieve(url=photo['sizes'][size], filename=filename) diff --git a/diaspy/streams.py b/diaspy/streams.py index 1d06a50..24ab5ba 100644 --- a/diaspy/streams.py +++ b/diaspy/streams.py @@ -57,7 +57,7 @@ class Generic(): params = {} if max_time: params['max_time'] = max_time - params['_'] = self.max_time + params['_'] = int(time.time() * 1000) request = self._connection.get(self._location, params=params) if request.status_code != 200: raise errors.StreamError('wrong status code: {0}'.format(request.status_code)) @@ -116,28 +116,62 @@ class Generic(): """ self._stream = self._obtain() - def more(self, max_time=0): + def more(self, max_time=0, backtime=84600): """Tries to download more (older ones) Posts from Stream. 
+ :param backtime: how many seconds to subtract each time (defaults to one day) + :type backtime: int :param max_time: seconds since epoch (optional, diaspy'll figure everything on its own) :type max_time: int """ - if not max_time: max_time = self.max_time - 3000000 + if not max_time: max_time = self.max_time - backtime self.max_time = max_time new_stream = self._obtain(max_time=max_time) self._expand(new_stream) - def full(self): + def full(self, backtime=84600, retry=42, callback=None): """Fetches full stream - containing all posts. - WARNING: this can be a **VERY** time consuming function on slow connections of massive streams. - + WARNING: this is a **VERY** long running function. + Use callback parameter to access information about the stream during its + run. + + Default backtime is one day. But sometimes user might not have any activity for longer + period (on the beginning I posted once a month or so). + The role of retry is to handle such situations by trying to go further back in time. + If a post is found the counter is restored. + + :param backtime: how many seconds to subtract each time + :type backtime: int + :param retry: how many times the function should look deeper than your last post + :type retry: int + :param callback: callable taking diaspy.streams.Generic as an argument :returns: integer, lenght of the stream """ oldstream = self.copy() self.more() - while len(oldstream) != len(self): + while len(oldstream) < len(self): oldstream = self.copy() - self.more() + if callback is not None: callback(self) + self.more(backtime=backtime) + if len(oldstream) < len(self): continue + # but if no posts were found start retrying... + print('retrying... {0}'.format(retry)) + n = retry + while n > 0: + print('\t', n, self.max_time) + # try to get even more posts... + self.more(backtime=backtime) + print('\t', len(oldstream), len(self)) + # check if it was a success... 
+ if len(oldstream) < len(self): + # and if so restore normal order of execution by + # going one loop higher + break + oldstream = self.copy() + # if it was not a success subtract one day, keep calm and + # try going further back in time... + n -= 1 + #if len(oldstream) == len(self): break return len(self) return len(self) def copy(self):