# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import datetime, uuid

from mongokit import Document

from mediagoblin.auth import lib as auth_lib
from mediagoblin import mg_globals
from mediagoblin.db import migrations
from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
from mediagoblin.tools.pagination import Pagination
from mediagoblin.tools import url, common

###################
# Custom validators
###################

########
# Models
########

class User(Document):
    """
    A user of MediaGoblin.

    Structure:
     - username: The username of this user; it should be unique to this
       instance.
     - email: Email address of this user
     - created: When the user was created
     - plugin_data: a mapping of extra plugin information for this User.
       Nothing uses this yet as we don't have plugins, but someday we
       might... :)
     - pw_hash: Hashed version of user's password.
     - email_verified: Whether or not the user has verified their email.
       Most parts of the site are disabled for users who haven't yet.
     - status: whether or not the user is active, etc. Currently only has two
       values, 'needs_email_verification' or 'active'. (In the future, maybe
       we'll change this to a boolean with a key of 'active' and have a
       separate field for a reason the user's been disabled if that's
       appropriate... email_verified is already separate, after all.)
     - verification_key: If the user is awaiting email verification, the user
       will have to provide this key (which will be encoded in the presented
       URL) in order to confirm their email as active.
     - is_admin: Whether or not this user is an administrator.
     - url: this user's personal webpage/website, if appropriate.
     - bio: biography of this user (plaintext, in markdown)
     - bio_html: biography of the user converted to proper HTML.
    """
    __collection__ = 'users'

    structure = {
        'username': unicode,
        'email': unicode,
        'created': datetime.datetime,
        'plugin_data': dict,  # plugins can dump stuff here.
        'pw_hash': unicode,
        'email_verified': bool,
        'status': unicode,
        'verification_key': unicode,
        'is_admin': bool,
        'url': unicode,
        'bio': unicode,  # May contain markdown
        'bio_html': unicode,  # May contain plaintext, or HTML
        'fp_verification_key': unicode,  # forgotten password verification key
        'fp_token_expire': datetime.datetime
        }

    required_fields = ['username', 'created', 'pw_hash', 'email']

    default_values = {
        'created': datetime.datetime.utcnow,
        'email_verified': False,
        'status': u'needs_email_verification',
        'verification_key': lambda: unicode(uuid.uuid4()),
        'is_admin': False}

    def check_login(self, password):
        """
        See if a user can login with this password
        """
        return auth_lib.bcrypt_check_password(
            password, self['pw_hash'])
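
    # Hedged usage sketch for check_login(): request.db (a registered
    # connection) and bcrypt_gen_password_hash() from mediagoblin.auth.lib
    # are assumed here for illustration only.
    #
    #     user = request.db.User()
    #     user['username'] = u'alice'
    #     user['email'] = u'alice@example.org'
    #     user['pw_hash'] = auth_lib.bcrypt_gen_password_hash(u'seekrit')
    #     user.save()
    #     user.check_login(u'seekrit')   # -> True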


class MediaEntry(Document):
    """
    Record of a piece of media.

    Structure:
     - uploader: A reference to a User who uploaded this.

     - title: Title of this work

     - slug: A normalized "slug" which can be used as part of a URL to
       retrieve this work, such as 'my-works-name-in-slug-form', which may be
       viewable at
       'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'.
       Note that since URLs are constructed this way, slugs must be unique
       per-uploader. (An index is provided to enforce that, but code should
       be written on the python side to ensure this as well.)

     - created: Date and time of when this piece of work was uploaded.

     - description: Uploader-set description of this work. This can be marked
       up with Markdown for slight fanciness (links, boldness, italics,
       paragraphs...)

     - description_html: Rendered version of the description, run through
       Markdown and cleaned with our cleaning tool.

     - media_type: What type of media is this? Currently we only support
       'image' ;)

     - media_data: Extra information that's media-format-dependent.
       For example, images might contain some EXIF data that's not
       appropriate to other formats. You might store it like:

         mediaentry['media_data']['exif'] = {
             'manufacturer': 'CASIO',
             'model': 'QV-4000',
             'exposure_time': .659}

       Alternately for video you might store:

         # play length in seconds
         mediaentry['media_data']['play_length'] = 340

       ... so what's appropriate here really depends on the media type.

     - plugin_data: a mapping of extra plugin information for this media
       entry. Nothing uses this yet as we don't have plugins, but someday we
       might... :)

     - tags: A list of tags. Each tag is stored as a dictionary that has a key
       for the actual name and the normalized name-as-slug, so ultimately this
       looks like:
         [{'name': 'Gully Gardens',
           'slug': 'gully-gardens'},
          {'name': 'Castle Adventure Time?!',
           'slug': 'castle-adventure-time'}]

     - state: What's the state of this file? Active, inactive, disabled, etc...
       But really for now there are only two states:
         "unprocessed": uploaded but needs to go through processing for display
         "processed": processed and able to be displayed

     - queued_media_file: storage interface style filepath describing a file
       queued for processing. This is stored in the mg_globals.queue_store
       storage system.

     - queued_task_id: celery task id. Use this to fetch the task state.

     - media_files: Files relevant to this that have actually been processed
       and are available for various types of display. Stored like:
         {'thumb': ['dir1', 'dir2', 'pic.png']}

     - attachment_files: A list of "attachment" files, ones that aren't
       critical to this piece of media but may be usefully relevant to people
       viewing the work. (Currently unused.)

     - fail_error: importable path to the exception raised during processing,
       if processing failed.
     - fail_metadata: extra data about the processing failure, stored as a
       dict.
    """
    __collection__ = 'media_entries'

    structure = {
        'uploader': ObjectId,
        'title': unicode,
        'slug': unicode,
        'created': datetime.datetime,
        'description': unicode,  # May contain markdown/up
        'description_html': unicode,  # May contain plaintext, or HTML
        'media_type': unicode,
        'media_data': dict,  # extra data relevant to this media_type
        'plugin_data': dict,  # plugins can dump stuff here.
        'tags': [dict],
        'state': unicode,

        # For now let's assume there can only be one main file queued
        # at a time
        'queued_media_file': [unicode],
        'queued_task_id': unicode,

        # A dictionary of logical names to filepaths
        'media_files': dict,

        # The following should be lists of lists, in appropriate file
        # record form
        'attachment_files': list,

        # If things go badly during processing, we'll store that
        # data here
        'fail_error': unicode,
        'fail_metadata': dict}

    required_fields = [
        'uploader', 'created', 'media_type', 'slug']

    default_values = {
        'created': datetime.datetime.utcnow,
        'state': u'unprocessed'}
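
    # Hedged sketch of building a new entry; request.db and the uploader
    # document are assumptions for illustration, not requirements of this
    # module.
    #
    #     entry = request.db.MediaEntry()
    #     entry['uploader'] = uploader_user['_id']
    #     entry['title'] = u'Sunset over Gully Gardens'
    #     entry['media_type'] = u'image'
    #     entry.generate_slug()
    #     entry.save(validate=True)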

    def get_comments(self):
        return self.db.MediaComment.find({
                'media_entry': self['_id']}).sort('created', DESCENDING)

    def get_display_media(self, media_map,
                          fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER):
        """
        Find the best media for display.

        Args:
        - media_map: a dict like
          {u'image_size': [u'dir1', u'dir2', u'image.jpg']}
        - fetch_order: the order we should try fetching images in

        Returns:
          the media path from media_map for the first size in fetch_order
          that is available
        """
        media_sizes = media_map.keys()

        for media_size in fetch_order:
            if media_size in media_sizes:
                return media_map[media_size]
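
    # Hedged example of the lookup above, assuming a fetch order along the
    # lines of [u'medium', u'original', u'thumb'] (the real order lives in
    # mediagoblin.tools.common.DISPLAY_IMAGE_FETCHING_ORDER):
    #
    #     media_map = {u'thumb': [u'dir1', u'dir2', u'thumb.jpg'],
    #                  u'original': [u'dir1', u'dir2', u'pic.jpg']}
    #     entry.get_display_media(media_map)
    #     # -> [u'dir1', u'dir2', u'pic.jpg'], because u'original' comes
    #     #    before u'thumb' in the fetch order.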

    def main_mediafile(self):
        pass

    def generate_slug(self):
        self['slug'] = url.slugify(self['title'])

        duplicate = mg_globals.database.media_entries.find_one(
            {'slug': self['slug']})

        if duplicate:
            self['slug'] = "%s-%s" % (self['_id'], self['slug'])
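
    # Hedged example of the slug de-duplication above (the title is made up):
    #
    #     entry['title'] = u'Gully Gardens at Dusk'
    #     entry.generate_slug()
    #     # First entry with this title: slug == u'gully-gardens-at-dusk'
    #     # If that slug already exists, the entry's _id gets prepended,
    #     # e.g. u'<objectid>-gully-gardens-at-dusk'.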

    def url_for_self(self, urlgen):
        """
        Generate an appropriate url for ourselves

        Use a slug if we have one, else use our '_id'.
        """
        uploader = self.uploader()

        if self.get('slug'):
            return urlgen(
                'mediagoblin.user_pages.media_home',
                user=uploader['username'],
                media=self['slug'])
        else:
            return urlgen(
                'mediagoblin.user_pages.media_home',
                user=uploader['username'],
                media=unicode(self['_id']))

    def url_to_prev(self, urlgen):
        """
        Provide a url to the previous entry from this user, if there is one
        """
        cursor = self.db.MediaEntry.find({'_id': {"$gt": self['_id']},
                                          'uploader': self['uploader'],
                                          'state': 'processed'}).sort(
                                              '_id', ASCENDING).limit(1)
        if cursor.count():
            return urlgen('mediagoblin.user_pages.media_home',
                          user=self.uploader()['username'],
                          media=unicode(cursor[0]['slug']))

    def url_to_next(self, urlgen):
        """
        Provide a url to the next entry from this user, if there is one
        """
        cursor = self.db.MediaEntry.find({'_id': {"$lt": self['_id']},
                                          'uploader': self['uploader'],
                                          'state': 'processed'}).sort(
                                              '_id', DESCENDING).limit(1)

        if cursor.count():
            return urlgen('mediagoblin.user_pages.media_home',
                          user=self.uploader()['username'],
                          media=unicode(cursor[0]['slug']))
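
    # Illustrative note on the two queries above: ObjectIds grow with
    # creation time, so the "previous" entry is the nearest *newer* one
    # ($gt, sorted ASCENDING) and the "next" entry is the nearest *older*
    # one ($lt, sorted DESCENDING), each limited to a single result.
    # With entries created in the order A, B, C, B.url_to_prev(urlgen)
    # points at C and B.url_to_next(urlgen) points at A.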

    def uploader(self):
        return self.db.User.find_one({'_id': self['uploader']})

    def get_fail_exception(self):
        """
        Get the exception that's appropriate for this error
        """
        if self['fail_error']:
            return common.import_component(self['fail_error'])
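
    # Hedged sketch: 'fail_error' is expected to hold an importable path in
    # whatever form common.import_component() accepts; the exception name
    # below is made up for illustration.
    #
    #     entry['fail_error'] = u'mediagoblin.processing:BadMediaFail'
    #     entry.get_fail_exception()   # -> the BadMediaFail class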


class MediaComment(Document):
    """
    A comment on a MediaEntry.

    Structure:
     - media_entry: The media entry this comment is attached to
     - author: user who posted this comment
     - created: when the comment was created
     - content: plaintext (but markdown'able) version of the comment's
       content.
     - content_html: the actual html-rendered version of the comment
       displayed. Run through Markdown and the HTML cleaner.
    """

    __collection__ = 'media_comments'

    structure = {
        'media_entry': ObjectId,
        'author': ObjectId,
        'created': datetime.datetime,
        'content': unicode,
        'content_html': unicode}

    required_fields = [
        'media_entry', 'author', 'created', 'content']

    default_values = {
        'created': datetime.datetime.utcnow}

    def media_entry(self):
        return self.db.MediaEntry.find_one({'_id': self['media_entry']})

    def author(self):
        return self.db.User.find_one({'_id': self['author']})


REGISTER_MODELS = [
    MediaEntry,
    User,
    MediaComment]


def register_models(connection):
    """
    Register all models in REGISTER_MODELS with this connection.
    """
    connection.register(REGISTER_MODELS)
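
# Hedged usage sketch: wiring these models up against a mongokit connection.
# Host, port and the database name are assumptions for illustration only.
#
#     from mongokit import Connection
#
#     connection = Connection('localhost', 27017)
#     register_models(connection)
#     db = connection['mediagoblin']
#     for media in db.MediaEntry.find({'state': u'processed'}):
#         print media['title']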