mediagoblin/db/models.py

   1 # GNU MediaGoblin -- federated, autonomous media hosting
   2 # Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
   3 #
   4 # This program is free software: you can redistribute it and/or modify
   5 # it under the terms of the GNU Affero General Public License as published by
   6 # the Free Software Foundation, either version 3 of the License, or
   7 # (at your option) any later version.
   8 #
   9 # This program is distributed in the hope that it will be useful,
  10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 # GNU Affero General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17 import datetime, uuid
  18
  19 from mongokit import Document
  20
  21 from mediagoblin.auth import lib as auth_lib
  22 from mediagoblin import mg_globals
  23 from mediagoblin.db import migrations
  24 from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
  25 from mediagoblin.tools.pagination import Pagination
  26 from mediagoblin.tools import url, common
  27 from mediagoblin.tools import licenses
  28
  29 ###################
  30 # Custom validators
  31 ###################
  32
  33 ########
  34 # Models
  35 ########
  36
  37
  38 class User(Document):
  39     """
  40     A user of MediaGoblin.
  41
  42     Structure:
  43      - username: The username of this user, should be unique to this instance.
  44      - email: Email address of this user
  45      - created: When the user was created
  46      - plugin_data: a mapping of extra plugin information for this User.
  47        Nothing uses this yet as we don't have plugins, but someday we
  48        might... :)
  49      - pw_hash: Hashed version of user's password.
  50      - email_verified: Whether or not the user has verified their email or not.
  51        Most parts of the site are disabled for users who haven't yet.
  52      - status: whether or not the user is active, etc.  Currently only has two
  53        values, 'needs_email_verification' or 'active'.  (In the future, maybe
  54        we'll change this to a boolean with a key of 'active' and have a
  55        separate field for a reason the user's been disabled if that's
  56        appropriate... email_verified is already separate, after all.)
  57      - verification_key: If the user is awaiting email verification, the user
  58        will have to provide this key (which will be encoded in the presented
  59        URL) in order to confirm their email as active.
  60      - is_admin: Whether or not this user is an administrator or not.
  61      - url: this user's personal webpage/website, if appropriate.
  62      - bio: biography of this user (plaintext, in markdown)
  63      - bio_html: biography of the user converted to proper HTML.
  64     """
  65     __collection__ = 'users'
  66
  67     structure = {
  68         'username': unicode,
  69         'email': unicode,
  70         'created': datetime.datetime,
  71         'plugin_data': dict, # plugins can dump stuff here.
  72         'pw_hash': unicode,
  73         'email_verified': bool,
  74         'status': unicode,
  75         'verification_key': unicode,
  76         'is_admin': bool,
  77         'url' : unicode,
  78         'bio' : unicode,     # May contain markdown
  79         'bio_html': unicode, # May contain plaintext, or HTML
  80         'fp_verification_key': unicode, # forgotten password verification key
  81         'fp_token_expire': datetime.datetime
  82         }
  83
  84     required_fields = ['username', 'created', 'pw_hash', 'email']
  85
  86     default_values = {
  87         'created': datetime.datetime.utcnow,
  88         'email_verified': False,
  89         'status': u'needs_email_verification',
  90         'verification_key': lambda: unicode(uuid.uuid4()),
  91         'is_admin': False}
  92
  93     def check_login(self, password):
  94         """
  95         See if a user can login with this password
  96         """
  97         return auth_lib.bcrypt_check_password(
  98             password, self['pw_hash'])
  99
 100
 101 class MediaEntry(Document):
 102     """
 103     Record of a piece of media.
 104
 105     Structure:
 106      - uploader: A reference to a User who uploaded this.
 107
 108      - title: Title of this work
 109
 110      - slug: A normalized "slug" which can be used as part of a URL to retrieve
 111        this work, such as 'my-works-name-in-slug-form' may be viewable by
 112        'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
 113        Note that since URLs are constructed this way, slugs must be unique
 114        per-uploader.  (An index is provided to enforce that but code should be
 115        written on the python side to ensure this as well.)
 116
 117      - created: Date and time of when this piece of work was uploaded.
 118
 119      - description: Uploader-set description of this work.  This can be marked
 120        up with MarkDown for slight fanciness (links, boldness, italics,
 121        paragraphs...)
 122
 123      - description_html: Rendered version of the description, run through
 124        Markdown and cleaned with our cleaning tool.
 125
 126      - media_type: What type of media is this?  Currently we only support
 127        'image' ;)
 128
 129      - media_data: Extra information that's media-format-dependent.
 130        For example, images might contain some EXIF data that's not appropriate
 131        to other formats.  You might store it like:
 132
 133          mediaentry['media_data']['exif'] = {
 134              'manufacturer': 'CASIO',
 135              'model': 'QV-4000',
 136              'exposure_time': .659}
 137
 138        Alternately for video you might store:
 139
 140          # play length in seconds
 141          mediaentry['media_data']['play_length'] = 340
 142
 143        ... so what's appropriate here really depends on the media type.
 144
 145      - plugin_data: a mapping of extra plugin information for this User.
 146        Nothing uses this yet as we don't have plugins, but someday we
 147        might... :)
 148
 149      - tags: A list of tags.  Each tag is stored as a dictionary that has a key
 150        for the actual name and the normalized name-as-slug, so ultimately this
 151        looks like:
 152          [{'name': 'Gully Gardens',
 153            'slug': 'gully-gardens'},
 154           {'name': 'Castle Adventure Time?!",
 155            'slug': 'castle-adventure-time'}]
 156
 157      - state: What's the state of this file?  Active, inactive, disabled, etc...
 158        But really for now there are only two states:
 159         "unprocessed": uploaded but needs to go through processing for display
 160         "processed": processed and able to be displayed
 161
 162      - license: URI for entry's license
 163
 164      - queued_media_file: storage interface style filepath describing a file
 165        queued for processing.  This is stored in the mg_globals.queue_store
 166        storage system.
 167
 168      - queued_task_id: celery task id.  Use this to fetch the task state.
 169
 170      - media_files: Files relevant to this that have actually been processed
 171        and are available for various types of display.  Stored like:
 172          {'thumb': ['dir1', 'dir2', 'pic.png'}
 173
 174      - attachment_files: A list of "attachment" files, ones that aren't
 175        critical to this piece of media but may be usefully relevant to people
 176        viewing the work.  (currently unused.)
 177
 178      - fail_error: path to the exception raised
 179      - fail_metadata:
 180
 181     """
 182     __collection__ = 'media_entries'
 183
 184     structure = {
 185         'uploader': ObjectId,
 186         'title': unicode,
 187         'slug': unicode,
 188         'created': datetime.datetime,
 189         'description': unicode, # May contain markdown/up
 190         'description_html': unicode, # May contain plaintext, or HTML
 191         'media_type': unicode,
 192         'media_data': dict, # extra data relevant to this media_type
 193         'plugin_data': dict, # plugins can dump stuff here.
 194         'tags': [dict],
 195         'state': unicode,
 196         'license': unicode, # License URI
 197
 198         # For now let's assume there can only be one main file queued
 199         # at a time
 200         'queued_media_file': [unicode],
 201         'queued_task_id': unicode,
 202
 203         # A dictionary of logical names to filepaths
 204         'media_files': dict,
 205
 206         # The following should be lists of lists, in appropriate file
 207         # record form
 208         'attachment_files': list,
 209
 210         # If things go badly in processing things, we'll store that
 211         # data here
 212         'fail_error': unicode,
 213         'fail_metadata': dict}
 214
 215     required_fields = [
 216         'uploader', 'created', 'media_type', 'slug']
 217
 218     default_values = {
 219         'created': datetime.datetime.utcnow,
 220         'state': u'unprocessed'}
 221
 222     def get_comments(self):
 223         return self.db.MediaComment.find({
 224                 'media_entry': self['_id']}).sort('created', DESCENDING)
 225
 226     def get_display_media(self, media_map, fetch_order=common.DISPLAY_IMAGE_FETCHING_ORDER):
 227         """
 228         Find the best media for display.
 229
 230         Args:
 231         - media_map: a dict like
 232           {u'image_size': [u'dir1', u'dir2', u'image.jpg']}
 233         - fetch_order: the order we should try fetching images in
 234
 235         Returns:
 236         (media_size, media_path)
 237         """
 238         media_sizes = media_map.keys()
 239
 240         for media_size in common.DISPLAY_IMAGE_FETCHING_ORDER:
 241             if media_size in media_sizes:
 242                 return media_map[media_size]
 243
 244     def main_mediafile(self):
 245         pass
 246
 247     def generate_slug(self):
 248         self['slug'] = url.slugify(self['title'])
 249
 250         duplicate = mg_globals.database.media_entries.find_one(
 251             {'slug': self['slug']})
 252
 253         if duplicate:
 254             self['slug'] = "%s-%s" % (self['_id'], self['slug'])
 255
 256     def url_for_self(self, urlgen):
 257         """
 258         Generate an appropriate url for ourselves
 259
 260         Use a slug if we have one, else use our '_id'.
 261         """
 262         uploader = self.uploader()
 263
 264         if self.get('slug'):
 265             return urlgen(
 266                 'mediagoblin.user_pages.media_home',
 267                 user=uploader['username'],
 268                 media=self['slug'])
 269         else:
 270             return urlgen(
 271                 'mediagoblin.user_pages.media_home',
 272                 user=uploader['username'],
 273                 media=unicode(self['_id']))
 274
 275     def url_to_prev(self, urlgen):
 276         """
 277         Provide a url to the previous entry from this user, if there is one
 278         """
 279         cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']},
 280                                           'uploader': self['uploader'],
 281                                           'state': 'processed'}).sort(
 282                                                     '_id', ASCENDING).limit(1)
 283         if cursor.count():
 284             return urlgen('mediagoblin.user_pages.media_home',
 285                           user=self.uploader()['username'],
 286                           media=unicode(cursor[0]['slug']))
 287
 288     def url_to_next(self, urlgen):
 289         """
 290         Provide a url to the next entry from this user, if there is one
 291         """
 292         cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']},
 293                                           'uploader': self['uploader'],
 294                                           'state': 'processed'}).sort(
 295                                                     '_id', DESCENDING).limit(1)
 296
 297         if cursor.count():
 298             return urlgen('mediagoblin.user_pages.media_home',
 299                           user=self.uploader()['username'],
 300                           media=unicode(cursor[0]['slug']))
 301
 302     def uploader(self):
 303         return self.db.User.find_one({'_id': self['uploader']})
 304
 305     def get_fail_exception(self):
 306         """
 307         Get the exception that's appropriate for this error
 308         """
 309         if self['fail_error']:
 310             return common.import_component(self['fail_error'])
 311
 312     def get_license_data(self):
 313         """Return license dict for requested license"""
 314         return licenses.SUPPORTED_LICENSES[self['license']]
 315
 316
 317 class MediaComment(Document):
 318     """
 319     A comment on a MediaEntry.
 320
 321     Structure:
 322      - media_entry: The media entry this comment is attached to
 323      - author: user who posted this comment
 324      - created: when the comment was created
 325      - content: plaintext (but markdown'able) version of the comment's content.
 326      - content_html: the actual html-rendered version of the comment displayed.
 327        Run through Markdown and the HTML cleaner.
 328     """
 329
 330     __collection__ = 'media_comments'
 331
 332     structure = {
 333         'media_entry': ObjectId,
 334         'author': ObjectId,
 335         'created': datetime.datetime,
 336         'content': unicode,
 337         'content_html': unicode}
 338
 339     required_fields = [
 340         'media_entry', 'author', 'created', 'content']
 341
 342     default_values = {
 343         'created': datetime.datetime.utcnow}
 344
 345     def media_entry(self):
 346         return self.db.MediaEntry.find_one({'_id': self['media_entry']})
 347
 348     def author(self):
 349         return self.db.User.find_one({'_id': self['author']})
 350
 351
 352 REGISTER_MODELS = [
 353     MediaEntry,
 354     User,
 355     MediaComment]
 356
 357
 358 def register_models(connection):
 359     """
 360     Register all models in REGISTER_MODELS with this connection.
 361     """
 362     connection.register(REGISTER_MODELS)
 363