Merge remote-tracking branch 'refs/remotes/rodney757/reprocessing'
[mediagoblin.git] / mediagoblin / db / models.py
index aeee69dd5d555bec406440741e0d468c9d67a83d..809e47220dd6b6901b2361b8d563ca045ec7caac 100644 (file)
@@ -1,5 +1,5 @@
 # GNU MediaGoblin -- federated, autonomous media hosting
-# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Affero General Public License as published by
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-import datetime
-import uuid
-
-from mongokit import Document
+"""
+TODO: indexes on foreignkeys, where useful.
+"""
 
-from mediagoblin.auth import lib as auth_lib
-from mediagoblin import mg_globals
-from mediagoblin.db import migrations
-from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
-from mediagoblin.tools.pagination import Pagination
-from mediagoblin.tools import url, common
+import logging
+import datetime
 
-###################
-# Custom validators
-###################
+from sqlalchemy import Column, Integer, Unicode, UnicodeText, DateTime, \
+        Boolean, ForeignKey, UniqueConstraint, PrimaryKeyConstraint, \
+        SmallInteger
+from sqlalchemy.orm import relationship, backref, with_polymorphic
+from sqlalchemy.orm.collections import attribute_mapped_collection
+from sqlalchemy.sql.expression import desc
+from sqlalchemy.ext.associationproxy import association_proxy
+from sqlalchemy.util import memoized_property
+
+
+from mediagoblin.db.extratypes import PathTupleWithSlashes, JSONEncoded
+from mediagoblin.db.base import Base, DictReadAttrProxy
+from mediagoblin.db.mixin import UserMixin, MediaEntryMixin, \
+        MediaCommentMixin, CollectionMixin, CollectionItemMixin
+from mediagoblin.tools.files import delete_media_files
+from mediagoblin.tools.common import import_component
+
+# It's actually kind of annoying how sqlalchemy-migrate does this, if
+# I understand it right, but whatever.  Anyway, don't remove this :P
+#
+# We could do migration calls more manually instead of relying on
+# this import-based meddling...
+from migrate import changeset
 
-########
-# Models
-########
+_log = logging.getLogger(__name__)
 
 
class User(Base, UserMixin):
    """
    A registered user of this MediaGoblin instance.

    TODO: We should consider moving some rarely used fields
    into some sort of "shadow" table.
    """
    __tablename__ = "core__users"

    id = Column(Integer, primary_key=True)
    # Unique login name; also appears in URLs for the user's pages.
    username = Column(Unicode, nullable=False, unique=True)
    # Note: no db uniqueness constraint on email because it's not
    # reliable (many email systems are case-insensitive despite the
    # RFC) and because it would be a mess to implement at this
    # point.
    email = Column(Unicode, nullable=False)
    # Hashed password; nullable, so an account may exist without one
    # (presumably for alternate auth methods -- TODO confirm).
    pw_hash = Column(Unicode)
    email_verified = Column(Boolean, default=False)
    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
    status = Column(Unicode, default=u"needs_email_verification", nullable=False)
    # Intended to be nullable=False, but migrations would not work for it
    # set to nullable=True implicitly.
    wants_comment_notification = Column(Boolean, default=True)
    wants_notifications = Column(Boolean, default=True)
    license_preference = Column(Unicode)
    is_admin = Column(Boolean, default=False, nullable=False)
    # Personal website of the user, if any.
    url = Column(Unicode)
    bio = Column(UnicodeText)  # ??
    # Running upload total and per-user limit; units are not stated
    # here (presumably bytes or MB -- TODO confirm against callers).
    uploaded = Column(Integer, default=0)
    upload_limit = Column(Integer)

    ## TODO
    # plugin data would be in a separate model

    def __repr__(self):
        return '<{0} #{1} {2} {3} "{4}">'.format(
                self.__class__.__name__,
                self.id,
                'verified' if self.email_verified else 'non-verified',
                'admin' if self.is_admin else 'user',
                self.username)

    def delete(self, **kwargs):
        """Deletes a User and all related entries/comments/files/...

        :param kwargs: passed through to the parent delete()
            (e.g. commit=True/False).
        """
        # Collections get deleted by relationships.

        media_entries = MediaEntry.query.filter(MediaEntry.uploader == self.id)
        for media in media_entries:
            # TODO: Make sure that "MediaEntry.delete()" also deletes
            # all related files/Comments
            # commit=False: batch everything into one transaction,
            # finalized by the super().delete() call below.
            media.delete(del_orphan_tags=False, commit=False)

        # Delete now unused tags
        # TODO: import here due to cyclic imports!!! This cries for refactoring
        from mediagoblin.db.util import clean_orphan_tags
        clean_orphan_tags(commit=False)

        # Delete user, pass through commit=False/True in kwargs
        super(User, self).delete(**kwargs)
        _log.info('Deleted user "{0}" account'.format(self.username))
+
+
class Client(Base):
    """
    Model representing a client - Used for API Auth
    """
    __tablename__ = "core__clients"

    id = Column(Unicode, nullable=True, primary_key=True)
    secret = Column(Unicode, nullable=False)
    # NOTE: column name is "expirey" (sic); kept as-is since renaming
    # would require a schema migration.
    expirey = Column(DateTime, nullable=True)
    application_type = Column(Unicode, nullable=False)
    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
    updated = Column(DateTime, nullable=False, default=datetime.datetime.now)

    # optional stuff
    redirect_uri = Column(JSONEncoded, nullable=True)
    logo_url = Column(Unicode, nullable=True)
    application_name = Column(Unicode, nullable=True)
    contacts = Column(JSONEncoded, nullable=True)

    def __repr__(self):
        # Include the human-readable application name when one is set.
        if not self.application_name:
            return "<Client {0}>".format(self.id)
        return "<Client {0} - {1}>".format(self.application_name, self.id)
 
-     - media_type: What type of media is this?  Currently we only support
-       'image' ;)
class RequestToken(Base):
    """
        Model for representing the request tokens

    OAuth1-style request token issued to a Client; `user` is filled in
    once a user authorizes the request.
    """
    __tablename__ = "core__request_tokens"

    token = Column(Unicode, primary_key=True)
    secret = Column(Unicode, nullable=False)
    client = Column(Unicode, ForeignKey(Client.id))
    user = Column(Integer, ForeignKey(User.id), nullable=True)
    used = Column(Boolean, default=False)
    authenticated = Column(Boolean, default=False)
    verifier = Column(Unicode, nullable=True)
    # "oob" = out-of-band callback (the OAuth1 convention).
    callback = Column(Unicode, nullable=False, default=u"oob")
    # NOTE(review): `updated` has a creation-time default but no
    # onupdate handler, so it only changes if set manually -- confirm
    # that is intended.
    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
    updated = Column(DateTime, nullable=False, default=datetime.datetime.now)
+
class AccessToken(Base):
    """
        Model for representing the access tokens

    Long-lived token exchanged for an authorized RequestToken.
    """
    __tablename__ = "core__access_tokens"

    token = Column(Unicode, nullable=False, primary_key=True)
    secret = Column(Unicode, nullable=False)
    user = Column(Integer, ForeignKey(User.id))
    request_token = Column(Unicode, ForeignKey(RequestToken.token))
    # NOTE(review): as with RequestToken, `updated` has no onupdate
    # handler, so it records creation time unless set manually.
    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
    updated = Column(DateTime, nullable=False, default=datetime.datetime.now)
 
-         mediaentry['media_data']['exif'] = {
-             'manufacturer': 'CASIO',
-             'model': 'QV-4000',
-             'exposure_time': .659}
 
-       Alternately for video you might store:
class NonceTimestamp(Base):
    """
        A place the timestamp and nonce can be stored - this is for OAuth1

    The composite primary key means a given (nonce, timestamp) pair can
    only be recorded once (presumably for replay protection -- TODO
    confirm against the OAuth consumer code).
    """
    __tablename__ = "core__nonce_timestamps"

    nonce = Column(Unicode, nullable=False, primary_key=True)
    timestamp = Column(DateTime, nullable=False, primary_key=True)
 
-       ... so what's appropriate here really depends on the media type.
 
-     - plugin_data: a mapping of extra plugin information for this User.
-       Nothing uses this yet as we don't have plugins, but someday we
-       might... :)
class MediaEntry(Base, MediaEntryMixin):
    """
    Record of one piece of media uploaded by a user.

    TODO: Consider fetching the media_files using join
    """
    __tablename__ = "core__media_entries"

    id = Column(Integer, primary_key=True)
    uploader = Column(Integer, ForeignKey(User.id), nullable=False, index=True)
    title = Column(Unicode, nullable=False)
    # Slug is unique per uploader (see __table_args__ below), not globally.
    slug = Column(Unicode)
    created = Column(DateTime, nullable=False, default=datetime.datetime.now,
        index=True)
    description = Column(UnicodeText) # ??
    # Dotted module path of the media type plugin; used below to locate
    # the media_data model and backref name via import_component().
    media_type = Column(Unicode, nullable=False)
    state = Column(Unicode, default=u'unprocessed', nullable=False)
    # or use sqlalchemy.types.Enum?
    license = Column(Unicode)
    collected = Column(Integer, default=0)
    file_size = Column(Integer, default=0)

    # Populated when processing fails; see fail_metadata for details.
    fail_error = Column(Unicode)
    fail_metadata = Column(JSONEncoded)

    transcoding_progress = Column(SmallInteger)

    queued_media_file = Column(PathTupleWithSlashes)

    # Celery task id for the queued processing job.
    queued_task_id = Column(Unicode)

    __table_args__ = (
        UniqueConstraint('uploader', 'slug'),
        {})

    get_uploader = relationship(User)

    # Dict-like mapping of file keyname -> MediaFile row; the
    # association proxy below exposes just name -> file_path.
    media_files_helper = relationship("MediaFile",
        collection_class=attribute_mapped_collection("name"),
        cascade="all, delete-orphan"
        )
    media_files = association_proxy('media_files_helper', 'file_path',
        creator=lambda k, v: MediaFile(name=k, file_path=v)
        )

    attachment_files_helper = relationship("MediaAttachmentFile",
        cascade="all, delete-orphan",
        order_by="MediaAttachmentFile.created"
        )
    attachment_files = association_proxy("attachment_files_helper", "dict_view",
        creator=lambda v: MediaAttachmentFile(
            name=v["name"], filepath=v["filepath"])
        )

    tags_helper = relationship("MediaTag",
        cascade="all, delete-orphan" # should be automatically deleted
        )
    tags = association_proxy("tags_helper", "dict_view",
        creator=lambda v: MediaTag(name=v["name"], slug=v["slug"])
        )

    collections_helper = relationship("CollectionItem",
        cascade="all, delete-orphan"
        )
    collections = association_proxy("collections_helper", "in_collection")

    ## TODO
    # fail_error

    def get_comments(self, ascending=False):
        """Return this entry's comments as a query, newest first by default.

        Uses the `all_comments` dynamic backref defined on MediaComment.
        """
        order_col = MediaComment.created
        if not ascending:
            order_col = desc(order_col)
        return self.all_comments.order_by(order_col)

    def url_to_prev(self, urlgen):
        """get the next 'newer' entry by this user

        I.e. the processed entry with the smallest id greater than this
        one's; returns None if there is no such entry.
        """
        media = MediaEntry.query.filter(
            (MediaEntry.uploader == self.uploader)
            & (MediaEntry.state == u'processed')
            & (MediaEntry.id > self.id)).order_by(MediaEntry.id).first()

        if media is not None:
            return media.url_for_self(urlgen)

    def url_to_next(self, urlgen):
        """get the next 'older' entry by this user

        I.e. the processed entry with the largest id smaller than this
        one's; returns None if there is no such entry.
        """
        media = MediaEntry.query.filter(
            (MediaEntry.uploader == self.uploader)
            & (MediaEntry.state == u'processed')
            & (MediaEntry.id < self.id)).order_by(desc(MediaEntry.id)).first()

        if media is not None:
            return media.url_for_self(urlgen)

    def get_file_metadata(self, file_key, metadata_key=None):
        """
        Return the file_metadata dict of a MediaFile. If metadata_key is given,
        return the value of the key.

        Returns None if no MediaFile with that key exists for this entry.
        """
        media_file = MediaFile.query.filter_by(media_entry=self.id,
                                               name=unicode(file_key)).first()

        if media_file:
            if metadata_key:
                return media_file.file_metadata.get(metadata_key, None)

            return media_file.file_metadata

    def set_file_metadata(self, file_key, **kwargs):
        """
        Update the file_metadata of a MediaFile.

        NOTE(review): unlike get_file_metadata, this does not guard
        against a missing MediaFile -- if no row matches file_key this
        raises AttributeError on None. Confirm callers guarantee the
        file exists.
        """
        media_file = MediaFile.query.filter_by(media_entry=self.id,
                                               name=unicode(file_key)).first()

        file_metadata = media_file.file_metadata or {}

        for key, value in kwargs.iteritems():
            file_metadata[key] = value

        media_file.file_metadata = file_metadata

    @property
    def media_data(self):
        # Resolved via the media-type plugin's backref (see
        # media_data_ref below); None until media_data_init() runs.
        return getattr(self, self.media_data_ref)

    def media_data_init(self, **kwargs):
        """
        Initialize or update the contents of a media entry's media_data row
        """
        media_data = self.media_data

        if media_data is None:
            # Get the correct table:
            table = import_component(self.media_type + '.models:DATA_MODEL')
            # No media data, so actually add a new one
            media_data = table(**kwargs)
            # Get the relationship set up.
            media_data.get_media_entry = self
        else:
            # Update old media data
            for field, value in kwargs.iteritems():
                setattr(media_data, field, value)

    @memoized_property
    def media_data_ref(self):
        # Name of the backref attribute declared by the media-type
        # plugin's model; memoized since media_type never changes.
        return import_component(self.media_type + '.models:BACKREF_NAME')

    def __repr__(self):
        # Titles are user-supplied unicode; force ASCII for safe repr.
        safe_title = self.title.encode('ascii', 'replace')

        return '<{classname} {id}: {title}>'.format(
                classname=self.__class__.__name__,
                id=self.id,
                title=safe_title)

    def delete(self, del_orphan_tags=True, **kwargs):
        """Delete MediaEntry and all related files/attachments/comments

        This will *not* automatically delete unused collections, which
        can remain empty...

        :param del_orphan_tags: True/false if we delete unused Tags too
        :param commit: True/False if this should end the db transaction"""
        # User's CollectionItems are automatically deleted via "cascade".
        # Comments on this Media are deleted by cascade, hopefully.

        # Delete all related files/attachments
        try:
            delete_media_files(self)
        except OSError, error:
            # Returns list of files we failed to delete
            _log.error('No such files from the user "{1}" to delete: '
                       '{0}'.format(str(error), self.get_uploader))
        _log.info('Deleted Media entry id "{0}"'.format(self.id))
        # Related MediaTag's are automatically cleaned, but we might
        # want to clean out unused Tag's too.
        if del_orphan_tags:
            # TODO: Import here due to cyclic imports!!!
            #       This cries for refactoring
            from mediagoblin.db.util import clean_orphan_tags
            clean_orphan_tags(commit=False)
        # pass through commit=False/True in kwargs
        super(MediaEntry, self).delete(**kwargs)
+
+
class FileKeynames(Base):
    """
    keywords for various places.
    currently the MediaFile keys
    """
    __tablename__ = "core__file_keynames"
    id = Column(Integer, primary_key=True)
    name = Column(Unicode, unique=True)

    def __repr__(self):
        return "<FileKeyname %r: %r>" % (self.id, self.name)

    @classmethod
    def find_or_new(cls, name):
        """Return the existing row for `name`, or a fresh unsaved one."""
        existing = cls.query.filter_by(name=name).first()
        if existing is None:
            return cls(name=name)
        return existing
 
 
-class MediaComment(Document):
class MediaFile(Base):
    """
    One stored file variant belonging to a MediaEntry, identified by a
    short keyname from FileKeynames.

    TODO: Highly consider moving "name" into a new table.
    TODO: Consider preloading said table in software
    """
    __tablename__ = "core__mediafiles"

    media_entry = Column(
        Integer, ForeignKey(MediaEntry.id),
        nullable=False)
    name_id = Column(SmallInteger, ForeignKey(FileKeynames.id), nullable=False)
    file_path = Column(PathTupleWithSlashes)
    file_metadata = Column(JSONEncoded)

    # Composite primary key: at most one file per (entry, keyname).
    __table_args__ = (
        PrimaryKeyConstraint('media_entry', 'name_id'),
        {})

    def __repr__(self):
        return "<MediaFile %s: %r>" % (self.name, self.file_path)

    # Expose the FileKeynames row's string as `.name`; assignment goes
    # through find_or_new so keyname strings are deduplicated.
    name_helper = relationship(FileKeynames, lazy="joined", innerjoin=True)
    name = association_proxy('name_helper', 'name',
        creator=FileKeynames.find_or_new
        )
+
+
class MediaAttachmentFile(Base):
    """A named extra file attached to a MediaEntry."""
    __tablename__ = "core__attachment_files"

    id = Column(Integer, primary_key=True)
    media_entry = Column(
        Integer, ForeignKey(MediaEntry.id),
        nullable=False)
    name = Column(Unicode, nullable=False)
    filepath = Column(PathTupleWithSlashes)
    created = Column(DateTime, nullable=False, default=datetime.datetime.now)

    @property
    def dict_view(self):
        """A dict like view on this object"""
        return DictReadAttrProxy(self)
+
+
class Tag(Base):
    """A globally unique tag slug, shared by MediaTag rows."""
    __tablename__ = "core__tags"

    id = Column(Integer, primary_key=True)
    slug = Column(Unicode, nullable=False, unique=True)

    def __repr__(self):
        return "<Tag %r: %r>" % (self.id, self.slug)

    @classmethod
    def find_or_new(cls, slug):
        """Return the existing Tag for `slug`, or a fresh unsaved one."""
        existing = cls.query.filter_by(slug=slug).first()
        return existing if existing is not None else cls(slug=slug)
+
+
class MediaTag(Base):
    """
    Association of one Tag with one MediaEntry; `name` holds the
    display form (presumably as the user typed it -- TODO confirm).
    """
    __tablename__ = "core__media_tags"

    id = Column(Integer, primary_key=True)
    media_entry = Column(
        Integer, ForeignKey(MediaEntry.id),
        nullable=False, index=True)
    tag = Column(Integer, ForeignKey(Tag.id), nullable=False, index=True)
    name = Column(Unicode)
    # created = Column(DateTime, nullable=False, default=datetime.datetime.now)

    # A tag may appear on a given media entry at most once.
    __table_args__ = (
        UniqueConstraint('tag', 'media_entry'),
        {})

    # `slug` proxies to the related Tag row; assigning a slug creates
    # or reuses a Tag via find_or_new.
    tag_helper = relationship(Tag)
    slug = association_proxy('tag_helper', 'slug',
        creator=Tag.find_or_new
        )

    def __init__(self, name=None, slug=None):
        # Only runs on explicit construction; SQLAlchemy bypasses
        # __init__ when loading rows from the database.
        Base.__init__(self)
        if name is not None:
            self.name = name
        if slug is not None:
            self.tag_helper = Tag.find_or_new(slug)

    @property
    def dict_view(self):
        """A dict like view on this object"""
        return DictReadAttrProxy(self)
+
+
class MediaComment(Base, MediaCommentMixin):
    """A user comment on a MediaEntry."""
    __tablename__ = "core__media_comments"

    id = Column(Integer, primary_key=True)
    media_entry = Column(
        Integer, ForeignKey(MediaEntry.id), nullable=False, index=True)
    author = Column(Integer, ForeignKey(User.id), nullable=False)
    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
    # Raw comment text (markdown); rendering handled by MediaCommentMixin.
    content = Column(UnicodeText, nullable=False)

    # Cascade: Comments are owned by their creator. So do the full thing.
    # lazy=dynamic: People might post a *lot* of comments,
    #     so make the "posted_comments" a query-like thing.
    get_author = relationship(User,
                              backref=backref("posted_comments",
                                              lazy="dynamic",
                                              cascade="all, delete-orphan"))
    # NOTE(review): get_entry and get_media_entry below are two
    # relationships over the same media_entry FK with different
    # backrefs ("comments" vs "all_comments") -- this looks like merge
    # duplication; confirm both are actually needed.
    get_entry = relationship(MediaEntry,
                             backref=backref("comments",
                                             lazy="dynamic",
                                             cascade="all, delete-orphan"))

    # Cascade: Comments are somewhat owned by their MediaEntry.
    #     So do the full thing.
    # lazy=dynamic: MediaEntries might have many comments,
    #     so make the "all_comments" a query-like thing.
    get_media_entry = relationship(MediaEntry,
                                   backref=backref("all_comments",
                                                   lazy="dynamic",
                                                   cascade="all, delete-orphan"))
+
+
class Collection(Base, CollectionMixin):
    """An 'album' or 'set' of media by a user.

    On deletion, contained CollectionItems get automatically reaped via
    SQL cascade"""
    __tablename__ = "core__collections"

    id = Column(Integer, primary_key=True)
    title = Column(Unicode, nullable=False)
    slug = Column(Unicode)
    created = Column(DateTime, nullable=False, default=datetime.datetime.now,
                     index=True)
    description = Column(UnicodeText)
    creator = Column(Integer, ForeignKey(User.id), nullable=False)
    # TODO: No of items in Collection. Badly named, can we migrate to num_items?
    items = Column(Integer, default=0)

    # Cascade: Collections are owned by their creator. So do the full thing.
    get_creator = relationship(User,
                               backref=backref("collections",
                                               cascade="all, delete-orphan"))

    __table_args__ = (
        UniqueConstraint('creator', 'slug'),
        {})

    def get_collection_items(self, ascending=False):
        """Return this collection's items as a query ordered by position
        (descending by default).

        TODO: is this still needed with self.collection_items available?
        """
        base_query = CollectionItem.query.filter_by(collection=self.id)
        if ascending:
            return base_query.order_by(CollectionItem.position)
        return base_query.order_by(desc(CollectionItem.position))
+
+
class CollectionItem(Base, CollectionItemMixin):
    """Membership of one MediaEntry in one Collection."""
    __tablename__ = "core__collection_items"

    id = Column(Integer, primary_key=True)
    media_entry = Column(
        Integer, ForeignKey(MediaEntry.id), nullable=False, index=True)
    collection = Column(Integer, ForeignKey(Collection.id), nullable=False)
    note = Column(UnicodeText, nullable=True)
    added = Column(DateTime, nullable=False, default=datetime.datetime.now)
    # Ordering within the collection; see Collection.get_collection_items.
    position = Column(Integer)

    # Cascade: CollectionItems are owned by their Collection. So do the full thing.
    in_collection = relationship(Collection,
                                 backref=backref(
                                     "collection_items",
                                     cascade="all, delete-orphan"))

    get_media_entry = relationship(MediaEntry)

    # An entry can appear in a given collection at most once.
    __table_args__ = (
        UniqueConstraint('collection', 'media_entry'),
        {})

    @property
    def dict_view(self):
        """A dict like view on this object"""
        return DictReadAttrProxy(self)
+
+
+class ProcessingMetaData(Base):
+    """Extra data attached to a MediaEntry for/by processing.
+
+    Owned by its MediaEntry and reaped with it via the
+    delete-orphan cascade on the backref.
+    """
+    __tablename__ = 'core__processing_metadata'
+
+    id = Column(Integer, primary_key=True)
+    media_entry_id = Column(Integer, ForeignKey(MediaEntry.id), nullable=False,
+            index=True)
+    media_entry = relationship(MediaEntry,
+            backref=backref('processing_metadata',
+                cascade='all, delete-orphan'))
+    # NOTE(review): presumably a URL to ping when processing finishes --
+    # confirm against the reprocessing/processing code that sets it.
+    callback_url = Column(Unicode)
+
+    @property
+    def dict_view(self):
+        """A dict like view on this object"""
+        return DictReadAttrProxy(self)
+
+
+class CommentSubscription(Base):
+    """A user's subscription to comment activity on one media entry.
+
+    Rows are reaped when either the user or the media entry is
+    deleted (delete-orphan cascades on both backrefs).
+    """
+    __tablename__ = 'core__comment_subscriptions'
+    id = Column(Integer, primary_key=True)
+
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+
+    media_entry_id = Column(Integer, ForeignKey(MediaEntry.id), nullable=False)
+    media_entry = relationship(MediaEntry,
+                        backref=backref('comment_subscriptions',
+                                        cascade='all, delete-orphan'))
+
+    user_id = Column(Integer, ForeignKey(User.id), nullable=False)
+    user = relationship(User,
+                        backref=backref('comment_subscriptions',
+                                        cascade='all, delete-orphan'))
+
+    # NOTE(review): both default on; exact delivery semantics (site
+    # notification vs. email) live in the notification tools, not
+    # visible here -- confirm there before relying on these flags.
+    notify = Column(Boolean, nullable=False, default=True)
+    send_email = Column(Boolean, nullable=False, default=True)
+
+    def __repr__(self):
+        return ('<{classname} #{id}: {user} {media} notify: '
+                '{notify} email: {email}>').format(
+            id=self.id,
+            classname=self.__class__.__name__,
+            user=self.user,
+            media=self.media_entry,
+            notify=self.notify,
+            email=self.send_email)
+
+
+class Notification(Base):
+    """Polymorphic base row for notifications shown to a user.
+
+    Concrete kinds (CommentNotification, ProcessingNotification) use
+    joined-table inheritance keyed on the 'type' discriminator column.
+    """
+    __tablename__ = 'core__notifications'
+    id = Column(Integer, primary_key=True)
+    # Discriminator column; set per subclass via polymorphic_identity.
+    type = Column(Unicode)
+
+    created = Column(DateTime, nullable=False, default=datetime.datetime.now)
+
+    user_id = Column(Integer, ForeignKey('core__users.id'), nullable=False,
+                     index=True)
+    # Defaults to False (callable form is equivalent to default=False).
+    seen = Column(Boolean, default=lambda: False, index=True)
+    user = relationship(
+        User,
+        backref=backref('notifications', cascade='all, delete-orphan'))
+
+    __mapper_args__ = {
+        'polymorphic_identity': 'notification',
+        'polymorphic_on': type
+    }
+
+    def __repr__(self):
+        # 'subject' only exists on subclasses, hence the getattr default.
+        return '<{klass} #{id}: {user}: {subject} ({seen})>'.format(
+            id=self.id,
+            klass=self.__class__.__name__,
+            user=self.user,
+            subject=getattr(self, 'subject', None),
+            seen='unseen' if not self.seen else 'seen')
+
+
+class CommentNotification(Notification):
+    """Notification whose subject is a MediaComment.
+
+    Joined-table subclass of Notification; reaped when the subject
+    comment is deleted (delete-orphan cascade on the backref).
+    """
+    __tablename__ = 'core__comment_notifications'
+    id = Column(Integer, ForeignKey(Notification.id), primary_key=True)
+
+    subject_id = Column(Integer, ForeignKey(MediaComment.id))
+    subject = relationship(
+        MediaComment,
+        backref=backref('comment_notifications', cascade='all, delete-orphan'))
+
+    __mapper_args__ = {
+        'polymorphic_identity': 'comment_notification'
+    }
 
-    __collection__ = 'media_comments'
-    use_dot_notation = True
 
-    structure = {
-        'media_entry': ObjectId,
-        'author': ObjectId,
-        'created': datetime.datetime,
-        'content': unicode,
-        'content_html': unicode}
+class ProcessingNotification(Notification):
+    """Notification whose subject is a MediaEntry (processing events).
+
+    Joined-table subclass of Notification; reaped when the subject
+    media entry is deleted (delete-orphan cascade on the backref).
+    """
+    __tablename__ = 'core__processing_notifications'
+
+    id = Column(Integer, ForeignKey(Notification.id), primary_key=True)
+
+    subject_id = Column(Integer, ForeignKey(MediaEntry.id))
+    subject = relationship(
+        MediaEntry,
+        backref=backref('processing_notifications',
+                        cascade='all, delete-orphan'))
+
+    __mapper_args__ = {
+        'polymorphic_identity': 'processing_notification'
+    }
+
+
+# NOTE(review): with_polymorphic() returns an entity meant to be used
+# in queries; the return value is discarded here, so this call may be
+# a no-op -- confirm whether querying Notification actually eager-loads
+# the subclass tables, or whether the result should be kept/used.
+with_polymorphic(
+    Notification,
+    [ProcessingNotification, CommentNotification])
+
+# Every model the db-management code (table creation etc.) must know about.
+MODELS = [
+    User, Client, RequestToken, AccessToken, NonceTimestamp, MediaEntry, Tag,
+    MediaTag, MediaComment, Collection, CollectionItem, MediaFile, FileKeynames,
+    MediaAttachmentFile, ProcessingMetaData, Notification, CommentNotification,
+    ProcessingNotification, CommentSubscription]
+
+"""
+ Foundations are the default rows created immediately after the tables
+ are initialized. Each entry in this dictionary should have the format:
+                 ModelConstructorObject: List of Dictionaries
+ (Each dictionary represents one row of the table to be created, mapping
+  each column's name, as a key string, to that column's value.)
+
+ ex. [NOTE: THIS IS NOT BASED ON OUR ACTUAL USER TABLE]
+    user_foundations = [{'name': u'Joanna', 'age': 24},
+                        {'name': u'Andrea', 'age': 41}]
+
+    FOUNDATIONS = {User: user_foundations}
+"""
+FOUNDATIONS = {}
+
+######################################################
+# Special, migrations-tracking table
+#
+# Not listed in MODELS because this is special and not
+# really migrated, but used for migrations (for now)
+######################################################
 
-    required_fields = [
-        'media_entry', 'author', 'created', 'content']
+class MigrationData(Base):
+    """Tracks the current migration version, one row per migration set name."""
+    __tablename__ = "core__migrations"
 
-    required_fields = [
-        'media_entry', 'author', 'created', 'content']
+    name = Column(Unicode, primary_key=True)
+    version = Column(Integer, nullable=False, default=0)
 
-    def media_entry(self):
-        return self.db.MediaEntry.find_one({'_id': self['media_entry']})
+######################################################
 
-    def author(self):
-        return self.db.User.find_one({'_id': self['author']})
 
+def show_table_init(engine_uri):
+    """Create all tables against ``engine_uri``, echoing the SQL.
+
+    Debug helper: with echo=True, the emitted CREATE TABLE statements
+    are printed. Falls back to an in-memory sqlite db when
+    ``engine_uri`` is None.
+    """
+    if engine_uri is None:
+        engine_uri = 'sqlite:///:memory:'
+    from sqlalchemy import create_engine
+    engine = create_engine(engine_uri, echo=True)
 
-REGISTER_MODELS = [
-    MediaEntry,
-    User,
-    MediaComment]
+    Base.metadata.create_all(engine)
 
 
-def register_models(connection):
-    """
-    Register all models in REGISTER_MODELS with this connection.
-    """
-    connection.register(REGISTER_MODELS)
+# Manual invocation: ``python models.py [engine_uri]`` dumps the schema
+# SQL (via show_table_init's echo=True) for the given database, or for
+# an in-memory sqlite db when no URI is given.
+if __name__ == '__main__':
+    from sys import argv
+    # Python 2 print statement -- consistent with the rest of this file.
+    print repr(argv)
+    if len(argv) == 2:
+        uri = argv[1]
+    else:
+        uri = None
+    show_table_init(uri)