[mediagoblin.git] / mediagoblin / db / mongo / models.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import datetime

from mongokit import Document

from mediagoblin import mg_globals
from mediagoblin.db.mongo import migrations
from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId
from mediagoblin.tools.pagination import Pagination
from mediagoblin.tools import url
from mediagoblin.db.mixin import UserMixin, MediaEntryMixin

###################
# Custom validators
###################

########
# Models
########


class User(Document, UserMixin):
    """
    An account on this MediaGoblin instance.

    Fields stored on the document:
     - username: Login name; should be unique within this instance.
     - email: The account's email address.
     - created: When the account was created (defaults to utcnow).
     - plugin_data: Mapping reserved for plugins to stash extra per-user
       information.  Nothing uses it yet.
     - pw_hash: Hashed form of the user's password.
     - email_verified: Whether the user has confirmed their email address.
       Most parts of the site are disabled for users who haven't yet.
     - status: Account state.  Currently one of 'needs_email_verification'
       (the default) or 'active'.
     - verification_key: Key a user awaiting email verification has to
       present (it is encoded in the URL shown to them) to confirm their
       email as active.
     - is_admin: Whether this user is an administrator.
     - url: The user's personal webpage/website, if any.
     - bio: The user's biography (plaintext, in markdown).
     - bio_html: The biography converted to HTML.
     - fp_verification_key: Forgotten-password verification key.
     - fp_token_expire: Datetime stored alongside fp_verification_key;
       presumably the moment that key stops being valid (confirm against
       the forgotten-password views).
    """
    __collection__ = 'users'
    use_dot_notation = True

    structure = {
        # Identity
        'username': unicode,
        'email': unicode,
        'created': datetime.datetime,

        # Plugins can dump stuff here.
        'plugin_data': dict,

        # Authentication / account state
        'pw_hash': unicode,
        'email_verified': bool,
        'status': unicode,
        'verification_key': unicode,
        'is_admin': bool,

        # Profile
        'url': unicode,
        'bio': unicode,  # May contain markdown
        'bio_html': unicode,  # May contain plaintext, or HTML

        # Forgotten password handling
        'fp_verification_key': unicode,
        'fp_token_expire': datetime.datetime,
    }

    required_fields = [
        'username',
        'created',
        'pw_hash',
        'email',
    ]

    default_values = {
        'created': datetime.datetime.utcnow,
        'email_verified': False,
        'status': u'needs_email_verification',
        'is_admin': False,
    }


class MediaEntry(Document, MediaEntryMixin):
    """
    Record of a piece of media.

    Structure:
     - uploader: A reference to a User who uploaded this.

     - title: Title of this work

     - slug: A normalized "slug" which can be used as part of a URL to retrieve
       this work, such as 'my-works-name-in-slug-form' may be viewable by
       'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
       Note that since URLs are constructed this way, slugs must be unique
       per-uploader.  (An index is provided to enforce that but code should be
       written on the python side to ensure this as well.)

     - created: Date and time of when this piece of work was uploaded.

     - description: Uploader-set description of this work.  This can be marked
       up with MarkDown for slight fanciness (links, boldness, italics,
       paragraphs...)

     - description_html: Rendered version of the description, run through
       Markdown and cleaned with our cleaning tool.

     - media_type: What type of media is this?  Currently we only support
       'image' ;)

     - media_data: Extra information that's media-format-dependent.
       For example, images might contain some EXIF data that's not appropriate
       to other formats.  You might store it like:

         mediaentry.media_data['exif'] = {
             'manufacturer': 'CASIO',
             'model': 'QV-4000',
             'exposure_time': .659}

       Alternately for video you might store:

         # play length in seconds
         mediaentry.media_data['play_length'] = 340

       ... so what's appropriate here really depends on the media type.

     - plugin_data: a mapping of extra plugin information for this
       MediaEntry.  Nothing uses this yet as we don't have plugins, but
       someday we might... :)

     - tags: A list of tags.  Each tag is stored as a dictionary that has a key
       for the actual name and the normalized name-as-slug, so ultimately this
       looks like:
         [{'name': 'Gully Gardens',
           'slug': 'gully-gardens'},
          {'name': 'Castle Adventure Time?!',
           'slug': 'castle-adventure-time'}]

     - state: What's the state of this file?  Active, inactive, disabled, etc...
       But really for now there are only two states:
        "unprocessed": uploaded but needs to go through processing for display
        "processed": processed and able to be displayed

     - queued_media_file: storage interface style filepath describing a file
       queued for processing.  This is stored in the mg_globals.queue_store
       storage system.

     - queued_task_id: celery task id.  Use this to fetch the task state.

     - media_files: Files relevant to this that have actually been processed
       and are available for various types of display.  Stored like:
         {'thumb': ['dir1', 'dir2', 'pic.png']}

     - attachment_files: A list of "attachment" files, ones that aren't
       critical to this piece of media but may be usefully relevant to people
       viewing the work.  (currently unused.)

     - fail_error: path to the exception raised
     - fail_metadata: extra data stored when processing goes badly
    """
    __collection__ = 'media_entries'
    use_dot_notation = True

    structure = {
        'uploader': ObjectId,
        'title': unicode,
        'slug': unicode,
        'created': datetime.datetime,
        'description': unicode,  # May contain markdown/up
        'description_html': unicode,  # May contain plaintext, or HTML
        'media_type': unicode,
        'media_data': dict,  # extra data relevant to this media_type
        'plugin_data': dict,  # plugins can dump stuff here.
        'tags': [dict],
        'state': unicode,

        # For now let's assume there can only be one main file queued
        # at a time
        'queued_media_file': [unicode],
        'queued_task_id': unicode,

        # A dictionary of logical names to filepaths
        'media_files': dict,

        # The following should be lists of lists, in appropriate file
        # record form
        'attachment_files': list,

        # If things go badly in processing things, we'll store that
        # data here
        'fail_error': unicode,
        'fail_metadata': dict}

    required_fields = [
        'uploader', 'created', 'media_type', 'slug']

    default_values = {
        'created': datetime.datetime.utcnow,
        'state': u'unprocessed'}

    def get_comments(self, ascending=False):
        """
        Return a cursor over the MediaComments attached to this entry.

        The cursor is sorted on 'created'; newest first unless `ascending`
        is true.
        """
        if ascending:
            order = ASCENDING
        else:
            order = DESCENDING

        return self.db.MediaComment.find({
                'media_entry': self._id}).sort('created', order)

    def generate_slug(self):
        """
        Set self.slug from self.title, keeping it unique per uploader.

        The slugified title is used as-is unless a *different* entry by the
        same uploader already carries that slug; in that case this entry's
        _id is prepended to disambiguate.
        """
        self.slug = url.slugify(self.title)

        # Slugs only have to be unique per uploader (see the class
        # docstring), so only this uploader's entries can conflict.
        query = {
            'uploader': self.uploader,
            'slug': self.slug}

        # An entry that is already stored must not be treated as a
        # duplicate of itself when its slug is regenerated.
        own_id = self.get('_id')
        if own_id is not None:
            query['_id'] = {'$ne': own_id}

        duplicate = mg_globals.database.media_entries.find_one(query)

        if duplicate:
            self.slug = "%s-%s" % (self._id, self.slug)

    def url_to_prev(self, urlgen):
        """
        Provide a url to the previous entry from this user, if there is one

        "Previous" is the processed entry by the same uploader with the
        smallest _id greater than this one's.  Returns None implicitly when
        no such entry exists.
        """
        cursor = self.db.MediaEntry.find({'_id': {"$gt": self._id},
                                          'uploader': self.uploader,
                                          'state': 'processed'}).sort(
                                                    '_id', ASCENDING).limit(1)
        # The cursor holds at most one document; return on the first hit.
        for media in cursor:
            return media.url_for_self(urlgen)

    def url_to_next(self, urlgen):
        """
        Provide a url to the next entry from this user, if there is one

        "Next" is the processed entry by the same uploader with the largest
        _id smaller than this one's.  Returns None implicitly when no such
        entry exists.
        """
        cursor = self.db.MediaEntry.find({'_id': {"$lt": self._id},
                                          'uploader': self.uploader,
                                          'state': 'processed'}).sort(
                                                    '_id', DESCENDING).limit(1)

        # The cursor holds at most one document; return on the first hit.
        for media in cursor:
            return media.url_for_self(urlgen)

    @property
    def get_uploader(self):
        """
        The User document referenced by `uploader` (result of find_one).
        """
        return self.db.User.find_one({'_id': self.uploader})


class MediaComment(Document):
    """
    A single comment left on a MediaEntry.

    Fields stored on the document:
     - media_entry: Id of the media entry this comment belongs to.
     - author: Id of the user who posted the comment.
     - created: When the comment was posted (defaults to utcnow).
     - content: The comment text as plaintext (markdown is allowed).
     - content_html: The HTML actually displayed, i.e. `content` run through
       Markdown and the HTML cleaner.
    """

    __collection__ = 'media_comments'
    use_dot_notation = True

    structure = {
        'media_entry': ObjectId,
        'author': ObjectId,
        'created': datetime.datetime,
        'content': unicode,
        'content_html': unicode,
    }

    required_fields = [
        'media_entry',
        'author',
        'created',
        'content',
    ]

    default_values = {
        'created': datetime.datetime.utcnow,
    }

    def media_entry(self):
        """
        Look up the MediaEntry document this comment is attached to.
        """
        entries = self.db.MediaEntry
        # Item access is deliberate: this method shares its name with the
        # 'media_entry' key, so the stored id is read via self[...].
        return entries.find_one({'_id': self['media_entry']})

    @property
    def get_author(self):
        """
        The User document of whoever wrote this comment.
        """
        users = self.db.User
        return users.find_one({'_id': self['author']})


# Document classes that register_models() binds to a connection.
REGISTER_MODELS = [MediaEntry, User, MediaComment]


def register_models(connection):
    """
    Bind every document class listed in REGISTER_MODELS to `connection`.

    The list is handed over in a single connection.register() call.
    """
    connection.register(REGISTER_MODELS)
Commit	Line	Data
	1	# GNU MediaGoblin -- federated, autonomous media hosting
	2	# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
	17	import datetime
	18
	19	from mongokit import Document
	20
	21	from mediagoblin import mg_globals
	22	from mediagoblin.db.mongo import migrations
	23	from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId
	24	from mediagoblin.tools.pagination import Pagination
	25	from mediagoblin.tools import url
	26	from mediagoblin.db.mixin import UserMixin, MediaEntryMixin
	27
	28	###################
	29	# Custom validators
	30	###################
	31
	32	########
	33	# Models
	34	########
	35
	36
	37	class User(Document, UserMixin):
	38	"""
	39	A user of MediaGoblin.
	40
	41	Structure:
	42	- username: The username of this user, should be unique to this instance.
	43	- email: Email address of this user
	44	- created: When the user was created
	45	- plugin_data: a mapping of extra plugin information for this User.
	46	Nothing uses this yet as we don't have plugins, but someday we
	47	might... :)
	48	- pw_hash: Hashed version of user's password.
	49	- email_verified: Whether or not the user has verified their email or not.
	50	Most parts of the site are disabled for users who haven't yet.
	51	- status: whether or not the user is active, etc. Currently only has two
	52	values, 'needs_email_verification' or 'active'. (In the future, maybe
	53	we'll change this to a boolean with a key of 'active' and have a
	54	separate field for a reason the user's been disabled if that's
	55	appropriate... email_verified is already separate, after all.)
	56	- verification_key: If the user is awaiting email verification, the user
	57	will have to provide this key (which will be encoded in the presented
	58	URL) in order to confirm their email as active.
	59	- is_admin: Whether or not this user is an administrator or not.
	60	- url: this user's personal webpage/website, if appropriate.
	61	- bio: biography of this user (plaintext, in markdown)
	62	- bio_html: biography of the user converted to proper HTML.
	63	"""
	64	__collection__ = 'users'
	65	use_dot_notation = True
	66
	67	structure = {
	68	'username': unicode,
	69	'email': unicode,
	70	'created': datetime.datetime,
	71	'plugin_data': dict, # plugins can dump stuff here.
	72	'pw_hash': unicode,
	73	'email_verified': bool,
	74	'status': unicode,
	75	'verification_key': unicode,
	76	'is_admin': bool,
	77	'url': unicode,
	78	'bio': unicode, # May contain markdown
	79	'bio_html': unicode, # May contain plaintext, or HTML
	80	'fp_verification_key': unicode, # forgotten password verification key
	81	'fp_token_expire': datetime.datetime,
	82	}
	83
	84	required_fields = ['username', 'created', 'pw_hash', 'email']
	85
	86	default_values = {
	87	'created': datetime.datetime.utcnow,
	88	'email_verified': False,
	89	'status': u'needs_email_verification',
	90	'is_admin': False}
	91
	92
	93	class MediaEntry(Document, MediaEntryMixin):
	94	"""
	95	Record of a piece of media.
	96
	97	Structure:
	98	- uploader: A reference to a User who uploaded this.
	99
	100	- title: Title of this work
	101
	102	- slug: A normalized "slug" which can be used as part of a URL to retrieve
	103	this work, such as 'my-works-name-in-slug-form' may be viewable by
	104	'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
	105	Note that since URLs are constructed this way, slugs must be unique
	106	per-uploader. (An index is provided to enforce that but code should be
	107	written on the python side to ensure this as well.)
	108
	109	- created: Date and time of when this piece of work was uploaded.
	110
	111	- description: Uploader-set description of this work. This can be marked
	112	up with MarkDown for slight fanciness (links, boldness, italics,
	113	paragraphs...)
	114
	115	- description_html: Rendered version of the description, run through
	116	Markdown and cleaned with our cleaning tool.
	117
	118	- media_type: What type of media is this? Currently we only support
	119	'image' ;)
	120
	121	- media_data: Extra information that's media-format-dependent.
	122	For example, images might contain some EXIF data that's not appropriate
	123	to other formats. You might store it like:
	124
	125	mediaentry.media_data['exif'] = {
	126	'manufacturer': 'CASIO',
	127	'model': 'QV-4000',
	128	'exposure_time': .659}
	129
	130	Alternately for video you might store:
	131
	132	# play length in seconds
	133	mediaentry.media_data['play_length'] = 340
	134
	135	... so what's appropriate here really depends on the media type.
	136
	137	- plugin_data: a mapping of extra plugin information for this User.
	138	Nothing uses this yet as we don't have plugins, but someday we
	139	might... :)
	140
	141	- tags: A list of tags. Each tag is stored as a dictionary that has a key
	142	for the actual name and the normalized name-as-slug, so ultimately this
	143	looks like:
	144	[{'name': 'Gully Gardens',
	145	'slug': 'gully-gardens'},
	146	{'name': 'Castle Adventure Time?!",
	147	'slug': 'castle-adventure-time'}]
	148
	149	- state: What's the state of this file? Active, inactive, disabled, etc...
	150	But really for now there are only two states:
	151	"unprocessed": uploaded but needs to go through processing for display
	152	"processed": processed and able to be displayed
	153
	154	- queued_media_file: storage interface style filepath describing a file
	155	queued for processing. This is stored in the mg_globals.queue_store
	156	storage system.
	157
	158	- queued_task_id: celery task id. Use this to fetch the task state.
	159
	160	- media_files: Files relevant to this that have actually been processed
	161	and are available for various types of display. Stored like:
	162	{'thumb': ['dir1', 'dir2', 'pic.png'}
	163
	164	- attachment_files: A list of "attachment" files, ones that aren't
	165	critical to this piece of media but may be usefully relevant to people
	166	viewing the work. (currently unused.)
	167
	168	- fail_error: path to the exception raised
	169	- fail_metadata:
	170	"""
	171	__collection__ = 'media_entries'
	172	use_dot_notation = True
	173
	174	structure = {
	175	'uploader': ObjectId,
	176	'title': unicode,
	177	'slug': unicode,
	178	'created': datetime.datetime,
	179	'description': unicode, # May contain markdown/up
	180	'description_html': unicode, # May contain plaintext, or HTML
	181	'media_type': unicode,
	182	'media_data': dict, # extra data relevant to this media_type
	183	'plugin_data': dict, # plugins can dump stuff here.
	184	'tags': [dict],
	185	'state': unicode,
	186
	187	# For now let's assume there can only be one main file queued
	188	# at a time
	189	'queued_media_file': [unicode],
	190	'queued_task_id': unicode,
	191
	192	# A dictionary of logical names to filepaths
	193	'media_files': dict,
	194
	195	# The following should be lists of lists, in appropriate file
	196	# record form
	197	'attachment_files': list,
	198
	199	# If things go badly in processing things, we'll store that
	200	# data here
	201	'fail_error': unicode,
	202	'fail_metadata': dict}
	203
	204	required_fields = [
	205	'uploader', 'created', 'media_type', 'slug']
	206
	207	default_values = {
	208	'created': datetime.datetime.utcnow,
	209	'state': u'unprocessed'}
	210
	211	def get_comments(self, ascending=False):
	212	if ascending:
	213	order = ASCENDING
	214	else:
	215	order = DESCENDING
	216
	217	return self.db.MediaComment.find({
	218	'media_entry': self._id}).sort('created', order)
	219
	220	def generate_slug(self):
	221	self.slug = url.slugify(self.title)
	222
	223	duplicate = mg_globals.database.media_entries.find_one(
	224	{'slug': self.slug})
	225
	226	if duplicate:
	227	self.slug = "%s-%s" % (self._id, self.slug)
	228
	229	def url_to_prev(self, urlgen):
	230	"""
	231	Provide a url to the previous entry from this user, if there is one
	232	"""
	233	cursor = self.db.MediaEntry.find({'_id': {"$gt": self._id},
	234	'uploader': self.uploader,
	235	'state': 'processed'}).sort(
	236	'_id', ASCENDING).limit(1)
	237	for media in cursor:
	238	return media.url_for_self(urlgen)
	239
	240	def url_to_next(self, urlgen):
	241	"""
	242	Provide a url to the next entry from this user, if there is one
	243	"""
	244	cursor = self.db.MediaEntry.find({'_id': {"$lt": self._id},
	245	'uploader': self.uploader,
	246	'state': 'processed'}).sort(
	247	'_id', DESCENDING).limit(1)
	248
	249	for media in cursor:
	250	return media.url_for_self(urlgen)
	251
	252	@property
	253	def get_uploader(self):
	254	return self.db.User.find_one({'_id': self.uploader})
	255
	256
	257	class MediaComment(Document):
	258	"""
	259	A comment on a MediaEntry.
	260
	261	Structure:
	262	- media_entry: The media entry this comment is attached to
	263	- author: user who posted this comment
	264	- created: when the comment was created
	265	- content: plaintext (but markdown'able) version of the comment's content.
	266	- content_html: the actual html-rendered version of the comment displayed.
	267	Run through Markdown and the HTML cleaner.
	268	"""
	269
	270	__collection__ = 'media_comments'
	271	use_dot_notation = True
	272
	273	structure = {
	274	'media_entry': ObjectId,
	275	'author': ObjectId,
	276	'created': datetime.datetime,
	277	'content': unicode,
	278	'content_html': unicode}
	279
	280	required_fields = [
	281	'media_entry', 'author', 'created', 'content']
	282
	283	default_values = {
	284	'created': datetime.datetime.utcnow}
	285
	286	def media_entry(self):
	287	return self.db.MediaEntry.find_one({'_id': self['media_entry']})
	288
	289	@property
	290	def get_author(self):
	291	return self.db.User.find_one({'_id': self['author']})
	292
	293
	294	REGISTER_MODELS = [
	295	MediaEntry,
	296	User,
	297	MediaComment]
	298
	299
	300	def register_models(connection):
	301	"""
	302	Register all models in REGISTER_MODELS with this connection.
	303	"""
	304	connection.register(REGISTER_MODELS)