[mediagoblin.git] / mediagoblin / db / mongo / models.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import datetime

from mongokit import Document

from mediagoblin import mg_globals
from mediagoblin.db.mongo import migrations
from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId
from mediagoblin.tools.pagination import Pagination
from mediagoblin.tools import url
from mediagoblin.db.mixin import UserMixin, MediaEntryMixin

###################
# Custom validators
###################

########
# Models
########


class User(Document, UserMixin):
    """
    An account on this MediaGoblin instance.

    Stored fields (see ``structure``):
     - username: Login name of the user; should be unique within this
       instance.
     - email: The user's email address.
     - created: Timestamp of when the account was created.
     - plugin_data: Free-form mapping where plugins can stash extra
       information about this user.
     - pw_hash: Hashed form of the user's password.
     - email_verified: True once the user has verified their email address.
       Most parts of the site are disabled until then.
     - status: Account state.  Currently either 'needs_email_verification'
       or 'active'.
     - verification_key: Key the user has to present (it is encoded in the
       URL they are sent) to confirm their email address.
     - is_admin: True if this user is an administrator.
     - url: The user's personal webpage/website, if any.
     - bio: The user's biography as entered (plaintext, may use markdown).
     - bio_html: The biography rendered to HTML.
     - fp_verification_key: Forgotten-password verification key.
     - fp_token_expire: A datetime; presumably when the forgotten-password
       key stops being valid (TODO: confirm against the code using it).
    """
    __collection__ = 'users'
    use_dot_notation = True

    structure = {
        # Identity
        'username': unicode,
        'email': unicode,
        'created': datetime.datetime,

        # Extension point for plugins
        'plugin_data': dict,

        # Authentication and account state
        'pw_hash': unicode,
        'email_verified': bool,
        'status': unicode,
        'verification_key': unicode,
        'is_admin': bool,

        # Profile
        'url': unicode,
        'bio': unicode,  # as entered; may contain markdown
        'bio_html': unicode,  # rendered; plaintext or HTML

        # Forgotten-password flow
        'fp_verification_key': unicode,
        'fp_token_expire': datetime.datetime,
    }

    required_fields = [
        'username',
        'created',
        'pw_hash',
        'email',
    ]

    default_values = {
        # The callable itself is stored here, not the result of calling it.
        'created': datetime.datetime.utcnow,
        'email_verified': False,
        'status': u'needs_email_verification',
        'is_admin': False,
    }


class MediaEntry(Document, MediaEntryMixin):
    """
    Record of a piece of media.

    Structure:
     - uploader: A reference to a User who uploaded this.

     - title: Title of this work

     - slug: A normalized "slug" which can be used as part of a URL to retrieve
       this work, such as 'my-works-name-in-slug-form' may be viewable by
       'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
       Note that since URLs are constructed this way, slugs must be unique
       per-uploader.  (An index is provided to enforce that but code should be
       written on the python side to ensure this as well; see
       generate_slug().)

     - created: Date and time of when this piece of work was uploaded.

     - description: Uploader-set description of this work.  This can be marked
       up with MarkDown for slight fanciness (links, boldness, italics,
       paragraphs...)

     - description_html: Rendered version of the description, run through
       Markdown and cleaned with our cleaning tool.

     - media_type: What type of media is this?  Currently we only support
       'image' ;)

     - media_data: Extra information that's media-format-dependent.
       For example, images might contain some EXIF data that's not appropriate
       to other formats.  You might store it like:

         mediaentry.media_data['exif'] = {
             'manufacturer': 'CASIO',
             'model': 'QV-4000',
             'exposure_time': .659}

       Alternately for video you might store:

         # play length in seconds
         mediaentry.media_data['play_length'] = 340

       ... so what's appropriate here really depends on the media type.

     - plugin_data: a mapping of extra plugin information for this
       MediaEntry.  Nothing uses this yet as we don't have plugins, but
       someday we might... :)

     - tags: A list of tags.  Each tag is stored as a dictionary that has a key
       for the actual name and the normalized name-as-slug, so ultimately this
       looks like:
         [{'name': 'Gully Gardens',
           'slug': 'gully-gardens'},
          {'name': 'Castle Adventure Time?!',
           'slug': 'castle-adventure-time'}]

     - state: What's the state of this file?  Active, inactive, disabled, etc...
       But really for now there are only two states:
        "unprocessed": uploaded but needs to go through processing for display
        "processed": processed and able to be displayed

     - queued_media_file: storage interface style filepath describing a file
       queued for processing.  This is stored in the mg_globals.queue_store
       storage system.

     - queued_task_id: celery task id.  Use this to fetch the task state.

     - media_files: Files relevant to this that have actually been processed
       and are available for various types of display.  Stored like:
         {'thumb': ['dir1', 'dir2', 'pic.png']}

     - attachment_files: A list of "attachment" files, ones that aren't
       critical to this piece of media but may be usefully relevant to people
       viewing the work.  (currently unused.)

     - fail_error: path to the exception raised
     - fail_metadata: dict of extra data about a processing failure (its
       contents are not defined in this module).
    """
    __collection__ = 'media_entries'
    use_dot_notation = True

    structure = {
        'uploader': ObjectId,
        'title': unicode,
        'slug': unicode,
        'created': datetime.datetime,
        'description': unicode,  # May contain markdown/up
        'description_html': unicode,  # May contain plaintext, or HTML
        'media_type': unicode,
        'media_data': dict,  # extra data relevant to this media_type
        'plugin_data': dict,  # plugins can dump stuff here.
        'tags': [dict],
        'state': unicode,

        # For now let's assume there can only be one main file queued
        # at a time
        'queued_media_file': [unicode],
        'queued_task_id': unicode,

        # A dictionary of logical names to filepaths
        'media_files': dict,

        # The following should be lists of lists, in appropriate file
        # record form
        'attachment_files': list,

        # If things go badly in processing things, we'll store that
        # data here
        'fail_error': unicode,
        'fail_metadata': dict}

    required_fields = [
        'uploader', 'created', 'media_type', 'slug']

    default_values = {
        'created': datetime.datetime.utcnow,
        'state': u'unprocessed'}

    def get_comments(self, ascending=False):
        """
        Return a cursor over the MediaComment documents attached to this
        entry, sorted by their 'created' field.

        Oldest first if ``ascending`` is true, newest first otherwise.
        """
        order = ASCENDING if ascending else DESCENDING

        return self.db.MediaComment.find({
                'media_entry': self._id}).sort('created', order)

    def generate_slug(self):
        """
        Set self.slug from self.title.

        If this uploader already has an entry with the same slug, the slug
        is prefixed with this entry's _id to keep it distinct.
        """
        self.slug = url.slugify(self.title)

        # Slugs only have to be unique per uploader (URLs have the form
        # /u/<username>/m/<slug>/), so an identical slug belonging to a
        # different uploader is not a conflict.
        duplicate = mg_globals.database.media_entries.find_one(
            {'uploader': self.uploader,
             'slug': self.slug})

        if duplicate:
            self.slug = "%s-%s" % (self._id, self.slug)

    def _url_to_neighbor(self, urlgen, comparison, order):
        """
        Return the URL of the nearest processed entry by the same uploader
        whose _id satisfies ``comparison`` ("$gt" or "$lt") against this
        entry's _id, or None if there is no such entry.
        """
        cursor = self.db.MediaEntry.find({'_id': {comparison: self._id},
                                          'uploader': self.uploader,
                                          'state': 'processed'}).sort(
                                                    '_id', order).limit(1)

        # The cursor yields at most one document because of limit(1).
        for media in cursor:
            return media.url_for_self(urlgen)

        return None

    def url_to_prev(self, urlgen):
        """
        Provide a url to the previous entry from this user, if there is one

        "Previous" is the processed entry with the smallest _id greater
        than this one's (presumably because listings show newest first).
        Returns None if there is none.
        """
        return self._url_to_neighbor(urlgen, "$gt", ASCENDING)

    def url_to_next(self, urlgen):
        """
        Provide a url to the next entry from this user, if there is one

        "Next" is the processed entry with the largest _id smaller than
        this one's.  Returns None if there is none.
        """
        return self._url_to_neighbor(urlgen, "$lt", DESCENDING)

    @property
    def get_uploader(self):
        """The User document whose _id matches this entry's uploader."""
        return self.db.User.find_one({'_id': self.uploader})


class MediaComment(Document):
    """
    A single comment left on a MediaEntry.

    Stored fields (see ``structure``):
     - media_entry: _id of the MediaEntry the comment belongs to.
     - author: _id of the User who wrote the comment.
     - created: Timestamp of when the comment was posted.
     - content: The comment as entered (plaintext, may use markdown).
     - content_html: The HTML that is actually displayed; content run
       through Markdown and the HTML cleaner.
    """

    __collection__ = 'media_comments'
    use_dot_notation = True

    structure = {
        'media_entry': ObjectId,
        'author': ObjectId,
        'created': datetime.datetime,
        'content': unicode,
        'content_html': unicode,
    }

    required_fields = [
        'media_entry',
        'author',
        'created',
        'content',
    ]

    default_values = {
        # The callable itself is stored here, not the result of calling it.
        'created': datetime.datetime.utcnow,
    }

    def media_entry(self):
        """Look up and return the MediaEntry this comment is attached to."""
        # NOTE(review): this method has the same name as the 'media_entry'
        # structure key; presumably that is why the stored id is read with
        # item access instead of dot notation.
        entry_id = self['media_entry']
        return self.db.MediaEntry.find_one({'_id': entry_id})

    @property
    def get_author(self):
        """The User document for this comment's author."""
        author_id = self['author']
        return self.db.User.find_one({'_id': author_id})


# Every document class that register_models() hands to the connection.
REGISTER_MODELS = [
    MediaEntry,
    User,
    MediaComment,
]


def register_models(connection):
    """
    Hand every model listed in REGISTER_MODELS to ``connection.register``.
    """
    models = REGISTER_MODELS
    connection.register(models)
Commit	Line	Data
8e1e744d	1	# GNU MediaGoblin -- federated, autonomous media hosting
12a100e4	2	# Copyright (C) 2011 MediaGoblin contributors. See AUTHORS.
e5572c60 ML	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
243c3843	17	import datetime
4ad5af85	18
c2ddd85e	19	from mongokit import Document
4329be14	20
6e7ce8d1	21	from mediagoblin import mg_globals
faf74067	22	from mediagoblin.db.mongo import migrations
59bd06aa	23	from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId
152a3bfa	24	from mediagoblin.tools.pagination import Pagination
f42e49c3 E	25	from mediagoblin.tools import url
f42e49c3 E	26	from mediagoblin.db.mixin import UserMixin, MediaEntryMixin
d232e0f6	27
7bf3f5db CAW	28	###################
	29	# Custom validators
	30	###################
	31
	32	########
	33	# Models
	34	########
	35
	36
f42e49c3	37	class User(Document, UserMixin):
16bcd1e7 CAW	38	"""
	39	A user of MediaGoblin.
	40
	41	Structure:
	42	- username: The username of this user, should be unique to this instance.
	43	- email: Email address of this user
	44	- created: When the user was created
	45	- plugin_data: a mapping of extra plugin information for this User.
	46	Nothing uses this yet as we don't have plugins, but someday we
	47	might... :)
	48	- pw_hash: Hashed version of user's password.
	49	- email_verified: Whether or not the user has verified their email or not.
	50	Most parts of the site are disabled for users who haven't yet.
	51	- status: whether or not the user is active, etc. Currently only has two
	52	values, 'needs_email_verification' or 'active'. (In the future, maybe
	53	we'll change this to a boolean with a key of 'active' and have a
	54	separate field for a reason the user's been disabled if that's
	55	appropriate... email_verified is already separate, after all.)
	56	- verification_key: If the user is awaiting email verification, the user
	57	will have to provide this key (which will be encoded in the presented
	58	URL) in order to confirm their email as active.
	59	- is_admin: Whether or not this user is an administrator or not.
	60	- url: this user's personal webpage/website, if appropriate.
	61	- bio: biography of this user (plaintext, in markdown)
	62	- bio_html: biography of the user converted to proper HTML.
	63	"""
73a6e206	64	__collection__ = 'users'
7cbddc96	65	use_dot_notation = True
73a6e206	66
d232e0f6 CAW	67	structure = {
d232e0f6 CAW	68	'username': unicode,
24181820	69	'email': unicode,
d232e0f6	70	'created': datetime.datetime,
243c3843	71	'plugin_data': dict, # plugins can dump stuff here.
d232e0f6	72	'pw_hash': unicode,
24181820	73	'email_verified': bool,
4d75522b	74	'status': unicode,
18cf34d4 CAW	75	'verification_key': unicode,
18cf34d4 CAW	76	'is_admin': bool,
243c3843 NY	77	'url': unicode,
	78	'bio': unicode, # May contain markdown
	79	'bio_html': unicode, # May contain plaintext, or HTML
	80	'fp_verification_key': unicode, # forgotten password verification key
	81	'fp_token_expire': datetime.datetime,
d232e0f6 CAW	82	}
d232e0f6 CAW	83
db5912e3	84	required_fields = ['username', 'created', 'pw_hash', 'email']
fc9bb821 CAW	85
fc9bb821 CAW	86	default_values = {
24181820	87	'created': datetime.datetime.utcnow,
4d75522b	88	'email_verified': False,
db1a438f	89	'status': u'needs_email_verification',
18cf34d4	90	'is_admin': False}
080a81ec	91
d232e0f6	92
f42e49c3	93	class MediaEntry(Document, MediaEntryMixin):
080a81ec CAW	94	"""
	95	Record of a piece of media.
	96
	97	Structure:
	98	- uploader: A reference to a User who uploaded this.
	99
	100	- title: Title of this work
	101
	102	- slug: A normalized "slug" which can be used as part of a URL to retrieve
	103	this work, such as 'my-works-name-in-slug-form' may be viewable by
	104	'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
	105	Note that since URLs are constructed this way, slugs must be unique
	106	per-uploader. (An index is provided to enforce that but code should be
	107	written on the python side to ensure this as well.)
	108
	109	- created: Date and time of when this piece of work was uploaded.
	110
	111	- description: Uploader-set description of this work. This can be marked
	112	up with MarkDown for slight fanciness (links, boldness, italics,
	113	paragraphs...)
	114
	115	- description_html: Rendered version of the description, run through
	116	Markdown and cleaned with our cleaning tool.
	117
	118	- media_type: What type of media is this? Currently we only support
	119	'image' ;)
	120
	121	- media_data: Extra information that's media-format-dependent.
	122	For example, images might contain some EXIF data that's not appropriate
	123	to other formats. You might store it like:
	124
ddc1cae9	125	mediaentry.media_data['exif'] = {
080a81ec CAW	126	'manufacturer': 'CASIO',
	127	'model': 'QV-4000',
	128	'exposure_time': .659}
	129
	130	Alternately for video you might store:
	131
	132	# play length in seconds
ddc1cae9	133	mediaentry.media_data['play_length'] = 340
080a81ec CAW	134
	135	... so what's appropriate here really depends on the media type.
	136
	137	- plugin_data: a mapping of extra plugin information for this User.
	138	Nothing uses this yet as we don't have plugins, but someday we
	139	might... :)
	140
	141	- tags: A list of tags. Each tag is stored as a dictionary that has a key
	142	for the actual name and the normalized name-as-slug, so ultimately this
	143	looks like:
	144	[{'name': 'Gully Gardens',
	145	'slug': 'gully-gardens'},
	146	{'name': 'Castle Adventure Time?!",
	147	'slug': 'castle-adventure-time'}]
	148
	149	- state: What's the state of this file? Active, inactive, disabled, etc...
	150	But really for now there are only two states:
	151	"unprocessed": uploaded but needs to go through processing for display
	152	"processed": processed and able to be displayed
	153
	154	- queued_media_file: storage interface style filepath describing a file
	155	queued for processing. This is stored in the mg_globals.queue_store
	156	storage system.
	157
6b9ee0ca CAW	158	- queued_task_id: celery task id. Use this to fetch the task state.
6b9ee0ca CAW	159
080a81ec CAW	160	- media_files: Files relevant to this that have actually been processed
	161	and are available for various types of display. Stored like:
	162	{'thumb': ['dir1', 'dir2', 'pic.png'}
	163
	164	- attachment_files: A list of "attachment" files, ones that aren't
	165	critical to this piece of media but may be usefully relevant to people
	166	viewing the work. (currently unused.)
6c50c210	167
243c3843 NY	168	- fail_error: path to the exception raised
243c3843 NY	169	- fail_metadata:
080a81ec	170	"""
4d75522b	171	__collection__ = 'media_entries'
7cbddc96	172	use_dot_notation = True
4d75522b CAW	173
4d75522b CAW	174	structure = {
757f37a5	175	'uploader': ObjectId,
4d75522b	176	'title': unicode,
1013bdaf	177	'slug': unicode,
4d75522b	178	'created': datetime.datetime,
243c3843 NY	179	'description': unicode, # May contain markdown/up
243c3843 NY	180	'description_html': unicode, # May contain plaintext, or HTML
4d75522b	181	'media_type': unicode,
243c3843 NY	182	'media_data': dict, # extra data relevant to this media_type
243c3843 NY	183	'plugin_data': dict, # plugins can dump stuff here.
0712a06d	184	'tags': [dict],
74ae6b11 CAW	185	'state': unicode,
74ae6b11 CAW	186
fa7f9c61 CAW	187	# For now let's assume there can only be one main file queued
	188	# at a time
	189	'queued_media_file': [unicode],
6b9ee0ca	190	'queued_task_id': unicode,
fa7f9c61 CAW	191
	192	# A dictionary of logical names to filepaths
	193	'media_files': dict,
	194
74ae6b11 CAW	195	# The following should be lists of lists, in appropriate file
74ae6b11 CAW	196	# record form
6c50c210 CAW	197	'attachment_files': list,
	198
	199	# If things go badly in processing things, we'll store that
	200	# data here
	201	'fail_error': unicode,
	202	'fail_metadata': dict}
4d75522b CAW	203
4d75522b CAW	204	required_fields = [
b1ae76ae	205	'uploader', 'created', 'media_type', 'slug']
4d75522b CAW	206
4d75522b CAW	207	default_values = {
74ae6b11 CAW	208	'created': datetime.datetime.utcnow,
74ae6b11 CAW	209	'state': u'unprocessed'}
4d75522b	210
e62fc611 PUS	211	def get_comments(self, ascending=False):
	212	if ascending:
	213	order = ASCENDING
	214	else:
	215	order = DESCENDING
	216
6f59a3a3	217	return self.db.MediaComment.find({
e62fc611	218	'media_entry': self._id}).sort('created', order)
6f59a3a3	219
0546833c	220	def generate_slug(self):
5da0bf90	221	self.slug = url.slugify(self.title)
0546833c	222
6e7ce8d1	223	duplicate = mg_globals.database.media_entries.find_one(
5da0bf90	224	{'slug': self.slug})
080a81ec	225
0546833c	226	if duplicate:
5da0bf90	227	self.slug = "%s-%s" % (self._id, self.slug)
4d75522b	228
9c0fe63f CFD	229	def url_to_prev(self, urlgen):
	230	"""
	231	Provide a url to the previous entry from this user, if there is one
	232	"""
eabe6b67	233	cursor = self.db.MediaEntry.find({'_id': {"$gt": self._id},
1ceb4fc8	234	'uploader': self.uploader,
ce2ac488	235	'state': 'processed'}).sort(
77b95801	236	'_id', ASCENDING).limit(1)
0eb649ff E	237	for media in cursor:
0eb649ff E	238	return media.url_for_self(urlgen)
080a81ec	239
9c0fe63f CFD	240	def url_to_next(self, urlgen):
	241	"""
	242	Provide a url to the next entry from this user, if there is one
	243	"""
eabe6b67	244	cursor = self.db.MediaEntry.find({'_id': {"$lt": self._id},
1ceb4fc8	245	'uploader': self.uploader,
ce2ac488	246	'state': 'processed'}).sort(
77b95801	247	'_id', DESCENDING).limit(1)
9c0fe63f	248
0eb649ff E	249	for media in cursor:
0eb649ff E	250	return media.url_for_self(urlgen)
6926b23d	251
05751758	252	@property
30188321	253	def get_uploader(self):
1ceb4fc8	254	return self.db.User.find_one({'_id': self.uploader})
16509be1	255
b27ec167	256
c11f21ab	257	class MediaComment(Document):
e83dc091 CAW	258	"""
	259	A comment on a MediaEntry.
	260
	261	Structure:
	262	- media_entry: The media entry this comment is attached to
	263	- author: user who posted this comment
	264	- created: when the comment was created
	265	- content: plaintext (but markdown'able) version of the comment's content.
	266	- content_html: the actual html-rendered version of the comment displayed.
	267	Run through Markdown and the HTML cleaner.
	268	"""
	269
c11f21ab	270	__collection__ = 'media_comments'
7cbddc96	271	use_dot_notation = True
6926b23d	272
c11f21ab JW	273	structure = {
	274	'media_entry': ObjectId,
	275	'author': ObjectId,
	276	'created': datetime.datetime,
	277	'content': unicode,
	278	'content_html': unicode}
	279
	280	required_fields = [
7bd8197f	281	'media_entry', 'author', 'created', 'content']
c11f21ab JW	282
	283	default_values = {
	284	'created': datetime.datetime.utcnow}
	285
	286	def media_entry(self):
7bd8197f	287	return self.db.MediaEntry.find_one({'_id': self['media_entry']})
c11f21ab	288
fb7dd855 E	289	@property
fb7dd855 E	290	def get_author(self):
c11f21ab	291	return self.db.User.find_one({'_id': self['author']})
6926b23d	292
c2ddd85e	293
c11f21ab JW	294	REGISTER_MODELS = [
	295	MediaEntry,
	296	User,
	297	MediaComment]
d232e0f6	298
4329be14	299
d232e0f6 CAW	300	def register_models(connection):
	301	"""
	302	Register all models in REGISTER_MODELS with this connection.
	303	"""
db61f7d1	304	connection.register(REGISTER_MODELS)