[mediagoblin.git] / mediagoblin / db / models.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import datetime, uuid

from mongokit import Document

from mediagoblin import util
from mediagoblin.auth import lib as auth_lib
from mediagoblin import mg_globals
from mediagoblin.db import migrations
from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
from mediagoblin.util import Pagination
from mediagoblin.util import DISPLAY_IMAGE_FETCHING_ORDER


###################
# Custom validators
###################

########
# Models
########


class User(Document):
    """
    Account record for one person using this MediaGoblin instance.

    Keys stored on the document:
     - username: name of the account; should be unique to this instance.
     - email: the account's email address.
     - created: timestamp of when the account was created.
     - plugin_data: mapping where plugins may keep extra per-user
       information (nothing uses it yet, as there are no plugins).
     - pw_hash: hashed form of the account's password.
     - email_verified: True once the user has verified their email.  Most
       parts of the site are disabled until then.
     - status: whether the account is active, etc.  Only two values exist
       at the moment: 'needs_email_verification' and 'active'.  (This may
       later become a boolean 'active' key plus a separate field holding
       the reason an account was disabled.)
     - verification_key: key the user has to present (it is encoded in the
       URL shown to them) to confirm their email address.
     - is_admin: True when the account belongs to an administrator.
     - url: the user's personal webpage/website, if they have one.
     - bio: the user's biography as plaintext / markdown.
     - bio_html: the biography converted to HTML.
    """
    __collection__ = 'users'

    structure = {
        'username': unicode,
        'email': unicode,
        'created': datetime.datetime,
        # Free-form space for plugins.
        'plugin_data': dict,
        'pw_hash': unicode,
        'email_verified': bool,
        'status': unicode,
        'verification_key': unicode,
        'is_admin': bool,
        'url': unicode,
        # Source text of the biography; may contain markdown.
        'bio': unicode,
        # Display version of the biography; may contain plaintext or HTML.
        'bio_html': unicode,
    }

    required_fields = [
        'username',
        'created',
        'pw_hash',
        'email',
    ]

    default_values = {
        # Callables are stored uncalled here (note: no parentheses).
        'created': datetime.datetime.utcnow,
        'email_verified': False,
        'status': u'needs_email_verification',
        'verification_key': lambda: unicode(uuid.uuid4()),
        'is_admin': False,
    }

    def check_login(self, password):
        """
        Check ``password`` against this user's stored 'pw_hash'.

        Returns whatever auth_lib.bcrypt_check_password returns for the
        given password and the stored hash.
        """
        stored_hash = self['pw_hash']
        return auth_lib.bcrypt_check_password(password, stored_hash)


class MediaEntry(Document):
    """
    Record of a piece of media.

    Structure:
     - uploader: A reference to a User who uploaded this.

     - title: Title of this work

     - slug: A normalized "slug" which can be used as part of a URL to retrieve
       this work, such as 'my-works-name-in-slug-form' may be viewable by
       'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
       Note that since URLs are constructed this way, slugs must be unique
       per-uploader.  (An index is provided to enforce that but code should be
       written on the python side to ensure this as well.)

     - created: Date and time of when this piece of work was uploaded.

     - description: Uploader-set description of this work.  This can be marked
       up with MarkDown for slight fanciness (links, boldness, italics,
       paragraphs...)

     - description_html: Rendered version of the description, run through
       Markdown and cleaned with our cleaning tool.

     - media_type: What type of media is this?  Currently we only support
       'image' ;)

     - media_data: Extra information that's media-format-dependent.
       For example, images might contain some EXIF data that's not appropriate
       to other formats.  You might store it like:

         mediaentry['media_data']['exif'] = {
             'manufacturer': 'CASIO',
             'model': 'QV-4000',
             'exposure_time': .659}

       Alternately for video you might store:

         # play length in seconds
         mediaentry['media_data']['play_length'] = 340

       ... so what's appropriate here really depends on the media type.

     - plugin_data: a mapping of extra plugin information for this
       MediaEntry.  Nothing uses this yet as we don't have plugins, but
       someday we might... :)

     - tags: A list of tags.  Each tag is stored as a dictionary that has a key
       for the actual name and the normalized name-as-slug, so ultimately this
       looks like:
         [{'name': 'Gully Gardens',
           'slug': 'gully-gardens'},
          {'name': 'Castle Adventure Time?!',
           'slug': 'castle-adventure-time'}]

     - state: What's the state of this file?  Active, inactive, disabled, etc...
       But really for now there are only two states:
        "unprocessed": uploaded but needs to go through processing for display
        "processed": processed and able to be displayed

     - queued_media_file: storage interface style filepath describing a file
       queued for processing.  This is stored in the mg_globals.queue_store
       storage system.

     - queued_task_id: celery task id.  Use this to fetch the task state.

     - media_files: Files relevant to this that have actually been processed
       and are available for various types of display.  Stored like:
         {'thumb': ['dir1', 'dir2', 'pic.png']}

     - attachment_files: A list of "attachment" files, ones that aren't
       critical to this piece of media but may be usefully relevant to people
       viewing the work.  (currently unused.)
    """
    __collection__ = 'media_entries'

    structure = {
        'uploader': ObjectId,
        'title': unicode,
        'slug': unicode,
        'created': datetime.datetime,
        'description': unicode, # May contain markdown/up
        'description_html': unicode, # May contain plaintext, or HTML
        'media_type': unicode,
        'media_data': dict, # extra data relevant to this media_type
        'plugin_data': dict, # plugins can dump stuff here.
        'tags': [dict],
        'state': unicode,

        # For now let's assume there can only be one main file queued
        # at a time
        'queued_media_file': [unicode],
        'queued_task_id': unicode,

        # A dictionary of logical names to filepaths
        'media_files': dict,

        # The following should be lists of lists, in appropriate file
        # record form
        'attachment_files': list}

    required_fields = [
        'uploader', 'created', 'media_type', 'slug']

    default_values = {
        'created': datetime.datetime.utcnow,
        'state': u'unprocessed'}

    def get_comments(self):
        """
        Return a cursor over the MediaComment documents whose 'media_entry'
        is this entry's '_id', sorted on 'created' in DESCENDING order.
        """
        return self.db.MediaComment.find({
                'media_entry': self['_id']}).sort('created', DESCENDING)

    def get_display_media(self, media_map,
                          fetch_order=DISPLAY_IMAGE_FETCHING_ORDER):
        """
        Find the best media for display.

        Args:
        - media_map: a dict like
          {u'image_size': [u'dir1', u'dir2', u'image.jpg']}
        - fetch_order: the size names to try, most preferred first

        Returns:
        The value stored in media_map for the first size name in
        fetch_order that media_map contains, or None when media_map has
        none of them.
        """
        # Honour the caller-supplied order; the module-level constant is
        # only the default for this argument.
        for media_size in fetch_order:
            if media_size in media_map:
                return media_map[media_size]

        return None

    def main_mediafile(self):
        """
        Placeholder that is not implemented yet; always returns None.
        """
        pass

    def generate_slug(self):
        """
        Set self['slug'] from self['title'].

        The slugified title is used as-is unless some document in the
        media_entries collection already carries that slug, in which case
        our '_id' is prepended to keep the two apart.
        """
        self['slug'] = util.slugify(self['title'])

        # NOTE(review): this lookup is global (not limited to our uploader)
        # and would also match this very entry if it has already been saved
        # with the same slug.  Confirm against the collection's indexes
        # before narrowing the query.
        duplicate = mg_globals.database.media_entries.find_one(
            {'slug': self['slug']})

        if duplicate:
            self['slug'] = "%s-%s" % (self['_id'], self['slug'])

    def url_for_self(self, urlgen):
        """
        Generate an appropriate url for ourselves

        Use a slug if we have one, else use our '_id'.
        """
        uploader = self.uploader()

        if self.get('slug'):
            media = self['slug']
        else:
            media = unicode(self['_id'])

        return urlgen(
            'mediagoblin.user_pages.media_home',
            user=uploader['username'],
            media=media)

    def _url_to_neighbor(self, urlgen, comparison, direction):
        """
        Shared implementation of url_to_prev() and url_to_next().

        Args:
        - urlgen: url generating callable, used as in url_for_self()
        - comparison: query operator ("$gt" or "$lt") applied to '_id'
          relative to our own '_id'
        - direction: sort direction for '_id' (ASCENDING or DESCENDING), so
          that the first result is the entry closest to this one

        Returns:
        The url of the closest matching 'processed' entry by the same
        uploader, or None if there is no such entry.
        """
        cursor = self.db.MediaEntry.find(
            {'_id': {comparison: self['_id']},
             'uploader': self['uploader'],
             'state': 'processed'}).sort('_id', direction).limit(1)

        # Because of limit(1) this loop body runs at most once.  Iterating
        # replaces the former count() + cursor[0] pair, which cost two
        # round trips to the database.
        for neighbor in cursor:
            # Same rule as url_for_self(): prefer the slug, fall back on
            # the '_id' when the slug is missing or empty.
            media = neighbor.get('slug') or neighbor['_id']
            return urlgen('mediagoblin.user_pages.media_home',
                          user=self.uploader()['username'],
                          media=unicode(media))

        return None

    def url_to_prev(self, urlgen):
        """
        Provide a url to the previous entry from this user, if there is one

        "Previous" is the processed entry with the smallest '_id' greater
        than ours.  Returns None when there is none.
        """
        return self._url_to_neighbor(urlgen, "$gt", ASCENDING)

    def url_to_next(self, urlgen):
        """
        Provide a url to the next entry from this user, if there is one

        "Next" is the processed entry with the largest '_id' smaller than
        ours.  Returns None when there is none.
        """
        return self._url_to_neighbor(urlgen, "$lt", DESCENDING)

    def uploader(self):
        """
        Return the User document whose '_id' is stored in self['uploader'],
        as found by find_one().
        """
        return self.db.User.find_one({'_id': self['uploader']})


class MediaComment(Document):
    """
    One comment left on a MediaEntry.

    Keys stored on the document:
     - media_entry: id of the media entry the comment belongs to.
     - author: id of the user who posted the comment.
     - created: timestamp of when the comment was created.
     - content: the comment text as plaintext (markdown is allowed).
     - content_html: the html-rendered version that actually gets
       displayed; run through Markdown and the HTML cleaner.
    """

    __collection__ = 'media_comments'

    structure = {
        'media_entry': ObjectId,
        'author': ObjectId,
        'created': datetime.datetime,
        'content': unicode,
        'content_html': unicode,
    }

    required_fields = [
        'media_entry',
        'author',
        'created',
        'content',
    ]

    default_values = {
        # Stored uncalled (no parentheses).
        'created': datetime.datetime.utcnow,
    }

    def media_entry(self):
        """
        Look up the MediaEntry whose '_id' equals our 'media_entry' value.
        """
        entry_id = self['media_entry']
        return self.db.MediaEntry.find_one({'_id': entry_id})

    def author(self):
        """
        Look up the User whose '_id' equals our 'author' value.
        """
        author_id = self['author']
        return self.db.User.find_one({'_id': author_id})


# Model classes that register_models() passes to the database connection.
REGISTER_MODELS = [
    MediaEntry,
    User,
    MediaComment,
]


def register_models(connection):
    """
    Hand every model class listed in REGISTER_MODELS to ``connection``.

    The whole list is passed in a single ``connection.register`` call.
    """
    models = REGISTER_MODELS
    connection.register(models)
Commit	Line	Data
8e1e744d	1	# GNU MediaGoblin -- federated, autonomous media hosting
e5572c60 ML	2	# Copyright (C) 2011 Free Software Foundation, Inc
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
db1a438f	17	import datetime, uuid
4ad5af85	18
c2ddd85e	19	from mongokit import Document
4329be14	20
0546833c	21	from mediagoblin import util
4ad5af85	22	from mediagoblin.auth import lib as auth_lib
6e7ce8d1	23	from mediagoblin import mg_globals
757f37a5	24	from mediagoblin.db import migrations
9c0fe63f	25	from mediagoblin.db.util import ASCENDING, DESCENDING, ObjectId
7bd8197f	26	from mediagoblin.util import Pagination
2c9e635a	27	from mediagoblin.util import DISPLAY_IMAGE_FETCHING_ORDER
d232e0f6	28
d232e0f6	29
7bf3f5db CAW	30	###################
	31	# Custom validators
	32	###################
	33
	34	########
	35	# Models
	36	########
	37
	38
d232e0f6	39	class User(Document):
16bcd1e7 CAW	40	"""
	41	A user of MediaGoblin.
	42
	43	Structure:
	44	- username: The username of this user, should be unique to this instance.
	45	- email: Email address of this user
	46	- created: When the user was created
	47	- plugin_data: a mapping of extra plugin information for this User.
	48	Nothing uses this yet as we don't have plugins, but someday we
	49	might... :)
	50	- pw_hash: Hashed version of user's password.
	51	- email_verified: Whether or not the user has verified their email or not.
	52	Most parts of the site are disabled for users who haven't yet.
	53	- status: whether or not the user is active, etc. Currently only has two
	54	values, 'needs_email_verification' or 'active'. (In the future, maybe
	55	we'll change this to a boolean with a key of 'active' and have a
	56	separate field for a reason the user's been disabled if that's
	57	appropriate... email_verified is already separate, after all.)
	58	- verification_key: If the user is awaiting email verification, the user
	59	will have to provide this key (which will be encoded in the presented
	60	URL) in order to confirm their email as active.
	61	- is_admin: Whether or not this user is an administrator or not.
	62	- url: this user's personal webpage/website, if appropriate.
	63	- bio: biography of this user (plaintext, in markdown)
	64	- bio_html: biography of the user converted to proper HTML.
	65	"""
73a6e206 CAW	66	__collection__ = 'users'
73a6e206 CAW	67
d232e0f6 CAW	68	structure = {
d232e0f6 CAW	69	'username': unicode,
24181820	70	'email': unicode,
d232e0f6 CAW	71	'created': datetime.datetime,
	72	'plugin_data': dict, # plugins can dump stuff here.
	73	'pw_hash': unicode,
24181820	74	'email_verified': bool,
4d75522b	75	'status': unicode,
18cf34d4 CAW	76	'verification_key': unicode,
18cf34d4 CAW	77	'is_admin': bool,
630b57a3	78	'url' : unicode,
4c465852 AW	79	'bio' : unicode, # May contain markdown
4c465852 AW	80	'bio_html': unicode, # May contain plaintext, or HTML
d232e0f6 CAW	81	}
d232e0f6 CAW	82
db5912e3	83	required_fields = ['username', 'created', 'pw_hash', 'email']
fc9bb821 CAW	84
fc9bb821 CAW	85	default_values = {
24181820	86	'created': datetime.datetime.utcnow,
4d75522b	87	'email_verified': False,
db1a438f	88	'status': u'needs_email_verification',
18cf34d4 CAW	89	'verification_key': lambda: unicode(uuid.uuid4()),
18cf34d4 CAW	90	'is_admin': False}
080a81ec	91
4ad5af85 CAW	92	def check_login(self, password):
	93	"""
	94	See if a user can login with this password
	95	"""
	96	return auth_lib.bcrypt_check_password(
	97	password, self['pw_hash'])
	98
d232e0f6	99
4d75522b	100	class MediaEntry(Document):
080a81ec CAW	101	"""
	102	Record of a piece of media.
	103
	104	Structure:
	105	- uploader: A reference to a User who uploaded this.
	106
	107	- title: Title of this work
	108
	109	- slug: A normalized "slug" which can be used as part of a URL to retrieve
	110	this work, such as 'my-works-name-in-slug-form' may be viewable by
	111	'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
	112	Note that since URLs are constructed this way, slugs must be unique
	113	per-uploader. (An index is provided to enforce that but code should be
	114	written on the python side to ensure this as well.)
	115
	116	- created: Date and time of when this piece of work was uploaded.
	117
	118	- description: Uploader-set description of this work. This can be marked
	119	up with MarkDown for slight fanciness (links, boldness, italics,
	120	paragraphs...)
	121
	122	- description_html: Rendered version of the description, run through
	123	Markdown and cleaned with our cleaning tool.
	124
	125	- media_type: What type of media is this? Currently we only support
	126	'image' ;)
	127
	128	- media_data: Extra information that's media-format-dependent.
	129	For example, images might contain some EXIF data that's not appropriate
	130	to other formats. You might store it like:
	131
	132	mediaentry['media_data']['exif'] = {
	133	'manufacturer': 'CASIO',
	134	'model': 'QV-4000',
	135	'exposure_time': .659}
	136
	137	Alternately for video you might store:
	138
	139	# play length in seconds
	140	mediaentry['media_data']['play_length'] = 340
	141
	142	... so what's appropriate here really depends on the media type.
	143
	144	- plugin_data: a mapping of extra plugin information for this User.
	145	Nothing uses this yet as we don't have plugins, but someday we
	146	might... :)
	147
	148	- tags: A list of tags. Each tag is stored as a dictionary that has a key
	149	for the actual name and the normalized name-as-slug, so ultimately this
	150	looks like:
	151	[{'name': 'Gully Gardens',
	152	'slug': 'gully-gardens'},
	153	{'name': 'Castle Adventure Time?!",
	154	'slug': 'castle-adventure-time'}]
	155
	156	- state: What's the state of this file? Active, inactive, disabled, etc...
	157	But really for now there are only two states:
	158	"unprocessed": uploaded but needs to go through processing for display
	159	"processed": processed and able to be displayed
	160
	161	- queued_media_file: storage interface style filepath describing a file
	162	queued for processing. This is stored in the mg_globals.queue_store
	163	storage system.
	164
6b9ee0ca CAW	165	- queued_task_id: celery task id. Use this to fetch the task state.
6b9ee0ca CAW	166
080a81ec CAW	167	- media_files: Files relevant to this that have actually been processed
	168	and are available for various types of display. Stored like:
	169	{'thumb': ['dir1', 'dir2', 'pic.png'}
	170
	171	- attachment_files: A list of "attachment" files, ones that aren't
	172	critical to this piece of media but may be usefully relevant to people
	173	viewing the work. (currently unused.)
080a81ec	174	"""
4d75522b CAW	175	__collection__ = 'media_entries'
	176
	177	structure = {
757f37a5	178	'uploader': ObjectId,
4d75522b	179	'title': unicode,
1013bdaf	180	'slug': unicode,
4d75522b	181	'created': datetime.datetime,
44e2da2f JW	182	'description': unicode, # May contain markdown/up
44e2da2f JW	183	'description_html': unicode, # May contain plaintext, or HTML
4d75522b CAW	184	'media_type': unicode,
	185	'media_data': dict, # extra data relevant to this media_type
	186	'plugin_data': dict, # plugins can dump stuff here.
0712a06d	187	'tags': [dict],
74ae6b11 CAW	188	'state': unicode,
74ae6b11 CAW	189
fa7f9c61 CAW	190	# For now let's assume there can only be one main file queued
	191	# at a time
	192	'queued_media_file': [unicode],
6b9ee0ca	193	'queued_task_id': unicode,
fa7f9c61 CAW	194
	195	# A dictionary of logical names to filepaths
	196	'media_files': dict,
	197
74ae6b11 CAW	198	# The following should be lists of lists, in appropriate file
74ae6b11 CAW	199	# record form
84abd2bb	200	'attachment_files': list}
4d75522b CAW	201
4d75522b CAW	202	required_fields = [
b1ae76ae	203	'uploader', 'created', 'media_type', 'slug']
4d75522b CAW	204
4d75522b CAW	205	default_values = {
74ae6b11 CAW	206	'created': datetime.datetime.utcnow,
74ae6b11 CAW	207	'state': u'unprocessed'}
4d75522b	208
6f59a3a3 JW	209	def get_comments(self):
	210	return self.db.MediaComment.find({
	211	'media_entry': self['_id']}).sort('created', DESCENDING)
	212
2c9e635a JW	213	def get_display_media(self, media_map, fetch_order=DISPLAY_IMAGE_FETCHING_ORDER):
	214	"""
	215	Find the best media for display.
	216
	217	Args:
	218	- media_map: a dict like
	219	{u'image_size': [u'dir1', u'dir2', u'image.jpg']}
	220	- fetch_order: the order we should try fetching images in
	221
	222	Returns:
	223	(media_size, media_path)
	224	"""
	225	media_sizes = media_map.keys()
380ac094	226
2c9e635a JW	227	for media_size in DISPLAY_IMAGE_FETCHING_ORDER:
	228	if media_size in media_sizes:
	229	return media_map[media_size]
	230
4d75522b CAW	231	def main_mediafile(self):
4d75522b CAW	232	pass
6f59a3a3	233
0546833c AW	234	def generate_slug(self):
	235	self['slug'] = util.slugify(self['title'])
	236
6e7ce8d1	237	duplicate = mg_globals.database.media_entries.find_one(
f0545dde	238	{'slug': self['slug']})
080a81ec	239
0546833c AW	240	if duplicate:
0546833c AW	241	self['slug'] = "%s-%s" % (self['_id'], self['slug'])
4d75522b	242
6926b23d CAW	243	def url_for_self(self, urlgen):
	244	"""
	245	Generate an appropriate url for ourselves
	246
	247	Use a slug if we have one, else use our '_id'.
	248	"""
16509be1 CAW	249	uploader = self.uploader()
16509be1 CAW	250
6926b23d CAW	251	if self.get('slug'):
	252	return urlgen(
	253	'mediagoblin.user_pages.media_home',
16509be1	254	user=uploader['username'],
6926b23d CAW	255	media=self['slug'])
	256	else:
	257	return urlgen(
	258	'mediagoblin.user_pages.media_home',
16509be1	259	user=uploader['username'],
6926b23d	260	media=unicode(self['_id']))
080a81ec	261
9c0fe63f CFD	262	def url_to_prev(self, urlgen):
	263	"""
	264	Provide a url to the previous entry from this user, if there is one
	265	"""
080a81ec	266	cursor = self.db.MediaEntry.find({'_id' : {"$gt": self['_id']},
ce2ac488 CFD	267	'uploader': self['uploader'],
ce2ac488 CFD	268	'state': 'processed'}).sort(
77b95801	269	'_id', ASCENDING).limit(1)
9c0fe63f CFD	270	if cursor.count():
	271	return urlgen('mediagoblin.user_pages.media_home',
	272	user=self.uploader()['username'],
b1db2c2e	273	media=unicode(cursor[0]['slug']))
080a81ec	274
9c0fe63f CFD	275	def url_to_next(self, urlgen):
	276	"""
	277	Provide a url to the next entry from this user, if there is one
	278	"""
080a81ec	279	cursor = self.db.MediaEntry.find({'_id' : {"$lt": self['_id']},
ce2ac488 CFD	280	'uploader': self['uploader'],
ce2ac488 CFD	281	'state': 'processed'}).sort(
77b95801	282	'_id', DESCENDING).limit(1)
9c0fe63f CFD	283
	284	if cursor.count():
	285	return urlgen('mediagoblin.user_pages.media_home',
	286	user=self.uploader()['username'],
b1db2c2e	287	media=unicode(cursor[0]['slug']))
6926b23d	288
16509be1 CAW	289	def uploader(self):
	290	return self.db.User.find_one({'_id': self['uploader']})
	291
b27ec167	292
c11f21ab	293	class MediaComment(Document):
e83dc091 CAW	294	"""
	295	A comment on a MediaEntry.
	296
	297	Structure:
	298	- media_entry: The media entry this comment is attached to
	299	- author: user who posted this comment
	300	- created: when the comment was created
	301	- content: plaintext (but markdown'able) version of the comment's content.
	302	- content_html: the actual html-rendered version of the comment displayed.
	303	Run through Markdown and the HTML cleaner.
	304	"""
	305
c11f21ab	306	__collection__ = 'media_comments'
6926b23d	307
c11f21ab JW	308	structure = {
	309	'media_entry': ObjectId,
	310	'author': ObjectId,
	311	'created': datetime.datetime,
	312	'content': unicode,
	313	'content_html': unicode}
	314
	315	required_fields = [
7bd8197f	316	'media_entry', 'author', 'created', 'content']
c11f21ab JW	317
	318	default_values = {
	319	'created': datetime.datetime.utcnow}
	320
	321	def media_entry(self):
7bd8197f	322	return self.db.MediaEntry.find_one({'_id': self['media_entry']})
c11f21ab JW	323
	324	def author(self):
	325	return self.db.User.find_one({'_id': self['author']})
6926b23d	326
c2ddd85e	327
c11f21ab JW	328	REGISTER_MODELS = [
	329	MediaEntry,
	330	User,
	331	MediaComment]
d232e0f6	332
4329be14	333
d232e0f6 CAW	334	def register_models(connection):
	335	"""
	336	Register all models in REGISTER_MODELS with this connection.
	337	"""
db61f7d1 CAW	338	connection.register(REGISTER_MODELS)
db61f7d1 CAW	339