It's 2012 all up in here
[mediagoblin.git] / mediagoblin / db / mongo / models.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import datetime
18
19 from mongokit import Document
20
21 from mediagoblin import mg_globals
22 from mediagoblin.db.mongo import migrations
23 from mediagoblin.db.mongo.util import ASCENDING, DESCENDING, ObjectId
24 from mediagoblin.tools.pagination import Pagination
25 from mediagoblin.tools import url
26 from mediagoblin.db.mixin import UserMixin, MediaEntryMixin
27
28 ###################
29 # Custom validators
30 ###################
31
32 ########
33 # Models
34 ########
35
36
class User(Document, UserMixin):
    """
    Account record for a person using this MediaGoblin instance.

    Fields:
     - username: Login name; should be unique within this instance.
     - email: The user's email address.
     - created: When the account was created (defaults to utcnow).
     - plugin_data: Free-form mapping reserved for plugin information.
       Nothing uses it yet, since there are no plugins.
     - pw_hash: Hash of the user's password.
     - email_verified: Whether the user has confirmed their email address.
       Most of the site stays disabled for users who have not.
     - status: Account state.  Currently either
       'needs_email_verification' or 'active'.  (This may later become an
       'active' boolean plus a separate field giving the reason an account
       was disabled; email_verified is already tracked separately.)
     - verification_key: Key the user must present (it is encoded in the
       URL given to them) to confirm their email address.
     - is_admin: Whether this user is an administrator.
     - url: The user's personal website, if any.
     - bio: Biography as entered by the user (plaintext, may use markdown).
     - bio_html: The biography converted to HTML.
     - fp_verification_key: Verification key for the forgotten-password
       flow.
     - fp_token_expire: Datetime stored alongside fp_verification_key;
       presumably the moment that key stops being valid (confirm against
       the password-reset code).
    """
    __collection__ = 'users'
    use_dot_notation = True

    structure = {
        'username': unicode,
        'email': unicode,
        'created': datetime.datetime,
        # Scratch space for plugins.
        'plugin_data': dict,
        'pw_hash': unicode,
        'email_verified': bool,
        'status': unicode,
        'verification_key': unicode,
        'is_admin': bool,
        'url': unicode,
        # Raw biography text; may contain markdown.
        'bio': unicode,
        # Rendered biography; may be plaintext or HTML.
        'bio_html': unicode,
        # Forgotten-password verification key.
        'fp_verification_key': unicode,
        'fp_token_expire': datetime.datetime,
    }

    required_fields = [
        'username',
        'created',
        'pw_hash',
        'email',
    ]

    # 'created' is a callable, so the timestamp is taken when the default
    # is applied rather than when this module is imported.
    default_values = {
        'created': datetime.datetime.utcnow,
        'email_verified': False,
        'status': u'needs_email_verification',
        'is_admin': False,
    }
91
92
class MediaEntry(Document, MediaEntryMixin):
    """
    Record of a piece of media.

    Structure:
     - uploader: A reference to a User who uploaded this.

     - title: Title of this work

     - slug: A normalized "slug" which can be used as part of a URL to
       retrieve this work, such as 'my-works-name-in-slug-form' may be
       viewable by
       'http://mg.example.org/u/username/m/my-works-name-in-slug-form/'
       Note that since URLs are constructed this way, slugs must be unique
       per-uploader.  (An index is provided to enforce that but code should
       be written on the python side to ensure this as well.)

     - created: Date and time of when this piece of work was uploaded.

     - description: Uploader-set description of this work.  This can be
       marked up with MarkDown for slight fanciness (links, boldness,
       italics, paragraphs...)

     - description_html: Rendered version of the description, run through
       Markdown and cleaned with our cleaning tool.

     - media_type: What type of media is this?  Currently we only support
       'image' ;)

     - media_data: Extra information that's media-format-dependent.
       For example, images might contain some EXIF data that's not
       appropriate to other formats.  You might store it like:

         mediaentry.media_data['exif'] = {
             'manufacturer': 'CASIO',
             'model': 'QV-4000',
             'exposure_time': .659}

       Alternately for video you might store:

         # play length in seconds
         mediaentry.media_data['play_length'] = 340

       ... so what's appropriate here really depends on the media type.

     - plugin_data: a mapping of extra plugin information for this media
       entry.  Nothing uses this yet as we don't have plugins, but someday
       we might... :)

     - tags: A list of tags.  Each tag is stored as a dictionary that has a
       key for the actual name and the normalized name-as-slug, so
       ultimately this looks like:
         [{'name': 'Gully Gardens',
           'slug': 'gully-gardens'},
          {'name': 'Castle Adventure Time?!',
           'slug': 'castle-adventure-time'}]

     - state: What's the state of this file?  Active, inactive, disabled,
       etc...  But really for now there are only two states:
        "unprocessed": uploaded but needs to go through processing for
                       display
        "processed": processed and able to be displayed

     - license: URI for media's license.

     - queued_media_file: storage interface style filepath describing a
       file queued for processing.  This is stored in the
       mg_globals.queue_store storage system.

     - queued_task_id: celery task id.  Use this to fetch the task state.

     - media_files: Files relevant to this that have actually been
       processed and are available for various types of display.  Stored
       like:
         {'thumb': ['dir1', 'dir2', 'pic.png']}

     - attachment_files: A list of "attachment" files, ones that aren't
       critical to this piece of media but may be usefully relevant to
       people viewing the work.  (currently unused.)

     - fail_error: path to the exception raised
     - fail_metadata: dictionary stored together with fail_error when
       processing goes badly.
    """
    __collection__ = 'media_entries'
    use_dot_notation = True

    structure = {
        'uploader': ObjectId,
        'title': unicode,
        'slug': unicode,
        'created': datetime.datetime,
        'description': unicode,  # May contain markdown/up
        'description_html': unicode,  # May contain plaintext, or HTML
        'media_type': unicode,
        'media_data': dict,  # extra data relevant to this media_type
        'plugin_data': dict,  # plugins can dump stuff here.
        'tags': [dict],
        'state': unicode,
        'license': unicode,

        # For now let's assume there can only be one main file queued
        # at a time
        'queued_media_file': [unicode],
        'queued_task_id': unicode,

        # A dictionary of logical names to filepaths
        'media_files': dict,

        # The following should be lists of lists, in appropriate file
        # record form
        'attachment_files': list,

        # If things go badly in processing things, we'll store that
        # data here
        'fail_error': unicode,
        'fail_metadata': dict}

    required_fields = [
        'uploader', 'created', 'media_type', 'slug']

    default_values = {
        'created': datetime.datetime.utcnow,
        'state': u'unprocessed'}

    def get_comments(self, ascending=False):
        """
        Return the comments attached to this entry, sorted by 'created'.

        Sorted newest-first unless `ascending` is true.
        """
        if ascending:
            order = ASCENDING
        else:
            order = DESCENDING

        return self.db.MediaComment.find({
                'media_entry': self._id}).sort('created', order)

    def generate_slug(self):
        """
        Set self.slug from self.title, keeping it unique per uploader.

        The slugified title is used as-is unless another entry by the same
        uploader already has that slug; in that case this entry's _id is
        prepended to disambiguate it.
        """
        self.slug = url.slugify(self.title)

        query = {'slug': self.slug}

        # Slugs only need to be unique per uploader (the URL contains the
        # username), so a matching slug owned by someone else is not a
        # clash.  If no uploader is set yet, fall back to a global check.
        uploader = self.get('uploader')
        if uploader is not None:
            query['uploader'] = uploader

        # Never treat this entry's own stored record as a duplicate,
        # otherwise regenerating the slug of a saved entry would always
        # prefix it.
        own_id = self.get('_id')
        if own_id is not None:
            query['_id'] = {'$ne': own_id}

        duplicate = mg_globals.database.media_entries.find_one(query)

        if duplicate:
            self.slug = "%s-%s" % (self._id, self.slug)

    def url_to_prev(self, urlgen):
        """
        Provide a url to the previous entry from this user, if there is one

        "Previous" is the processed entry by the same uploader with the
        closest greater _id.  Returns None when there is no such entry.
        """
        cursor = self.db.MediaEntry.find({'_id': {"$gt": self._id},
                                          'uploader': self.uploader,
                                          'state': 'processed'}).sort(
                                                    '_id', ASCENDING).limit(1)
        # The cursor holds at most one document.
        for media in cursor:
            return media.url_for_self(urlgen)
        return None

    def url_to_next(self, urlgen):
        """
        Provide a url to the next entry from this user, if there is one

        "Next" is the processed entry by the same uploader with the closest
        smaller _id.  Returns None when there is no such entry.
        """
        cursor = self.db.MediaEntry.find({'_id': {"$lt": self._id},
                                          'uploader': self.uploader,
                                          'state': 'processed'}).sort(
                                                    '_id', DESCENDING).limit(1)
        # The cursor holds at most one document.
        for media in cursor:
            return media.url_for_self(urlgen)
        return None

    @property
    def get_uploader(self):
        """
        The User document whose _id matches this entry's uploader.

        Note that this is a property: read it as `entry.get_uploader`,
        without calling it.
        """
        return self.db.User.find_one({'_id': self.uploader})
258
259
class MediaComment(Document):
    """
    A single comment left on a MediaEntry.

    Fields:
     - media_entry: _id of the media entry the comment belongs to.
     - author: _id of the user who wrote the comment.
     - created: When the comment was posted (defaults to utcnow).
     - content: The comment as written: plaintext that may use markdown.
     - content_html: The HTML actually shown for the comment, produced by
       running the content through Markdown and the HTML cleaner.
    """

    __collection__ = 'media_comments'
    use_dot_notation = True

    structure = {
        'media_entry': ObjectId,
        'author': ObjectId,
        'created': datetime.datetime,
        'content': unicode,
        'content_html': unicode,
    }

    required_fields = [
        'media_entry',
        'author',
        'created',
        'content',
    ]

    default_values = {
        'created': datetime.datetime.utcnow,
    }

    # NOTE(review): this method has the same name as the 'media_entry'
    # structure key while use_dot_notation is enabled.  The body reads the
    # stored id with item access (self['media_entry']); confirm how
    # attribute-style access to `media_entry` resolves before relying on it.
    def media_entry(self):
        """
        Look up and return the MediaEntry document this comment refers to.
        """
        entry_id = self['media_entry']
        return self.db.MediaEntry.find_one({'_id': entry_id})

    @property
    def get_author(self):
        """
        The User document of the comment's author.

        This is a property: read it as `comment.get_author`, without
        calling it.
        """
        author_id = self['author']
        return self.db.User.find_one({'_id': author_id})
295
296
# The document classes that get registered with a database connection.
REGISTER_MODELS = [MediaEntry, User, MediaComment]
301
302
def register_models(connection):
    """
    Hand the full REGISTER_MODELS list to `connection.register`.
    """
    models = REGISTER_MODELS
    connection.register(models)