Merge branch 'metadata'
[mediagoblin.git] / mediagoblin/submit/lib.py
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import logging
import uuid
from os.path import splitext

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from mediagoblin import mg_globals
from mediagoblin.tools.text import convert_to_tag_list_of_dicts
from mediagoblin.db.models import MediaEntry, ProcessingMetaData
from mediagoblin.processing import mark_entry_failed
from mediagoblin.processing.task import ProcessMedia
from mediagoblin.notifications import add_comment_subscription
from mediagoblin.media_types import sniff_media


_log = logging.getLogger(__name__)
def check_file_field(request, field_name):
    """Check if a file field meets minimal criteria"""
    retval = (field_name in request.files
              and isinstance(request.files[field_name], FileStorage)
              and request.files[field_name].stream)
    if not retval:
        _log.debug("Form did not contain proper file field %s", field_name)
    return retval
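# A minimal usage sketch for check_file_field() (illustrative only: the
# 'file' field name and the error handling are assumptions, not part of
# this module):
#
#     if not check_file_field(request, 'file'):
#         # reject the request / re-render the form with an error
#         ...
#     submitted = request.files['file']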
def new_upload_entry(user):
    """
    Create a new MediaEntry for uploading
    """
    entry = MediaEntry()
    entry.uploader = user.id
    entry.license = user.license_preference
    return entry
def get_upload_file_limits(user):
    """
    Get the upload_limit and max_file_size for this user
    """
    if user.upload_limit >= 0:
        upload_limit = user.upload_limit
    else:
        upload_limit = mg_globals.app_config.get('upload_limit', None)

    max_file_size = mg_globals.app_config.get('max_file_size', None)

    return upload_limit, max_file_size
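# Hedged sketch of how these limits are typically threaded into a
# submission (the request object and view wiring are assumptions):
#
#     upload_limit, max_file_size = get_upload_file_limits(request.user)
#     submit_media(..., upload_limit=upload_limit,
#                  max_file_size=max_file_size)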
class UploadLimitError(Exception):
    """
    General exception for when an upload will be over some upload limit
    """
    pass


class FileUploadLimit(UploadLimitError):
    """
    This file is over the site upload limit
    """
    pass


class UserUploadLimit(UploadLimitError):
    """
    This file is over the user's particular upload limit
    """
    pass


class UserPastUploadLimit(UploadLimitError):
    """
    The user is *already* past their upload limit!
    """
    pass
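# Callers generally wrap submit_media() to turn these into user-facing
# errors; a hedged sketch (the handling shown is an assumption):
#
#     try:
#         entry = submit_media(mg_app, user, upload, filename, ...)
#     except UserPastUploadLimit:
#         ...  # user already has no quota left; reject before reading the file
#     except (FileUploadLimit, UserUploadLimit):
#         ...  # this particular file pushed past a size or quota limit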
def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None,
                 license=None, metadata=None, tags_string=u"",
                 upload_limit=None, max_file_size=None,
                 callback_url=None,
                 # If provided we'll do the feed_url update, otherwise ignore
                 urlgen=None,):
    """
    Args:
     - mg_app: The MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object that has the
       being-submitted file data in it (this object should really have
       a .name attribute which is the filename on disk!)
     - filename: the *original* filename of this file.  Not necessarily
       the one on disk being referenced by submitted_file.
     - title: title for this media entry
     - description: description for this media entry
     - license: license for this media entry
     - metadata: dict of metadata to attach to this media entry
     - tags_string: comma separated string of tags to be associated
       with this entry
     - upload_limit: the per-user upload limit, in megabytes
     - max_file_size: the maximum size (in megabytes) any single
       uploaded file may be
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to do the feed_url update
    """
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # If the filename contains non-ASCII characters, generate a unique name
    if not all(ord(c) < 128 for c in filename):
        filename = unicode(uuid.uuid4()) + splitext(filename)[-1]

    # Sniff the submitted media to determine which
    # media plugin should handle processing
    media_type, media_manager = sniff_media(submitted_file, filename)

    # create entry and save in database
    entry = new_upload_entry(user)
    entry.media_type = media_type
    entry.title = (title or unicode(splitext(filename)[0]))

    entry.description = description or u""

    entry.license = license or None

    entry.media_metadata = metadata or {}

    # Process the user's folksonomy "tags"
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # Generate a slug from the title
    entry.generate_slug()

    queue_file = prepare_queue_task(mg_app, entry, filename)

    with queue_file:
        queue_file.write(submitted_file.read())

    # Get file size and round to 2 decimal places
    file_size = mg_app.queue_store.get_file_size(
        entry.queued_media_file) / (1024.0 * 1024)
    file_size = float('{0:.2f}'.format(file_size))

    # Check if file size is over the limit
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    # Check if user is over upload limit
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Save now so we have this data before kicking off processing
    entry.save()
    # Various "submit to stuff" things: the callback_url post-hook and
    # the urlgen-based feed_url update
    if callback_url:
        metadata = ProcessingMetaData()
        metadata.media_entry = entry
        metadata.callback_url = callback_url
        metadata.save()

    if urlgen:
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)
    else:
        feed_url = None

    # Pass off to processing
    #
    # (... don't change entry after this point to avoid race
    # conditions with changes to the document via processing code)
    run_process_media(entry, feed_url)

    add_comment_subscription(user, entry)

    return entry
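# A hedged end-to-end sketch of calling submit_media() from a request
# handler (the 'file' form field and the attributes on `request` are
# assumptions for illustration, not guaranteed by this module):
#
#     if check_file_field(request, 'file'):
#         upload = request.files['file']
#         upload_limit, max_file_size = get_upload_file_limits(request.user)
#         entry = submit_media(
#             mg_app=request.app, user=request.user,
#             submitted_file=upload.stream, filename=upload.filename,
#             title=request.form.get('title'),
#             description=request.form.get('description'),
#             upload_limit=upload_limit, max_file_size=max_file_size,
#             urlgen=request.urlgen)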
def prepare_queue_task(app, entry, filename):
    """
    Prepare a MediaEntry for the processing queue and get a queue file
    """
    # We generate this ourselves so we know what the task id is for
    # retrieval later.

    # (If we got it off the task's auto-generation, there'd be
    # a risk of a race condition when we'd save after sending
    # off the task)
    task_id = unicode(uuid.uuid4())
    entry.queued_task_id = task_id

    # Now generate the queueing-related filename
    queue_filepath = app.queue_store.get_unique_filepath(
        ['media_entries',
         task_id,
         secure_filename(filename)])

    # queue appropriately
    queue_file = app.queue_store.get_file(
        queue_filepath, 'wb')

    # Add queued filename to the entry
    entry.queued_media_file = queue_filepath

    return queue_file
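# Note: the queue file created above lives in the queue store under a
# unique path of the form ['media_entries', <task_id>, <secure filename>];
# submit_media() writes the uploaded bytes into it before handing the
# entry off to run_process_media().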
def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Process the media asynchronously

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: A string indicating the feed_url that the PuSH servers
        should be notified of. This will be something like: `request.urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary reprocessing
        info for the given media_type"""
    try:
        ProcessMedia().apply_async(
            [entry.id, feed_url, reprocess_action, reprocess_info], {},
            task_id=entry.queued_task_id)
    except BaseException as exc:
        # This section exists because, when running in "lazy" or
        # always-eager-with-exceptions-propagated celery mode, the
        # failure handling won't happen on the Celery end.  Since we
        # expect a lot of users to run things in this way we have to
        # capture stuff here.
        #
        # ... not completely the diaper pattern because the
        # exception is re-raised :)
        mark_entry_failed(entry.id, exc)
        # re-raise the exception
        raise
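# Hedged sketch of re-running processing on an existing entry; the
# action name and reprocess_info contents here are illustrative, since
# valid values depend on the media type's processing manager:
#
#     run_process_media(entry, reprocess_action='resize',
#                       reprocess_info={'size': 'medium'})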