2edea70f193242a2d36f86cd56a02e6984355bcf
[mediagoblin.git] / mediagoblin / submit / lib.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import logging
18 import uuid
19 from os.path import splitext
20
21 import six
22
23 from werkzeug.utils import secure_filename
24 from werkzeug.datastructures import FileStorage
25
26 from mediagoblin import mg_globals
27 from mediagoblin.tools.response import json_response
28 from mediagoblin.tools.text import convert_to_tag_list_of_dicts
29 from mediagoblin.tools.federation import create_activity, create_generator
30 from mediagoblin.db.models import MediaEntry, ProcessingMetaData
31 from mediagoblin.processing import mark_entry_failed
32 from mediagoblin.processing.task import ProcessMedia
33 from mediagoblin.notifications import add_comment_subscription
34 from mediagoblin.media_types import sniff_media
35
36
37 _log = logging.getLogger(__name__)
38
39
def check_file_field(request, field_name):
    """Tell whether ``request`` carries a usable upload under ``field_name``.

    The field must be present in ``request.files``, be a ``FileStorage``
    instance and have a truthy ``stream``.  The value handed back is the
    result of that ``and`` chain (so the stream itself on success, a falsy
    value otherwise), not a strict boolean.
    """
    uploads = request.files
    outcome = (
        field_name in uploads
        and isinstance(uploads[field_name], FileStorage)
        and uploads[field_name].stream
    )
    if not outcome:
        _log.debug("Form did not contain proper file field %s", field_name)
    return outcome
48
49
def new_upload_entry(user):
    """
    Build a fresh MediaEntry owned by ``user``.

    The entry's actor is set to the user's id and its license to the
    user's license preference.  The entry is returned without being saved.
    """
    upload_entry = MediaEntry()
    upload_entry.actor, upload_entry.license = user.id, user.license_preference
    return upload_entry
58
59
def get_upload_file_limits(user):
    """
    Work out which upload limits apply to ``user``.

    Returns a ``(upload_limit, max_file_size)`` tuple.  A non-negative
    per-user ``upload_limit`` takes precedence; otherwise the
    ``upload_limit`` entry of the application config is used.
    ``max_file_size`` always comes from the application config.  Either
    value is None when it is not configured.
    """
    personal_limit = user.upload_limit
    # TODO: debug this
    if personal_limit is not None and personal_limit >= 0:
        effective_limit = personal_limit
    else:
        # No usable per-user value: fall back to the site-wide setting.
        effective_limit = mg_globals.app_config.get('upload_limit', None)

    return effective_limit, mg_globals.app_config.get('max_file_size', None)
72
73
class UploadLimitError(Exception):
    """
    Raised when an upload would go over some upload limit.

    Serves as the common parent of the more specific limit errors.
    """
79
80
class FileUploadLimit(UploadLimitError):
    """
    Raised when a single file is over the site upload limit.
    """
86
87
class UserUploadLimit(UploadLimitError):
    """
    Raised when a file is over the uploading user's own upload limit.
    """
93
94
class UserPastUploadLimit(UploadLimitError):
    """
    Raised when the user is *already* past their upload limit.
    """
100
101
102
def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None,
                 license=None, metadata=None, tags_string=u"",
                 upload_limit=None, max_file_size=None,
                 callback_url=None, urlgen=None,):
    """
    Create a media entry for an uploaded file and hand it off to processing.

    Args:
     - mg_app: The MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object holding the data being
       submitted (this object should really have a .name attribute which
       is the filename on disk!)
     - filename: the *original* filename of the upload.  Not necessarily
       the one on disk being referenced by submitted_file.
     - title: title for this media entry; when empty, the filename
       without its extension is used
     - description: description for this media entry
     - license: license for this media entry
     - metadata: stored as the entry's media_metadata (an empty dict when
       omitted)
     - tags_string: comma separated string of tags to be associated
       with this entry
     - upload_limit: size in megabytes that's the per-user upload limit
     - max_file_size: maximum size each uploaded file can be
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to build the feed URL and to assign the
       public ID used in the API (very important).

    Returns:
      The saved media entry that was passed on to processing.

    Raises:
     - UserPastUploadLimit: the user's uploads already reach upload_limit
     - FileUploadLimit: the file's size reaches max_file_size
     - UserUploadLimit: this file would take the user to upload_limit
    """
    # Refuse straight away when the quota is already used up.
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # A filename with non-ASCII characters is replaced by a random unique
    # name; only the extension is carried over.
    if any(ord(char) >= 128 for char in filename):
        unique_stem = six.text_type(uuid.uuid4())
        filename = unique_stem + splitext(filename)[-1]

    # Sniffing decides which media plugin will handle the processing.
    media_type, _media_manager = sniff_media(submitted_file, filename)

    # Fill in the new entry.
    entry = new_upload_entry(user)
    entry.media_type = media_type
    if title:
        entry.title = title
    else:
        entry.title = six.text_type(splitext(filename)[0])
    entry.description = description or u""
    entry.license = license or None
    entry.media_metadata = metadata or {}

    # The user's folksonomy "tags".
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # The slug is derived from the title set above.
    entry.generate_slug()

    # Copy the upload into the processing queue's storage.
    queue_file = prepare_queue_task(mg_app, entry, filename)
    with queue_file:
        queue_file.write(submitted_file)

    # Size of the queued file in megabytes, rounded to 2 decimal places.
    size_in_bytes = mg_app.queue_store.get_file_size(entry.queued_media_file)
    file_size = float('{0:.2f}'.format(size_in_bytes / (1024.0 * 1024)))

    # Per-file limit.
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    # Per-user limit, counting this file.
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Persist everything gathered so far before processing is kicked off.
    entry.save()

    # Optional post-submission hook.
    if callback_url:
        processing_metadata = ProcessingMetaData()
        processing_metadata.media_entry = entry
        processing_metadata.callback_url = callback_url
        processing_metadata.save()

    feed_url = None
    if urlgen:
        # The public_id is very important, especially for deletion: it
        # keeps the shell accessible after the entry has been deleted.
        entry.get_public_id(urlgen)

        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)

    add_comment_subscription(user, entry)

    # Record the "post" activity for this entry.
    create_activity("post", entry, entry.actor)
    entry.save()

    # Hand over to processing.
    #
    # (... the entry must not be changed after this point, to avoid race
    # conditions with changes made to the document by the processing code)
    run_process_media(entry, feed_url)

    return entry
215
216
def prepare_queue_task(app, entry, filename):
    """
    Get ``entry`` ready for the processing queue and open its queue file.

    A task id is generated and stored on ``entry.queued_task_id``, a unique
    path is requested from ``app.queue_store``, and that path is recorded
    on ``entry.queued_media_file``.  The file object opened for binary
    writing at that path is returned.
    """
    # The task id is generated here, rather than left to the task's
    # auto-generation, so that it is known for retrieval later and there is
    # no race between saving the entry and sending off the task.
    entry.queued_task_id = six.text_type(uuid.uuid4())

    # Path inside the queue store: media_entries/<task id>/<safe filename>
    queue_filepath = app.queue_store.get_unique_filepath([
        'media_entries',
        entry.queued_task_id,
        secure_filename(filename),
    ])

    opened_file = app.queue_store.get_file(queue_filepath, 'wb')

    # Remember on the entry where the queued upload lives.
    entry.queued_media_file = queue_filepath

    return opened_file
244
245
def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Dispatch the ProcessMedia task for ``entry``.

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: A string indicating the feed_url that the PuSH servers
        should be notified of. This will be sth like: `request.urlgen(
            'mediagoblin.user_pages.atom_feed',qualified=True,
            user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary reprocessing
        info for the given media_type

    Any exception raised while dispatching is recorded on the entry through
    ``mark_entry_failed`` and then re-raised unchanged.
    """
    try:
        task = ProcessMedia()
        task.apply_async(
            [entry.id, feed_url, reprocess_action, reprocess_info],
            {},
            task_id=entry.queued_task_id)
    except BaseException as exc:
        # In "lazy" / always-eager celery mode with exceptions propagated,
        # the failure handling does not happen on the Celery end.  Many
        # users are expected to run that way, so the failure is recorded
        # here instead.
        #
        # ... not quite the diaper pattern, since the exception is
        # re-raised right after.
        mark_entry_failed(entry.id, exc)
        raise
274
275
def api_upload_request(request, file_data, entry):
    """Handle an upload request: queue the request body for ``entry``.

    The entry is titled after ``file_data.filename``, given a public id,
    saved, and returned serialized as a JSON response.
    """
    # Same kind of approach as mediagoblin/submit/views:submit_start
    entry.title = file_data.filename

    # Not enough information to build a slug yet; it is set later.
    entry.slug = None

    # The public id is a MUST.
    entry.get_public_id(request.urlgen)

    queue_file = prepare_queue_task(request.app, entry, file_data.filename)
    with queue_file:
        # The uploaded bytes are the raw request body.
        queue_file.write(request.data)

    entry.save()

    serialized_entry = entry.serialize(request)
    return json_response(serialized_entry)
293
def api_add_to_feed(request, entry):
    """Add ``entry`` to the requesting user's feed and start processing it.

    The user is subscribed to comments on the entry, a "post" activity is
    created and the entry is saved before it is passed to processing.
    The created activity is returned.
    """
    uploader = request.user
    feed_url = request.urlgen(
        'mediagoblin.user_pages.atom_feed',
        qualified=True,
        user=uploader.username)

    add_comment_subscription(uploader, entry)

    # Record the "post" activity for this entry.
    post_activity = create_activity(
        verb="post",
        obj=entry,
        actor=entry.actor,
        generator=create_generator(request))

    entry.save()
    run_process_media(entry, feed_url)

    return post_activity