Add priority to the celery tasks
[mediagoblin.git] / mediagoblin / submit / lib.py
1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16
17 import logging
18 import uuid
19 from os.path import splitext
20
21 import six
22
23 from werkzeug.utils import secure_filename
24 from werkzeug.datastructures import FileStorage
25
26 from mediagoblin import mg_globals
27 from mediagoblin.tools.response import json_response
28 from mediagoblin.tools.text import convert_to_tag_list_of_dicts
29 from mediagoblin.tools.federation import create_activity, create_generator
30 from mediagoblin.db.models import Collection, MediaEntry, ProcessingMetaData
31 from mediagoblin.processing import mark_entry_failed, get_entry_and_processing_manager
32 from mediagoblin.processing.task import ProcessMedia
33 from mediagoblin.notifications import add_comment_subscription
34 from mediagoblin.media_types import sniff_media
35 from mediagoblin.user_pages.lib import add_media_to_collection
36
37
38 _log = logging.getLogger(__name__)
39
40
def check_file_field(request, field_name):
    """Return a truthy value when the named form field holds a usable upload.

    The field must be present in ``request.files``, be a werkzeug
    ``FileStorage`` instance, and expose a non-empty ``stream``.
    """
    uploaded = request.files.get(field_name)
    ok = (uploaded is not None
          and isinstance(uploaded, FileStorage)
          and uploaded.stream)
    if not ok:
        _log.debug("Form did not contain proper file field %s", field_name)
    return ok
49
50
def new_upload_entry(user):
    """Build a fresh ``MediaEntry`` owned by *user*.

    The entry gets the user's id as actor and inherits the user's
    preferred license; it is not persisted here.
    """
    media_entry = MediaEntry()
    media_entry.actor = user.id
    media_entry.license = user.license_preference
    return media_entry
59
60
def get_upload_file_limits(user):
    """Return the ``(upload_limit, max_file_size)`` pair for *user*.

    A non-negative per-user ``upload_limit`` overrides the site-wide
    setting; otherwise the app-config value (or ``None``) is used.
    ``max_file_size`` always comes from the app config.
    """
    per_user_limit = user.upload_limit
    if per_user_limit is None or per_user_limit < 0:  # TODO: debug this
        upload_limit = mg_globals.app_config.get('upload_limit', None)
    else:
        upload_limit = per_user_limit

    return upload_limit, mg_globals.app_config.get('max_file_size', None)
73
74
class UploadLimitError(Exception):
    """Base class for every kind of upload-limit violation."""


class FileUploadLimit(UploadLimitError):
    """This file is over the site upload limit"""


class UserUploadLimit(UploadLimitError):
    """This file is over the user's particular upload limit"""


class UserPastUploadLimit(UploadLimitError):
    """The user is *already* past their upload limit!"""
101
102
103
def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None, collection_slug=None,
                 license=None, metadata=None, tags_string=u"",
                 callback_url=None, urlgen=None,):
    """
    Create, queue, and dispatch processing for a newly submitted media file.

    Args:
     - mg_app: The MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object that has the
       being-submitted file data in it (this object should really have
       a .name attribute which is the filename on disk!)
     - filename: the *original* filename of this. Not necessarily the
       one on disk being referenced by submitted_file.
     - title: title for this media entry
     - description: description for this media entry
     - collection_slug: slug of an existing collection of this user to
       add the entry to (silently skipped if no such collection exists)
     - license: license for this media entry
     - metadata: dict stored as the entry's media_metadata
     - tags_string: comma separated string of tags to be associated
       with this entry
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to do the feed_url update and assign a public
       ID used in the API (very important).

    Returns:
        The saved MediaEntry, already handed off to processing.

    Raises:
        UserPastUploadLimit: the user has already used up their quota.
        FileUploadLimit: this file exceeds the site's max_file_size.
        UserUploadLimit: this file would push the user over their quota.
    """
    upload_limit, max_file_size = get_upload_file_limits(user)
    # Reject before doing any work if the quota is already exhausted.
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # If the filename contains non ascii generate a unique name
    # (keeping only the original extension).
    if not all(ord(c) < 128 for c in filename):
        filename = six.text_type(uuid.uuid4()) + splitext(filename)[-1]

    # Sniff the submitted media to determine which
    # media plugin should handle processing
    media_type, media_manager = sniff_media(submitted_file, filename)

    # create entry and save in database
    entry = new_upload_entry(user)
    entry.media_type = media_type
    # Fall back to the extension-less filename when no title was given.
    entry.title = (title or six.text_type(splitext(filename)[0]))

    entry.description = description or u""

    entry.license = license or None

    entry.media_metadata = metadata or {}

    # Process the user's folksonomy "tags"
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # Generate a slug from the title
    entry.generate_slug()

    # Reserve a task id and a queue-store path for this entry, then copy
    # the submitted data into the queue store.
    queue_file = prepare_queue_task(mg_app, entry, filename)

    with queue_file:
        queue_file.write(submitted_file)

    # Get file size in MiB and round to 2 decimal places
    file_size = mg_app.queue_store.get_file_size(
        entry.queued_media_file) / (1024.0 * 1024)
    file_size = float('{0:.2f}'.format(file_size))

    # Check if file size is over the limit
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    # Check if user is over upload limit
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    # Charge the upload against the user's quota.
    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Save now so we have this data before kicking off processing
    entry.save()

    # Various "submit to stuff" things, callbackurl and this silly urlgen
    # thing
    if callback_url:
        # NOTE(review): this rebinding shadows the `metadata` parameter;
        # harmless, since entry.media_metadata was already set above.
        metadata = ProcessingMetaData()
        metadata.media_entry = entry
        metadata.callback_url = callback_url
        metadata.save()

    if urlgen:
        # Generate the public_id, this is very importent, especially relating
        # to deletion, it allows the shell to be accessable post-delete!
        entry.get_public_id(urlgen)

        # Generate the feed URL so processing can notify PuSH servers.
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)
    else:
        feed_url = None

    # Subscribe the uploader to comments on their own entry.
    add_comment_subscription(user, entry)

    # Create activity
    create_activity("post", entry, entry.actor)
    entry.save()

    # add to collection
    if collection_slug:
        collection = Collection.query.filter_by(slug=collection_slug,
                                                actor=user.id).first()
        if collection:
            add_media_to_collection(collection, entry)

    # Pass off to processing
    #
    # (... don't change entry after this point to avoid race
    # conditions with changes to the document via processing code)
    run_process_media(entry, feed_url)

    return entry
222
223
def prepare_queue_task(app, entry, filename):
    """Attach queueing info to *entry* and return a writable queue file.

    The processing task id is generated here, up front, rather than
    taken from the task framework's auto-generation: that way it can be
    recorded on the entry *before* the task is sent off, avoiding a race
    between dispatch and the subsequent save.
    """
    # Pre-generate the task id for later retrieval.
    task_id = six.text_type(uuid.uuid4())
    entry.queued_task_id = task_id

    # Build a unique queue-store path for this upload and record it on
    # the entry.
    queue_filepath = app.queue_store.get_unique_filepath(
        ['media_entries',
         task_id,
         secure_filename(filename)])
    entry.queued_media_file = queue_filepath

    # Hand back an open, writable file in the queue store.
    return app.queue_store.get_file(queue_filepath, 'wb')
251
252
def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Hand *entry* off to its processing manager's workflow.

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: feed URL the PuSH servers should be notified of,
        e.g. ``request.urlgen('mediagoblin.user_pages.atom_feed',
        qualified=True, user=request.user.username)``
    :param reprocess_action: which particular processing action to run.
    :param reprocess_info: dict with the necessary reprocessing info for
        the given media_type.
    """
    entry, manager = get_entry_and_processing_manager(entry.id)

    try:
        manager.workflow(entry, manager, feed_url, reprocess_action,
                         reprocess_info or {})
    except BaseException as exc:
        # When celery runs "lazy" / always-eager with exceptions
        # propagated, its own failure handling never fires — and many
        # deployments run that way — so mark the failure here ourselves.
        # Not quite the diaper pattern: the exception is re-raised :)
        mark_entry_failed(entry.id, exc)
        raise
283
284
def api_upload_request(request, file_data, entry):
    """ This handles a image upload request """
    # Mirrors the approach in mediagoblin/submit/views:submit_start
    entry.title = file_data.filename

    # The slug gets set later; we don't have enough information yet.
    entry.slug = None

    # This is a MUST: assigns the entry's public API id.
    entry.get_public_id(request.urlgen)

    # Queue the raw request body for processing.
    with prepare_queue_task(request.app, entry, file_data.filename) as queue_file:
        queue_file.write(request.data)

    entry.save()
    return json_response(entry.serialize(request))
302
def api_add_to_feed(request, entry):
    """ Add media to Feed """
    feed_url = request.urlgen('mediagoblin.user_pages.atom_feed',
                              qualified=True,
                              user=request.user.username)

    # Subscribe the uploader to comments on this entry.
    add_comment_subscription(request.user, entry)

    # Record a federated "post" activity for this entry.
    generator = create_generator(request)
    activity = create_activity(verb="post",
                               obj=entry,
                               actor=entry.actor,
                               generator=generator)
    entry.save()

    # Kick off processing, letting PuSH servers know about the feed.
    run_process_media(entry, feed_url)

    return activity