# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import logging
import uuid
from os.path import splitext

import six

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from mediagoblin import mg_globals
from mediagoblin.tools.response import json_response
from mediagoblin.tools.text import convert_to_tag_list_of_dicts
from mediagoblin.tools.federation import create_activity
from mediagoblin.db.models import MediaEntry, ProcessingMetaData
from mediagoblin.processing import mark_entry_failed
from mediagoblin.processing.task import ProcessMedia
from mediagoblin.notifications import add_comment_subscription
from mediagoblin.media_types import sniff_media


_log = logging.getLogger(__name__)


def check_file_field(request, field_name):
    """Check if a file field meets minimal criteria"""
    retval = (field_name in request.files
              and isinstance(request.files[field_name], FileStorage)
              and request.files[field_name].stream)
    if not retval:
        _log.debug("Form did not contain proper file field %s", field_name)
    return retval

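# Illustrative check from a view (a sketch only; "request" is the incoming
# request and 'file' is an assumed form field name):
#
#   if not check_file_field(request, 'file'):
#       ...  # reject the submission, e.g. re-render the form with an error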

def new_upload_entry(user):
    """
    Create a new MediaEntry for uploading
    """
    entry = MediaEntry()
    entry.uploader = user.id
    entry.license = user.license_preference
    return entry


def get_upload_file_limits(user):
    """
    Get the upload_limit and max_file_size for this user
    """
    if user.upload_limit is not None and user.upload_limit >= 0:  # TODO: debug this
        upload_limit = user.upload_limit
    else:
        upload_limit = mg_globals.app_config.get('upload_limit', None)

    max_file_size = mg_globals.app_config.get('max_file_size', None)

    return upload_limit, max_file_size


class UploadLimitError(Exception):
    """
    General exception for when an upload will be over some upload limit
    """
    pass


class FileUploadLimit(UploadLimitError):
    """
    This file is over the site upload limit
    """
    pass


class UserUploadLimit(UploadLimitError):
    """
    This file is over the user's particular upload limit
    """
    pass


class UserPastUploadLimit(UploadLimitError):
    """
    The user is *already* past their upload limit!
    """
    pass


def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None,
                 license=None, metadata=None, tags_string=u"",
                 upload_limit=None, max_file_size=None,
                 callback_url=None,
                 # If provided we'll do the feed_url update, otherwise ignore
                 urlgen=None,):
    """
    Args:
     - mg_app: the MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object that has the
       being-submitted file data in it (this object should really have
       a .name attribute which is the filename on disk!)
     - filename: the *original* filename of this media. Not necessarily
       the one on disk being referenced by submitted_file.
     - title: title for this media entry
     - description: description for this media entry
     - license: license for this media entry
     - metadata: dict of metadata for this media entry
     - tags_string: comma-separated string of tags to be associated
       with this entry
     - upload_limit: per-user upload limit, in megabytes
     - max_file_size: maximum size, in megabytes, of each uploaded file
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to do the feed_url update
    """
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # If the filename contains non-ASCII characters, generate a unique name
    if not all(ord(c) < 128 for c in filename):
        filename = six.text_type(uuid.uuid4()) + splitext(filename)[-1]

    # Sniff the submitted media to determine which
    # media plugin should handle processing
    media_type, media_manager = sniff_media(submitted_file, filename)

    # create entry and save in database
    entry = new_upload_entry(user)
    entry.media_type = media_type
    entry.title = (title or six.text_type(splitext(filename)[0]))

    entry.description = description or u""

    entry.license = license or None

    entry.media_metadata = metadata or {}

    # Process the user's folksonomy "tags"
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # Generate a slug from the title
    entry.generate_slug()

    queue_file = prepare_queue_task(mg_app, entry, filename)

    with queue_file:
        queue_file.write(submitted_file.read())

    # Get file size and round to 2 decimal places
    file_size = mg_app.queue_store.get_file_size(
        entry.queued_media_file) / (1024.0 * 1024)
    file_size = float('{0:.2f}'.format(file_size))

    # Check if file size is over the limit
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    # Check if user is over upload limit
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Save now so we have this data before kicking off processing
    entry.save()

    # Various "submit to stuff" things, callback_url and this silly urlgen
    # thing
    if callback_url:
        metadata = ProcessingMetaData()
        metadata.media_entry = entry
        metadata.callback_url = callback_url
        metadata.save()

    if urlgen:
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)
    else:
        feed_url = None

    # Pass off to processing
    #
    # (... don't change entry after this point to avoid race
    # conditions with changes to the document via processing code)
    run_process_media(entry, feed_url)

    add_comment_subscription(user, entry)

    # Create activity
    entry.activity = create_activity("post", entry, entry.uploader).id
    entry.save()

    return entry

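# Illustrative call from a submission view (a sketch only; the "request" and
# "form" objects and their attributes are assumptions, not part of this
# module):
#
#   upload_limit, max_file_size = get_upload_file_limits(request.user)
#   try:
#       entry = submit_media(
#           mg_app=request.app, user=request.user,
#           submitted_file=request.files['file'],
#           filename=request.files['file'].filename,
#           title=six.text_type(form.title.data),
#           tags_string=form.tags.data,
#           upload_limit=upload_limit, max_file_size=max_file_size,
#           urlgen=request.urlgen)
#   except UserPastUploadLimit:
#       ...  # the user has already used up their upload quota
#   except (FileUploadLimit, UserUploadLimit):
#       ...  # this file would go over a per-file or per-user limit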

def prepare_queue_task(app, entry, filename):
    """
    Prepare a MediaEntry for the processing queue and get a queue file
    """
    # We generate this ourselves so we know what the task id is for
    # retrieval later.

    # (If we got it off the task's auto-generation, there'd be
    # a risk of a race condition when we'd save after sending
    # off the task)
    task_id = six.text_type(uuid.uuid4())
    entry.queued_task_id = task_id

    # Now generate the queueing-related filename
    queue_filepath = app.queue_store.get_unique_filepath(
        ['media_entries',
         task_id,
         secure_filename(filename)])

    # queue appropriately
    queue_file = app.queue_store.get_file(
        queue_filepath, 'wb')

    # Add queued filename to the entry
    entry.queued_media_file = queue_filepath

    return queue_file


def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Process the media asynchronously

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: A string indicating the feed_url that the PuSH servers
        should be notified of. This will be something like: `request.urlgen(
        'mediagoblin.user_pages.atom_feed', qualified=True,
        user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary reprocessing
        info for the given media_type"""
    try:
        ProcessMedia().apply_async(
            [entry.id, feed_url, reprocess_action, reprocess_info], {},
            task_id=entry.queued_task_id)
    except BaseException as exc:
        # This block exists because, when running in "lazy" or
        # always-eager-with-exceptions-propagated celery mode, the
        # failure handling won't happen on the Celery end. Since we
        # expect a lot of users to run things this way, we have to
        # capture failures here.
        #
        # ... not completely the diaper pattern because the
        # exception is re-raised :)
        mark_entry_failed(entry.id, exc)
        # re-raise the exception
        raise


def api_upload_request(request, file_data, entry):
    """ This handles an image upload request """
    # Use the same kind of method from mediagoblin/submit/views:submit_start
    entry.title = file_data.filename

    # This will be set later but currently we just don't have enough information
    entry.slug = None

    queue_file = prepare_queue_task(request.app, entry, file_data.filename)
    with queue_file:
        queue_file.write(request.data)

    entry.save()
    return json_response(entry.serialize(request))

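# The API upload is a two-step flow: api_upload_request() above stores the
# raw upload and returns the serialized entry, and api_add_to_feed() below
# queues processing once the client posts the media to its feed. An
# illustrative second step from a hypothetical API view (the names below are
# assumptions, not part of this module):
#
#   entry = MediaEntry.query.filter_by(id=media_id).first()
#   return api_add_to_feed(request, entry)
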
def api_add_to_feed(request, entry):
    """ Add media to Feed """
    feed_url = request.urlgen(
        'mediagoblin.user_pages.atom_feed',
        qualified=True, user=request.user.username
    )

    run_process_media(entry, feed_url)
    add_comment_subscription(request.user, entry)

    # Create activity
    entry.activity = create_activity("post", entry, entry.uploader).id
    entry.save()

    return json_response(entry.serialize(request))