Merge branch 'metadata'
[mediagoblin.git] / mediagoblin/submit/lib.py
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import logging
import uuid
from os.path import splitext

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from mediagoblin import mg_globals
from mediagoblin.tools.text import convert_to_tag_list_of_dicts
from mediagoblin.db.models import MediaEntry, ProcessingMetaData
from mediagoblin.processing import mark_entry_failed
from mediagoblin.processing.task import ProcessMedia
from mediagoblin.notifications import add_comment_subscription
from mediagoblin.media_types import sniff_media


_log = logging.getLogger(__name__)
def check_file_field(request, field_name):
    """Check if a file field meets minimal criteria"""
    retval = (field_name in request.files
              and isinstance(request.files[field_name], FileStorage)
              and request.files[field_name].stream)
    if not retval:
        _log.debug("Form did not contain proper file field %s", field_name)
    return retval
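# A minimal usage sketch for check_file_field() (illustrative only: the
# 'file' field name and the error handling are assumptions, not part of
# this module):
#
#     if not check_file_field(request, 'file'):
#         # reject the request / re-render the form with an error
#         ...
#     submitted = request.files['file']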
def new_upload_entry(user):
    """
    Create a new MediaEntry for uploading
    """
    entry = MediaEntry()
    entry.uploader = user.id
    entry.license = user.license_preference
    return entry
def get_upload_file_limits(user):
    """
    Get the upload_limit and max_file_size for this user
    """
    if user.upload_limit >= 0:
        upload_limit = user.upload_limit
    else:
        upload_limit = mg_globals.app_config.get('upload_limit', None)

    max_file_size = mg_globals.app_config.get('max_file_size', None)

    return upload_limit, max_file_size
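# Hedged sketch of how these limits are typically threaded into a
# submission (the request object and view wiring are assumptions):
#
#     upload_limit, max_file_size = get_upload_file_limits(request.user)
#     submit_media(..., upload_limit=upload_limit,
#                  max_file_size=max_file_size)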
class UploadLimitError(Exception):
    """
    General exception for when an upload will be over some upload limit
    """
    pass


class FileUploadLimit(UploadLimitError):
    """
    This file is over the site upload limit
    """
    pass


class UserUploadLimit(UploadLimitError):
    """
    This file is over the user's particular upload limit
    """
    pass


class UserPastUploadLimit(UploadLimitError):
    """
    The user is *already* past their upload limit!
    """
    pass
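# Callers generally wrap submit_media() to turn these into user-facing
# errors; a hedged sketch (the handling shown is an assumption):
#
#     try:
#         entry = submit_media(mg_app, user, upload, filename, ...)
#     except UserPastUploadLimit:
#         ...  # user already has no quota left; reject before reading the file
#     except (FileUploadLimit, UserUploadLimit):
#         ...  # this particular file pushed past a size or quota limit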
def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None,
                 license=None, metadata=None, tags_string=u"",
                 upload_limit=None, max_file_size=None,
                 callback_url=None,
                 # If provided we'll do the feed_url update, otherwise ignore
                 urlgen=None,):
    """
    Args:
     - mg_app: The MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object that has the
       being-submitted file data in it (this object should really have
       a .name attribute which is the filename on disk!)
     - filename: the *original* filename of this file.  Not necessarily
       the one on disk being referenced by submitted_file.
     - title: title for this media entry
     - description: description for this media entry
     - license: license for this media entry
     - metadata: dict of metadata to attach to this media entry
     - tags_string: comma separated string of tags to be associated
       with this entry
     - upload_limit: the per-user upload limit, in megabytes
     - max_file_size: the maximum size (in megabytes) any single
       uploaded file may be
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to do the feed_url update
    """
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # If the filename contains non-ASCII characters, generate a unique name
    if not all(ord(c) < 128 for c in filename):
        filename = unicode(uuid.uuid4()) + splitext(filename)[-1]

    # Sniff the submitted media to determine which
    # media plugin should handle processing
    media_type, media_manager = sniff_media(submitted_file, filename)

    # create entry and save in database
    entry = new_upload_entry(user)
    entry.media_type = media_type
    entry.title = (title or unicode(splitext(filename)[0]))

    entry.description = description or u""

    entry.license = license or None

    entry.media_metadata = metadata or {}

    # Process the user's folksonomy "tags"
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # Generate a slug from the title
    entry.generate_slug()

    queue_file = prepare_queue_task(mg_app, entry, filename)

    with queue_file:
        queue_file.write(submitted_file.read())

    # Get file size and round to 2 decimal places
    file_size = mg_app.queue_store.get_file_size(
        entry.queued_media_file) / (1024.0 * 1024)
    file_size = float('{0:.2f}'.format(file_size))

    # Check if file size is over the limit
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    # Check if user is over upload limit
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Save now so we have this data before kicking off processing
    entry.save()
    # Various "submit to stuff" things: the callback_url post-hook and
    # the urlgen-based feed_url update
    if callback_url:
        metadata = ProcessingMetaData()
        metadata.media_entry = entry
        metadata.callback_url = callback_url
        metadata.save()

    if urlgen:
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)
    else:
        feed_url = None

    # Pass off to processing
    #
    # (... don't change entry after this point to avoid race
    # conditions with changes to the document via processing code)
    run_process_media(entry, feed_url)

    add_comment_subscription(user, entry)

    return entry
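# A hedged end-to-end sketch of calling submit_media() from a request
# handler (the 'file' form field and the attributes on `request` are
# assumptions for illustration, not guaranteed by this module):
#
#     if check_file_field(request, 'file'):
#         upload = request.files['file']
#         upload_limit, max_file_size = get_upload_file_limits(request.user)
#         entry = submit_media(
#             mg_app=request.app, user=request.user,
#             submitted_file=upload.stream, filename=upload.filename,
#             title=request.form.get('title'),
#             description=request.form.get('description'),
#             upload_limit=upload_limit, max_file_size=max_file_size,
#             urlgen=request.urlgen)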
def prepare_queue_task(app, entry, filename):
    """
    Prepare a MediaEntry for the processing queue and get a queue file
    """
    # We generate this ourselves so we know what the task id is for
    # retrieval later.

    # (If we got it off the task's auto-generation, there'd be
    # a risk of a race condition when we'd save after sending
    # off the task)
    task_id = unicode(uuid.uuid4())
    entry.queued_task_id = task_id

    # Now generate the queueing-related filename
    queue_filepath = app.queue_store.get_unique_filepath(
        ['media_entries',
         task_id,
         secure_filename(filename)])

    # queue appropriately
    queue_file = app.queue_store.get_file(
        queue_filepath, 'wb')

    # Add queued filename to the entry
    entry.queued_media_file = queue_filepath

    return queue_file
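# Note: the queue file created above lives in the queue store under a
# unique path of the form ['media_entries', <task_id>, <secure filename>];
# submit_media() writes the uploaded bytes into it before handing the
# entry off to run_process_media().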
def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Process the media asynchronously

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: A string indicating the feed_url that the PuSH servers
        should be notified of. This will be something like: `request.urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary reprocessing
        info for the given media_type"""
    try:
        ProcessMedia().apply_async(
            [entry.id, feed_url, reprocess_action, reprocess_info], {},
            task_id=entry.queued_task_id)
    except BaseException as exc:
        # This section exists because, when running in "lazy" or
        # always-eager-with-exceptions-propagated celery mode, the
        # failure handling won't happen on the Celery end.  Since we
        # expect a lot of users to run things in this way we have to
        # capture stuff here.
        #
        # ... not completely the diaper pattern because the
        # exception is re-raised :)
        mark_entry_failed(entry.id, exc)
        # re-raise the exception
        raise
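# Hedged sketch of re-running processing on an existing entry; the
# action name and reprocess_info contents here are illustrative, since
# valid values depend on the media type's processing manager:
#
#     run_process_media(entry, reprocess_action='resize',
#                       reprocess_info={'size': 'medium'})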