[mediagoblin.git] / mediagoblin / submit / lib.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import logging
import uuid
from os.path import splitext

import six

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from mediagoblin import mg_globals
from mediagoblin.tools.response import json_response
from mediagoblin.tools.text import convert_to_tag_list_of_dicts
from mediagoblin.tools.federation import create_activity
from mediagoblin.db.models import MediaEntry, ProcessingMetaData
from mediagoblin.processing import mark_entry_failed
from mediagoblin.processing.task import ProcessMedia
from mediagoblin.notifications import add_comment_subscription
from mediagoblin.media_types import sniff_media


_log = logging.getLogger(__name__)


def check_file_field(request, field_name):
    """Tell whether ``request`` carries a usable upload under ``field_name``.

    The field qualifies when it is present in ``request.files``, is a
    werkzeug ``FileStorage`` instance, and has a truthy ``stream``.

    The value handed back is the result of that ``and`` chain, so it is
    ``False`` for a missing or wrongly-typed field and otherwise whatever
    the field's ``stream`` attribute is.  Callers should only rely on its
    truthiness.  A debug message is logged when the check fails.
    """
    uploads = request.files
    usable = (
        field_name in uploads
        and isinstance(uploads[field_name], FileStorage)
        and uploads[field_name].stream
    )
    if usable:
        return usable

    _log.debug("Form did not contain proper file field %s", field_name)
    return usable


def new_upload_entry(user):
    """
    Start a blank MediaEntry on behalf of ``user``.

    The entry's uploader is set to the user's id and its license is
    seeded from the user's license preference.  The entry is returned
    without being saved.
    """
    fresh_entry = MediaEntry()
    fresh_entry.uploader = user.id
    fresh_entry.license = user.license_preference
    return fresh_entry


def get_upload_file_limits(user):
    """
    Work out which upload quota and per-file size cap apply to ``user``.

    Returns a ``(upload_limit, max_file_size)`` tuple.  The user's own
    ``upload_limit`` wins when it is set and non-negative; otherwise the
    site-wide ``upload_limit`` from the app config is used.  The
    ``max_file_size`` always comes from the app config.  Either value is
    ``None`` when the config does not define it.
    """
    site_config = mg_globals.app_config
    personal_limit = user.upload_limit

    # TODO (carried over from the original code): debug this condition
    has_personal_limit = personal_limit is not None and personal_limit >= 0
    if has_personal_limit:
        upload_limit = personal_limit
    else:
        upload_limit = site_config.get('upload_limit', None)

    return upload_limit, site_config.get('max_file_size', None)


class UploadLimitError(Exception):
    """
    Base class for every "this upload would exceed a limit" failure.

    Catch this to handle all of the more specific upload-limit
    exceptions derived from it in one place.
    """


class FileUploadLimit(UploadLimitError):
    """
    Raised when a single file is at or above the site's maximum file size.
    """


class UserUploadLimit(UploadLimitError):
    """
    Raised when adding this file would take the user to or past their
    own upload limit.
    """


class UserPastUploadLimit(UploadLimitError):
    """
    Raised when the user had *already* reached their upload limit before
    this upload was even considered.
    """


def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None,
                 license=None, metadata=None, tags_string=u"",
                 upload_limit=None, max_file_size=None,
                 callback_url=None,
                 # If provided we'll do the feed_url update, otherwise ignore
                 urlgen=None,):
    """
    Create a media entry for an uploaded file, queue the file and hand it
    off to processing.

    Args:
     - mg_app: the MediaGoblinApp instantiated for this process; its
       queue_store receives the uploaded data
     - user: the user object the new media entry is associated with
     - submitted_file: file-like object holding the data being
       submitted (this object should really have a .name attribute
       which is the filename on disk!)
     - filename: the *original* filename of the upload, which is not
       necessarily the on-disk name behind submitted_file
     - title: title for the entry; when empty, the filename without its
       extension is used
     - description: description for the entry (defaults to u"")
     - license: license for the entry (None when not given)
     - metadata: stored as the entry's media_metadata (defaults to {})
     - tags_string: comma separated string of tags for the entry
     - upload_limit: per-user upload limit, in megabytes
     - max_file_size: largest size allowed for a single file, compared
       against the file size in megabytes
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to build the feed_url passed on to
       processing

    Returns:
      The saved media entry.

    Raises:
     - UserPastUploadLimit: the user was already at/over upload_limit
     - FileUploadLimit: the file is at/over max_file_size
     - UserUploadLimit: this file would take the user to/over
       upload_limit
    """
    # Nothing to do if the quota was used up before this upload.
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # A filename with any non-ascii character is swapped for a random
    # unique name that keeps only the original extension.
    if any(ord(char) >= 128 for char in filename):
        unique_stem = six.text_type(uuid.uuid4())
        filename = unique_stem + splitext(filename)[-1]

    # Ask the media plugins which of them should process this upload;
    # only the media type is needed here, not the manager.
    media_type, _media_manager = sniff_media(submitted_file, filename)

    # Build the entry and fill in everything the caller supplied.
    entry = new_upload_entry(user)
    entry.media_type = media_type
    entry.title = title if title else six.text_type(splitext(filename)[0])
    entry.description = description if description else u""
    entry.license = license if license else None
    entry.media_metadata = metadata if metadata else {}

    # The user's folksonomy "tags", then a slug derived from the title.
    entry.tags = convert_to_tag_list_of_dicts(tags_string)
    entry.generate_slug()

    # Copy the upload into the processing queue's storage.
    queued_upload = prepare_queue_task(mg_app, entry, filename)
    with queued_upload:
        queued_upload.write(submitted_file.read())

    # Size of the queued file in megabytes, rounded to 2 decimal places.
    size_in_bytes = mg_app.queue_store.get_file_size(
        entry.queued_media_file)
    file_size = float('{0:.2f}'.format(size_in_bytes / (1024.0 * 1024)))

    # NOTE: both limit checks run after the data was written to the
    # queue store above.
    if max_file_size and file_size >= max_file_size:
        raise FileUploadLimit()

    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        raise UserUploadLimit()

    # Charge the upload to the user's running total.
    user.uploaded += file_size
    user.save()

    entry.file_size = file_size

    # Persist now so the data exists before processing is kicked off.
    entry.save()

    # Optional extras: remember the callback url for after processing...
    if callback_url:
        processing_meta = ProcessingMetaData()
        processing_meta.media_entry = entry
        processing_meta.callback_url = callback_url
        processing_meta.save()

    # ... and work out the feed url when a url generator was supplied.
    feed_url = None
    if urlgen:
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)

    add_comment_subscription(user, entry)

    # Record the "post" activity for this entry.
    create_activity("post", entry, entry.uploader)
    entry.save()

    # Hand over to processing.
    #
    # (... the entry must not be modified past this point, to avoid race
    # conditions with changes made to it by the processing code)
    run_process_media(entry, feed_url)

    return entry


def prepare_queue_task(app, entry, filename):
    """
    Get ``entry`` ready for the processing queue and open its queue file.

    A new task id is generated and stored on ``entry.queued_task_id``, a
    unique path for ``filename`` is reserved in ``app.queue_store``, and
    that path is stored on ``entry.queued_media_file``.  The entry is
    modified but not saved.

    Returns the queue store's file object for that path, opened with
    mode 'wb'.
    """
    # The task id is created here, rather than letting the task
    # auto-generate one, so that it is known up front for later
    # retrieval.  (Taking it from the dispatched task would mean saving
    # the entry after the task was already sent off, which risks a race
    # condition.)
    entry.queued_task_id = six.text_type(uuid.uuid4())

    # Reserve a unique location in the queue store for this upload.
    path_parts = [
        'media_entries',
        entry.queued_task_id,
        secure_filename(filename),
    ]
    queue_filepath = app.queue_store.get_unique_filepath(path_parts)

    # Open the destination for writing ...
    queue_file = app.queue_store.get_file(queue_filepath, 'wb')

    # ... and only then record on the entry where the data will live.
    entry.queued_media_file = queue_filepath

    return queue_file


def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Dispatch the ProcessMedia task for ``entry``.

    :param entry: MediaEntry() instance to be processed.  Its ``id`` is
        passed to the task and its ``queued_task_id`` is used as the
        task id.
    :param feed_url: A string indicating the feed_url that the PuSH
        servers should be notified of, e.g. the result of
        `request.urlgen('mediagoblin.user_pages.atom_feed',
        qualified=True, user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary
        reprocessing info for the given media_type

    Any exception raised while dispatching is recorded against the entry
    with ``mark_entry_failed`` and then re-raised unchanged.
    """
    try:
        task = ProcessMedia()
        task.apply_async(
            [entry.id, feed_url, reprocess_action, reprocess_info],
            {},
            task_id=entry.queued_task_id)
    except BaseException as exc:
        # When celery runs in "lazy" or
        # always-eager-with-exceptions-propagated mode, its own failure
        # handling never happens, and many users are expected to run it
        # that way, so the failure has to be recorded here.
        #
        # Not quite the diaper pattern: the exception is re-raised
        # right after being recorded.
        mark_entry_failed(entry.id, exc)
        raise


def api_upload_request(request, file_data, entry):
    """Handle an upload request coming in through the API.

    ``file_data`` supplies only the filename (used as the entry title and
    for the queue path); the bytes written to the queue file are taken
    from ``request.data``.  The entry is saved and its serialized form is
    returned as a JSON response.
    """
    # Same kind of approach as mediagoblin/submit/views:submit_start
    original_name = file_data.filename
    entry.title = original_name

    # There is not enough information for a slug yet; it is set later.
    entry.slug = None

    upload_target = prepare_queue_task(request.app, entry, original_name)
    with upload_target:
        upload_target.write(request.data)

    entry.save()

    serialized = entry.serialize(request)
    return json_response(serialized)

def api_add_to_feed(request, entry):
    """Publish ``entry`` to the requesting user's feed and start processing.

    Subscribes the requesting user to comments on the entry, records a
    "post" activity, saves the entry, dispatches media processing with
    the user's atom feed url, and returns the serialized entry as a JSON
    response.
    """
    poster = request.user
    feed_url = request.urlgen(
        'mediagoblin.user_pages.atom_feed',
        qualified=True,
        user=poster.username,
    )

    add_comment_subscription(poster, entry)

    # Record the "post" activity before handing off to processing.
    create_activity("post", entry, entry.uploader)
    entry.save()
    run_process_media(entry, feed_url)

    serialized = entry.serialize(request)
    return json_response(serialized)
Commit	Line	Data
be1f0f7d E	1	# GNU MediaGoblin -- federated, autonomous media hosting
	2	# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
be1f0f7d	17	import logging
8eb47d02	18	import uuid
1779a070 CAW	19	from os.path import splitext
1779a070 CAW	20
e49b7e02 BP	21	import six
e49b7e02 BP	22
8eb47d02	23	from werkzeug.utils import secure_filename
2ef2f46e	24	from werkzeug.datastructures import FileStorage
be1f0f7d	25
5d754da7	26	from mediagoblin import mg_globals
5e5d4458	27	from mediagoblin.tools.response import json_response
1779a070	28	from mediagoblin.tools.text import convert_to_tag_list_of_dicts
b9492011	29	from mediagoblin.tools.federation import create_activity
131b7495	30	from mediagoblin.db.models import MediaEntry, ProcessingMetaData
86bb44ef	31	from mediagoblin.processing import mark_entry_failed
b5059525	32	from mediagoblin.processing.task import ProcessMedia
9e15c674	33	from mediagoblin.notifications import add_comment_subscription
5d754da7	34	from mediagoblin.media_types import sniff_media
86bb44ef	35
be1f0f7d E	36
	37	_log = logging.getLogger(__name__)
	38
	39
2ef2f46e E	40	def check_file_field(request, field_name):
	41	"""Check if a file field meets minimal criteria"""
	42	retval = (field_name in request.files
	43	and isinstance(request.files[field_name], FileStorage)
	44	and request.files[field_name].stream)
	45	if not retval:
	46	_log.debug("Form did not contain proper file field %s", field_name)
	47	return retval
	48
	49
6c1467d5 E	50	def new_upload_entry(user):
	51	"""
	52	Create a new MediaEntry for uploading
	53	"""
	54	entry = MediaEntry()
	55	entry.uploader = user.id
	56	entry.license = user.license_preference
	57	return entry
	58
	59
5d754da7 CAW	60	def get_upload_file_limits(user):
	61	"""
	62	Get the upload_limit and max_file_size for this user
	63	"""
cda3055b	64	if user.upload_limit is not None and user.upload_limit >= 0: # TODO: debug this
5d754da7 CAW	65	upload_limit = user.upload_limit
	66	else:
	67	upload_limit = mg_globals.app_config.get('upload_limit', None)
	68
	69	max_file_size = mg_globals.app_config.get('max_file_size', None)
	70
	71	return upload_limit, max_file_size
	72
	73
9e15c674 CAW	74	class UploadLimitError(Exception):
	75	"""
	76	General exception for when an upload will be over some upload limit
	77	"""
	78	pass
	79
	80
	81	class FileUploadLimit(UploadLimitError):
	82	"""
	83	This file is over the site upload limit
	84	"""
	85	pass
	86
	87
	88	class UserUploadLimit(UploadLimitError):
	89	"""
	90	This file is over the user's particular upload limit
	91	"""
	92	pass
	93
	94
	95	class UserPastUploadLimit(UploadLimitError):
	96	"""
	97	The user is already past their upload limit!
	98	"""
	99	pass
	100
	101
	102
1779a070 CAW	103	def submit_media(mg_app, user, submitted_file, filename,
1779a070 CAW	104	title=None, description=None,
45f426dd	105	license=None, metadata=None, tags_string=u"",
9e15c674	106	upload_limit=None, max_file_size=None,
131b7495	107	callback_url=None,
9e15c674	108	# If provided we'll do the feed_url update, otherwise ignore
131b7495	109	urlgen=None,):
5202924c CAW	110	"""
	111	Args:
	112	- mg_app: The MediaGoblinApp instantiated for this process
	113	- user: the user object this media entry should be associated with
	114	- submitted_file: the file-like object that has the
	115	being-submitted file data in it (this object should really have
	116	a .name attribute which is the filename on disk!)
	117	- filename: the original filename of this. Not necessarily the
	118	one on disk being referenced by submitted_file.
	119	- title: title for this media entry
	120	- description: description for this media entry
	121	- license: license for this media entry
	122	- tags_string: comma separated string of tags to be associated
	123	with this entry
	124	- upload_limit: size in megabytes that's the per-user upload limit
	125	- max_file_size: maximum size each file can be that's uploaded
131b7495	126	- callback_url: possible post-hook to call after submission
5202924c CAW	127	- urlgen: if provided, used to do the feed_url update
5202924c CAW	128	"""
9e15c674 CAW	129	if upload_limit and user.uploaded >= upload_limit:
	130	raise UserPastUploadLimit()
	131
1779a070 CAW	132	# If the filename contains non ascii generate a unique name
1779a070 CAW	133	if not all(ord(c) < 128 for c in filename):
e49b7e02	134	filename = six.text_type(uuid.uuid4()) + splitext(filename)[-1]
1779a070 CAW	135
	136	# Sniff the submitted media to determine which
	137	# media plugin should handle processing
301da9ca	138	media_type, media_manager = sniff_media(submitted_file, filename)
1779a070 CAW	139
	140	# create entry and save in database
	141	entry = new_upload_entry(user)
	142	entry.media_type = media_type
e49b7e02	143	entry.title = (title or six.text_type(splitext(filename)[0]))
1779a070	144
cb7716f3	145	entry.description = description or u""
1779a070 CAW	146
	147	entry.license = license or None
	148
2daf8ec0	149	entry.media_metadata = metadata or {}
45f426dd	150
1779a070 CAW	151	# Process the user's folksonomy "tags"
	152	entry.tags = convert_to_tag_list_of_dicts(tags_string)
	153
	154	# Generate a slug from the title
	155	entry.generate_slug()
	156
	157	queue_file = prepare_queue_task(mg_app, entry, filename)
	158
	159	with queue_file:
301da9ca	160	queue_file.write(submitted_file.read())
1779a070 CAW	161
	162	# Get file size and round to 2 decimal places
	163	file_size = mg_app.queue_store.get_file_size(
	164	entry.queued_media_file) / (1024.0 * 1024)
	165	file_size = float('{0:.2f}'.format(file_size))
	166
1779a070 CAW	167	# Check if file size is over the limit
1779a070 CAW	168	if max_file_size and file_size >= max_file_size:
9e15c674	169	raise FileUploadLimit()
1779a070 CAW	170
	171	# Check if user is over upload limit
	172	if upload_limit and (user.uploaded + file_size) >= upload_limit:
9e15c674 CAW	173	raise UserUploadLimit()
	174
	175	user.uploaded = user.uploaded + file_size
	176	user.save()
	177
	178	entry.file_size = file_size
	179
	180	# Save now so we have this data before kicking off processing
	181	entry.save()
	182
131b7495 CAW	183	# Various "submit to stuff" things, callbackurl and this silly urlgen
	184	# thing
	185	if callback_url:
	186	metadata = ProcessingMetaData()
	187	metadata.media_entry = entry
	188	metadata.callback_url = callback_url
	189	metadata.save()
	190
9e15c674 CAW	191	if urlgen:
9e15c674 CAW	192	feed_url = urlgen(
1779a070	193	'mediagoblin.user_pages.atom_feed',
9e15c674 CAW	194	qualified=True, user=user.username)
	195	else:
	196	feed_url = None
1779a070	197
bc2c06a1 JT	198	add_comment_subscription(user, entry)
	199
	200	# Create activity
	201	create_activity("post", entry, entry.uploader)
	202	entry.save()
	203
9e15c674 CAW	204	# Pass off to processing
	205	#
	206	# (... don't change entry after this point to avoid race
	207	# conditions with changes to the document via processing code)
	208	run_process_media(entry, feed_url)
1779a070	209
5d754da7 CAW	210	return entry
5d754da7 CAW	211
1779a070	212
b228d897 E	213	def prepare_queue_task(app, entry, filename):
	214	"""
	215	Prepare a MediaEntry for the processing queue and get a queue file
	216	"""
cec9648c	217	# We generate this ourselves so we know what the task id is for
8eb47d02 E	218	# retrieval later.
	219
	220	# (If we got it off the task's auto-generation, there'd be
	221	# a risk of a race condition when we'd save after sending
	222	# off the task)
e49b7e02	223	task_id = six.text_type(uuid.uuid4())
8eb47d02 E	224	entry.queued_task_id = task_id
	225
	226	# Now store generate the queueing related filename
b228d897	227	queue_filepath = app.queue_store.get_unique_filepath(
8eb47d02 E	228	['media_entries',
	229	task_id,
	230	secure_filename(filename)])
	231
	232	# queue appropriately
b228d897	233	queue_file = app.queue_store.get_file(
8eb47d02 E	234	queue_filepath, 'wb')
	235
	236	# Add queued filename to the entry
	237	entry.queued_media_file = queue_filepath
	238
	239	return queue_file
	240
	241
77ea4c9b	242	def run_process_media(entry, feed_url=None,
98d1fa3b	243	reprocess_action="initial", reprocess_info=None):
c7b3d070 SS	244	"""Process the media asynchronously
	245
	246	:param entry: MediaEntry() instance to be processed.
	247	:param feed_url: A string indicating the feed_url that the PuSH servers
	248	should be notified of. This will be sth like: `request.urlgen(
	249	'mediagoblin.user_pages.atom_feed',qualified=True,
9a2c66ca	250	user=request.user.username)`
77ea4c9b CAW	251	:param reprocess_action: What particular action should be run.
77ea4c9b CAW	252	:param reprocess_info: A dict containing all of the necessary reprocessing
9a2c66ca	253	info for the given media_type"""
86bb44ef	254	try:
b5059525	255	ProcessMedia().apply_async(
77ea4c9b	256	[entry.id, feed_url, reprocess_action, reprocess_info], {},
86bb44ef E	257	task_id=entry.queued_task_id)
	258	except BaseException as exc:
	259	# The purpose of this section is because when running in "lazy"
	260	# or always-eager-with-exceptions-propagated celery mode that
	261	# the failure handling won't happen on Celery end. Since we
	262	# expect a lot of users to run things in this way we have to
	263	# capture stuff here.
	264	#
	265	# ... not completely the diaper pattern because the
	266	# exception is re-raised :)
	267	mark_entry_failed(entry.id, exc)
	268	# re-raise the exception
	269	raise
5e5d4458 JT	270
	271
	272	def api_upload_request(request, file_data, entry):
	273	""" This handles a image upload request """
	274	# Use the same kind of method from mediagoblin/submit/views:submit_start
	275	entry.title = file_data.filename
9246a6ba JT	276
	277	# This will be set later but currently we just don't have enough information
	278	entry.slug = None
5e5d4458 JT	279
	280	queue_file = prepare_queue_task(request.app, entry, file_data.filename)
	281	with queue_file:
	282	queue_file.write(request.data)
	283
	284	entry.save()
	285	return json_response(entry.serialize(request))
	286
	287	def api_add_to_feed(request, entry):
	288	""" Add media to Feed """
5e5d4458 JT	289	feed_url = request.urlgen(
5e5d4458 JT	290	'mediagoblin.user_pages.atom_feed',
9246a6ba JT	291	qualified=True, user=request.user.username
9246a6ba JT	292	)
5e5d4458	293
5e5d4458	294	add_comment_subscription(request.user, entry)
6d36f75f	295
b9492011	296	# Create activity
bc2c06a1	297	create_activity("post", entry, entry.uploader)
6d36f75f	298	entry.save()
bc2c06a1	299	run_process_media(entry, feed_url)
b9492011	300
9246a6ba	301	return json_response(entry.serialize(request))