[mediagoblin.git] / mediagoblin / submit / lib.py

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import logging
import uuid
from os.path import splitext

import six

from werkzeug.utils import secure_filename
from werkzeug.datastructures import FileStorage

from mediagoblin import mg_globals
from mediagoblin.tools.response import json_response
from mediagoblin.tools.text import convert_to_tag_list_of_dicts
from mediagoblin.tools.federation import create_activity, create_generator
from mediagoblin.db.models import Collection, MediaEntry, ProcessingMetaData
from mediagoblin.processing import mark_entry_failed, get_entry_and_processing_manager
from mediagoblin.processing.task import ProcessMedia
from mediagoblin.notifications import add_comment_subscription
from mediagoblin.media_types import sniff_media
from mediagoblin.user_pages.lib import add_media_to_collection


_log = logging.getLogger(__name__)


def check_file_field(request, field_name):
    """Tell whether ``request`` carries a usable upload under ``field_name``.

    The field passes when it is present in ``request.files``, is a
    ``FileStorage`` instance and has a truthy ``stream``.  On success the
    stream itself is returned (truthy); otherwise ``False`` is returned
    and a debug message is logged.
    """
    outcome = False
    if field_name in request.files:
        candidate = request.files[field_name]
        if isinstance(candidate, FileStorage):
            # Hand back the stream itself rather than a plain boolean.
            outcome = candidate.stream

    if not outcome:
        _log.debug("Form did not contain proper file field %s", field_name)
    return outcome


def new_upload_entry(user):
    """
    Build a fresh, unsaved MediaEntry owned by ``user``.

    The entry's actor is set to the user's id and its license to the
    user's license preference.
    """
    fresh_entry = MediaEntry()
    fresh_entry.actor = user.id
    fresh_entry.license = user.license_preference
    return fresh_entry


def get_upload_file_limits(user):
    """
    Return ``(upload_limit, max_file_size)`` applicable to ``user``.

    A non-negative per-user ``upload_limit`` wins over the site-wide
    ``upload_limit`` setting; ``max_file_size`` always comes from the
    application config.  Either value may be ``None`` when unset.
    """
    own_limit = user.upload_limit
    has_own_limit = own_limit is not None and own_limit >= 0

    if has_own_limit:
        upload_limit = own_limit
    else:
        # No usable per-user value: fall back to the site configuration.
        upload_limit = mg_globals.app_config.get('upload_limit', None)

    return upload_limit, mg_globals.app_config.get('max_file_size', None)


class UploadLimitError(Exception):
    """
    Base class for every "this upload would exceed a limit" error.

    Catch this to handle all upload-limit failures in one place.
    """


class FileUploadLimit(UploadLimitError):
    """
    Raised when a single file exceeds the site's maximum file size.
    """


class UserUploadLimit(UploadLimitError):
    """
    Raised when accepting this file would push the user over their
    upload limit.
    """


class UserPastUploadLimit(UploadLimitError):
    """
    Raised when the user has *already* reached their upload limit, before
    the new file is even considered.
    """


def _discard_queued_file(mg_app, entry):
    """Best-effort removal of the queue file written for a rejected upload."""
    queued_filepath = entry.queued_media_file
    try:
        # NOTE(review): assumes the queue store implements
        # delete_file(filepath) -- confirm against the storage interface.
        mg_app.queue_store.delete_file(queued_filepath)
    except Exception:
        # Cleanup must never mask the upload-limit error the caller is
        # about to raise, so only log the failure.
        _log.warning(
            "Could not remove rejected upload %r from the queue store",
            queued_filepath, exc_info=True)


def submit_media(mg_app, user, submitted_file, filename,
                 title=None, description=None, collection_slug=None,
                 license=None, metadata=None, tags_string=u"",
                 callback_url=None, urlgen=None):
    """
    Create a MediaEntry for an uploaded file and hand it off to processing.

    Args:
     - mg_app: The MediaGoblinApp instantiated for this process
     - user: the user object this media entry should be associated with
     - submitted_file: the file-like object that has the
       being-submitted file data in it (this object should really have
       a .name attribute which is the filename on disk!)
     - filename: the *original* filename of this.  Not necessarily the
       one on disk being referenced by submitted_file.
     - title: title for this media entry; defaults to the filename
       without its extension
     - description: description for this media entry
     - collection_slug: slug of one of the user's collections to add the
       entry to; silently ignored if no such collection is found
     - license: license for this media entry
     - metadata: value stored as the entry's media_metadata (an empty
       dict is used when not given)
     - tags_string: comma separated string of tags to be associated
       with this entry
     - callback_url: possible post-hook to call after submission
     - urlgen: if provided, used to do the feed_url update and assign a public
               ID used in the API (very important).

    Returns:
      The saved MediaEntry, after processing has been kicked off.

    Raises:
     - UserPastUploadLimit: the user had already reached their upload
       limit before this submission.
     - FileUploadLimit: the file is at or over the site's max_file_size.
     - UserUploadLimit: this file would take the user to or over their
       upload limit.

    In the last two cases the file has already been written to the queue
    store when the limit is detected, so it is removed again before
    raising.
    """
    upload_limit, max_file_size = get_upload_file_limits(user)
    if upload_limit and user.uploaded >= upload_limit:
        raise UserPastUploadLimit()

    # If the filename contains non ascii generate a unique name
    if not all(ord(c) < 128 for c in filename):
        filename = six.text_type(uuid.uuid4()) + splitext(filename)[-1]

    # Sniff the submitted media to determine which
    # media plugin should handle processing
    media_type, _unused_manager = sniff_media(submitted_file, filename)

    # create entry and save in database
    entry = new_upload_entry(user)
    entry.media_type = media_type
    entry.title = (title or six.text_type(splitext(filename)[0]))

    entry.description = description or u""

    entry.license = license or None

    entry.media_metadata = metadata or {}

    # Process the user's folksonomy "tags"
    entry.tags = convert_to_tag_list_of_dicts(tags_string)

    # Generate a slug from the title
    entry.generate_slug()

    queue_file = prepare_queue_task(mg_app, entry, filename)

    with queue_file:
        queue_file.write(submitted_file)

    # Get file size and round to 2 decimal places
    # (the stored size is divided by 1024 * 1024, so the limits compared
    # below are presumably expressed in megabytes -- confirm the unit
    # returned by get_file_size)
    file_size = mg_app.queue_store.get_file_size(
        entry.queued_media_file) / (1024.0 * 1024)
    file_size = float('{0:.2f}'.format(file_size))

    # Check if file size is over the limit
    if max_file_size and file_size >= max_file_size:
        # The upload is already sitting in the queue store; don't leak it.
        _discard_queued_file(mg_app, entry)
        raise FileUploadLimit()

    # Check if user is over upload limit
    if upload_limit and (user.uploaded + file_size) >= upload_limit:
        _discard_queued_file(mg_app, entry)
        raise UserUploadLimit()

    user.uploaded = user.uploaded + file_size
    user.save()

    entry.file_size = file_size

    # Save now so we have this data before kicking off processing
    entry.save()

    # Various "submit to stuff" things, callbackurl and this silly urlgen
    # thing
    if callback_url:
        # Distinct name so the ``metadata`` argument is not shadowed.
        processing_metadata = ProcessingMetaData()
        processing_metadata.media_entry = entry
        processing_metadata.callback_url = callback_url
        processing_metadata.save()

    if urlgen:
        # Generate the public_id, this is very important, especially relating
        # to deletion, it allows the shell to be accessible post-delete!
        entry.get_public_id(urlgen)

        # Generate the feed URL
        feed_url = urlgen(
            'mediagoblin.user_pages.atom_feed',
            qualified=True, user=user.username)
    else:
        feed_url = None

    add_comment_subscription(user, entry)

    # Create activity
    create_activity("post", entry, entry.actor)
    entry.save()

    # add to collection
    if collection_slug:
        collection = Collection.query.filter_by(slug=collection_slug,
                                                actor=user.id).first()
        if collection:
            add_media_to_collection(collection, entry)

    # Pass off to processing
    #
    # (... don't change entry after this point to avoid race
    # conditions with changes to the document via processing code)
    run_process_media(entry, feed_url)

    return entry


def prepare_queue_task(app, entry, filename):
    """
    Assign a task id and queue path to ``entry`` and open its queue file.

    Sets ``entry.queued_task_id`` and ``entry.queued_media_file`` and
    returns the queue-store file object, opened with mode ``'wb'``, that
    the upload should be written into.
    """
    # The task id is chosen here, up front, so it is known for later
    # retrieval.  Taking the id from the task's own auto-generation would
    # risk a race between dispatching the task and saving the entry.
    new_task_id = six.text_type(uuid.uuid4())
    entry.queued_task_id = new_task_id

    store = app.queue_store

    # Build the queue-store path for this upload
    path_parts = ['media_entries', new_task_id, secure_filename(filename)]
    target_path = store.get_unique_filepath(path_parts)

    # Open the destination for writing
    handle = store.get_file(target_path, 'wb')

    # Record on the entry where its queued file lives
    entry.queued_media_file = target_path

    return handle


def run_process_media(entry, feed_url=None,
                      reprocess_action="initial", reprocess_info=None):
    """Kick off processing for a media entry.

    The entry is re-fetched by id together with its processing manager.
    The manager's ``workflow`` gets the first chance to handle the job;
    only when it returns ``None`` is a ``ProcessMedia`` task dispatched
    under the entry's ``queued_task_id``.

    :param entry: MediaEntry() instance to be processed.
    :param feed_url: A string indicating the feed_url that the PuSH servers
        should be notified of. This will be sth like: `request.urlgen(
            'mediagoblin.user_pages.atom_feed',qualified=True,
            user=request.user.username)`
    :param reprocess_action: What particular action should be run.
    :param reprocess_info: A dict containing all of the necessary reprocessing
        info for the given media_type
    :raises: whatever the workflow or task dispatch raises; the entry is
        marked as failed first and the exception is then re-raised."""

    media, manager = get_entry_and_processing_manager(entry.id)

    try:
        workflow = manager.workflow(
            media, feed_url, reprocess_action, reprocess_info)
        if workflow is None:
            task = ProcessMedia()
            task.apply_async(
                [media.id, feed_url, reprocess_action, reprocess_info], {},
                task_id=media.queued_task_id)
    except BaseException as failure:
        # In "lazy" / always-eager celery mode with exceptions propagated,
        # Celery's own failure handling never runs, and many deployments
        # are expected to run that way -- so record the failure here.
        #
        # Not quite the diaper pattern: the exception is re-raised below.
        mark_entry_failed(media.id, failure)
        raise


def api_upload_request(request, file_data, entry):
    """Handle an upload request coming in through the API.

    Titles the entry after the uploaded filename, assigns its public id,
    writes the request body into a new queue file, saves the entry and
    returns its serialized form as a JSON response.
    """
    # Same approach as mediagoblin/submit/views:submit_start
    original_name = file_data.filename
    entry.title = original_name

    # Not enough information to build a slug yet; it is set later.
    entry.slug = None

    # The public id is mandatory.
    entry.get_public_id(request.urlgen)

    handle = prepare_queue_task(request.app, entry, original_name)
    with handle:
        handle.write(request.data)

    entry.save()
    serialized = entry.serialize(request)
    return json_response(serialized)

def api_add_to_feed(request, entry):
    """Publish ``entry`` to the requesting user's feed.

    Subscribes the user to comments on the entry, records a "post"
    activity, saves the entry and starts processing it.  Returns the
    created activity.
    """
    poster = request.user
    feed_url = request.urlgen(
        'mediagoblin.user_pages.atom_feed',
        qualified=True, user=poster.username)

    add_comment_subscription(poster, entry)

    # Record the "post" activity for this entry
    post_activity = create_activity(
        verb="post",
        obj=entry,
        actor=entry.actor,
        generator=create_generator(request),
    )

    entry.save()
    run_process_media(entry, feed_url)

    return post_activity
Commit	Line	Data
be1f0f7d E	1	# GNU MediaGoblin -- federated, autonomous media hosting
	2	# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
	3	#
	4	# This program is free software: you can redistribute it and/or modify
	5	# it under the terms of the GNU Affero General Public License as published by
	6	# the Free Software Foundation, either version 3 of the License, or
	7	# (at your option) any later version.
	8	#
	9	# This program is distributed in the hope that it will be useful,
	10	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	# GNU Affero General Public License for more details.
	13	#
	14	# You should have received a copy of the GNU Affero General Public License
	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
	16
be1f0f7d	17	import logging
8eb47d02	18	import uuid
1779a070 CAW	19	from os.path import splitext
1779a070 CAW	20
e49b7e02 BP	21	import six
e49b7e02 BP	22
8eb47d02	23	from werkzeug.utils import secure_filename
2ef2f46e	24	from werkzeug.datastructures import FileStorage
be1f0f7d	25
5d754da7	26	from mediagoblin import mg_globals
5e5d4458	27	from mediagoblin.tools.response import json_response
1779a070	28	from mediagoblin.tools.text import convert_to_tag_list_of_dicts
5436d980	29	from mediagoblin.tools.federation import create_activity, create_generator
a3c48024	30	from mediagoblin.db.models import Collection, MediaEntry, ProcessingMetaData
81c59ef0	31	from mediagoblin.processing import mark_entry_failed, get_entry_and_processing_manager
b5059525	32	from mediagoblin.processing.task import ProcessMedia
9e15c674	33	from mediagoblin.notifications import add_comment_subscription
5d754da7	34	from mediagoblin.media_types import sniff_media
a3c48024	35	from mediagoblin.user_pages.lib import add_media_to_collection
86bb44ef	36
be1f0f7d E	37
	38	_log = logging.getLogger(__name__)
	39
	40
2ef2f46e E	41	def check_file_field(request, field_name):
	42	"""Check if a file field meets minimal criteria"""
	43	retval = (field_name in request.files
	44	and isinstance(request.files[field_name], FileStorage)
	45	and request.files[field_name].stream)
	46	if not retval:
	47	_log.debug("Form did not contain proper file field %s", field_name)
	48	return retval
	49
	50
6c1467d5 E	51	def new_upload_entry(user):
	52	"""
	53	Create a new MediaEntry for uploading
	54	"""
	55	entry = MediaEntry()
0f3bf8d4	56	entry.actor = user.id
6c1467d5 E	57	entry.license = user.license_preference
	58	return entry
	59
	60
5d754da7 CAW	61	def get_upload_file_limits(user):
	62	"""
	63	Get the upload_limit and max_file_size for this user
	64	"""
cda3055b	65	if user.upload_limit is not None and user.upload_limit >= 0: # TODO: debug this
5d754da7 CAW	66	upload_limit = user.upload_limit
	67	else:
	68	upload_limit = mg_globals.app_config.get('upload_limit', None)
	69
	70	max_file_size = mg_globals.app_config.get('max_file_size', None)
	71
	72	return upload_limit, max_file_size
	73
	74
9e15c674 CAW	75	class UploadLimitError(Exception):
	76	"""
	77	General exception for when an upload will be over some upload limit
	78	"""
	79	pass
	80
	81
	82	class FileUploadLimit(UploadLimitError):
	83	"""
	84	This file is over the site upload limit
	85	"""
	86	pass
	87
	88
	89	class UserUploadLimit(UploadLimitError):
	90	"""
	91	This file is over the user's particular upload limit
	92	"""
	93	pass
	94
	95
	96	class UserPastUploadLimit(UploadLimitError):
	97	"""
	98	The user is already past their upload limit!
	99	"""
	100	pass
	101
	102
	103
1779a070	104	def submit_media(mg_app, user, submitted_file, filename,
a3c48024	105	title=None, description=None, collection_slug=None,
45f426dd	106	license=None, metadata=None, tags_string=u"",
d216d771	107	callback_url=None, urlgen=None,):
5202924c CAW	108	"""
	109	Args:
	110	- mg_app: The MediaGoblinApp instantiated for this process
	111	- user: the user object this media entry should be associated with
	112	- submitted_file: the file-like object that has the
	113	being-submitted file data in it (this object should really have
	114	a .name attribute which is the filename on disk!)
	115	- filename: the original filename of this. Not necessarily the
	116	one on disk being referenced by submitted_file.
	117	- title: title for this media entry
	118	- description: description for this media entry
a3c48024	119	- collection_slug: collection for this media entry
5202924c CAW	120	- license: license for this media entry
	121	- tags_string: comma separated string of tags to be associated
	122	with this entry
131b7495	123	- callback_url: possible post-hook to call after submission
d216d771 JT	124	- urlgen: if provided, used to do the feed_url update and assign a public
d216d771 JT	125	ID used in the API (very important).
5202924c	126	"""
6c067857	127	upload_limit, max_file_size = get_upload_file_limits(user)
9e15c674 CAW	128	if upload_limit and user.uploaded >= upload_limit:
	129	raise UserPastUploadLimit()
	130
1779a070 CAW	131	# If the filename contains non ascii generate a unique name
1779a070 CAW	132	if not all(ord(c) < 128 for c in filename):
e49b7e02	133	filename = six.text_type(uuid.uuid4()) + splitext(filename)[-1]
1779a070 CAW	134
	135	# Sniff the submitted media to determine which
	136	# media plugin should handle processing
301da9ca	137	media_type, media_manager = sniff_media(submitted_file, filename)
1779a070 CAW	138
	139	# create entry and save in database
	140	entry = new_upload_entry(user)
	141	entry.media_type = media_type
e49b7e02	142	entry.title = (title or six.text_type(splitext(filename)[0]))
1779a070	143
cb7716f3	144	entry.description = description or u""
1779a070 CAW	145
	146	entry.license = license or None
	147
2daf8ec0	148	entry.media_metadata = metadata or {}
45f426dd	149
1779a070 CAW	150	# Process the user's folksonomy "tags"
	151	entry.tags = convert_to_tag_list_of_dicts(tags_string)
	152
	153	# Generate a slug from the title
	154	entry.generate_slug()
	155
	156	queue_file = prepare_queue_task(mg_app, entry, filename)
	157
	158	with queue_file:
2b4c339d	159	queue_file.write(submitted_file)
1779a070 CAW	160
	161	# Get file size and round to 2 decimal places
	162	file_size = mg_app.queue_store.get_file_size(
	163	entry.queued_media_file) / (1024.0 * 1024)
	164	file_size = float('{0:.2f}'.format(file_size))
	165
1779a070 CAW	166	# Check if file size is over the limit
1779a070 CAW	167	if max_file_size and file_size >= max_file_size:
9e15c674	168	raise FileUploadLimit()
1779a070 CAW	169
	170	# Check if user is over upload limit
	171	if upload_limit and (user.uploaded + file_size) >= upload_limit:
9e15c674 CAW	172	raise UserUploadLimit()
	173
	174	user.uploaded = user.uploaded + file_size
	175	user.save()
	176
	177	entry.file_size = file_size
	178
	179	# Save now so we have this data before kicking off processing
	180	entry.save()
	181
131b7495 CAW	182	# Various "submit to stuff" things, callbackurl and this silly urlgen
	183	# thing
	184	if callback_url:
	185	metadata = ProcessingMetaData()
	186	metadata.media_entry = entry
	187	metadata.callback_url = callback_url
	188	metadata.save()
	189
9e15c674	190	if urlgen:
d216d771 JT	191	# Generate the public_id, this is very importent, especially relating
	192	# to deletion, it allows the shell to be accessable post-delete!
	193	entry.get_public_id(urlgen)
	194
	195	# Generate the feed URL
9e15c674	196	feed_url = urlgen(
1779a070	197	'mediagoblin.user_pages.atom_feed',
9e15c674 CAW	198	qualified=True, user=user.username)
	199	else:
	200	feed_url = None
1779a070	201
bc2c06a1 JT	202	add_comment_subscription(user, entry)
	203
	204	# Create activity
0f3bf8d4	205	create_activity("post", entry, entry.actor)
bc2c06a1 JT	206	entry.save()
bc2c06a1 JT	207
a3c48024 SP	208	# add to collection
a3c48024 SP	209	if collection_slug:
f86dafe2 BB	210	collection = Collection.query.filter_by(slug=collection_slug,
f86dafe2 BB	211	actor=user.id).first()
e119aed2 SP	212	if collection:
e119aed2 SP	213	add_media_to_collection(collection, entry)
a3c48024	214
9e15c674 CAW	215	# Pass off to processing
	216	#
	217	# (... don't change entry after this point to avoid race
	218	# conditions with changes to the document via processing code)
	219	run_process_media(entry, feed_url)
1779a070	220
5d754da7 CAW	221	return entry
5d754da7 CAW	222
1779a070	223
b228d897 E	224	def prepare_queue_task(app, entry, filename):
	225	"""
	226	Prepare a MediaEntry for the processing queue and get a queue file
	227	"""
cec9648c	228	# We generate this ourselves so we know what the task id is for
8eb47d02 E	229	# retrieval later.
	230
	231	# (If we got it off the task's auto-generation, there'd be
	232	# a risk of a race condition when we'd save after sending
	233	# off the task)
e49b7e02	234	task_id = six.text_type(uuid.uuid4())
8eb47d02 E	235	entry.queued_task_id = task_id
	236
	237	# Now store generate the queueing related filename
b228d897	238	queue_filepath = app.queue_store.get_unique_filepath(
8eb47d02 E	239	['media_entries',
	240	task_id,
	241	secure_filename(filename)])
	242
	243	# queue appropriately
b228d897	244	queue_file = app.queue_store.get_file(
8eb47d02 E	245	queue_filepath, 'wb')
	246
	247	# Add queued filename to the entry
	248	entry.queued_media_file = queue_filepath
	249
	250	return queue_file
	251
	252
77ea4c9b	253	def run_process_media(entry, feed_url=None,
98d1fa3b	254	reprocess_action="initial", reprocess_info=None):
c7b3d070 SS	255	"""Process the media asynchronously
	256
	257	:param entry: MediaEntry() instance to be processed.
	258	:param feed_url: A string indicating the feed_url that the PuSH servers
	259	should be notified of. This will be sth like: `request.urlgen(
	260	'mediagoblin.user_pages.atom_feed',qualified=True,
9a2c66ca	261	user=request.user.username)`
77ea4c9b CAW	262	:param reprocess_action: What particular action should be run.
77ea4c9b CAW	263	:param reprocess_info: A dict containing all of the necessary reprocessing
9a2c66ca	264	info for the given media_type"""
81c59ef0	265
81c59ef0	266	entry, manager = get_entry_and_processing_manager(entry.id)
81c59ef0	267
86bb44ef	268	try:
33d5ac6c	269	wf = manager.workflow(entry, feed_url, reprocess_action, reprocess_info)
	270	if wf is None:
	271	ProcessMedia().apply_async(
	272	[entry.id, feed_url, reprocess_action, reprocess_info], {},
	273	task_id=entry.queued_task_id)
86bb44ef E	274	except BaseException as exc:
	275	# The purpose of this section is because when running in "lazy"
	276	# or always-eager-with-exceptions-propagated celery mode that
	277	# the failure handling won't happen on Celery end. Since we
	278	# expect a lot of users to run things in this way we have to
	279	# capture stuff here.
	280	#
	281	# ... not completely the diaper pattern because the
	282	# exception is re-raised :)
	283	mark_entry_failed(entry.id, exc)
	284	# re-raise the exception
	285	raise
5e5d4458 JT	286
	287
	288	def api_upload_request(request, file_data, entry):
	289	""" This handles a image upload request """
	290	# Use the same kind of method from mediagoblin/submit/views:submit_start
	291	entry.title = file_data.filename
9246a6ba JT	292
	293	# This will be set later but currently we just don't have enough information
	294	entry.slug = None
5e5d4458	295
64a456a4 JT	296	# This is a MUST.
	297	entry.get_public_id(request.urlgen)
	298
5e5d4458 JT	299	queue_file = prepare_queue_task(request.app, entry, file_data.filename)
	300	with queue_file:
	301	queue_file.write(request.data)
	302
	303	entry.save()
	304	return json_response(entry.serialize(request))
	305
	306	def api_add_to_feed(request, entry):
	307	""" Add media to Feed """
5e5d4458 JT	308	feed_url = request.urlgen(
5e5d4458 JT	309	'mediagoblin.user_pages.atom_feed',
9246a6ba JT	310	qualified=True, user=request.user.username
9246a6ba JT	311	)
5e5d4458	312
5e5d4458	313	add_comment_subscription(request.user, entry)
6d36f75f	314
b9492011	315	# Create activity
35885226	316	activity = create_activity(
5436d980 JT	317	verb="post",
5436d980 JT	318	obj=entry,
0f3bf8d4	319	actor=entry.actor,
5436d980 JT	320	generator=create_generator(request)
5436d980 JT	321	)
6d36f75f	322	entry.save()
bc2c06a1	323	run_process_media(entry, feed_url)
b9492011	324
35885226	325	return activity