your media be passed over to celery to be processed rather than be
processed immediately.
+============================
+Command-line batch uploading
+============================
+
+There's another way to submit media, and it can be much more powerful, although
+it is a bit more complex::
+
+ ./bin/gmg batchaddmedia admin /path/to/your/metadata.csv
+
+This is an example of what the command may look like. The important part here
+is that you have to create the 'metadata.csv' file::
+
+ media:location,dcterms:title,dcterms:creator,dcterms:type
+ "http://www.example.net/path/to/nap.png","Goblin taking a nap",,"Image"
+ "http://www.example.net/path/to/snore.ogg","Goblin Snoring","Me","Audio"
+
+The above is an example of a very simple metadata.csv file. The batchaddmedia
+script would read this and attempt to upload just those two pieces of media,
+automatically naming them appropriately.
+
+The csv file
+============
+The media:location column
+-------------------------
+The media:location column is the one column that is absolutely necessary for
+uploading your media. This gives a path to each piece of media you upload. This
+can be either a path to a local file or a direct link to remote media (with the
+link in http format). As you can see in the example above, the (fake) media was
+stored remotely on "www.example.net".
+
+Other columns
+-------------
+Other columns can be used to provide detailed metadata about each media entry.
+Our metadata system accepts any information provided for in the
+`RDFa Core Initial Context`_, and the batchaddmedia script recognizes all of the
+resources provided within it.
+
+.. _RDFa Core Initial Context: http://www.w3.org/2011/rdfa-context/rdfa-1.1
+
+The uploader may include metadata for each piece of media, or leave those
+columns blank if they want to. A few columns from `Dublin Core`_ are
+notable because the batchaddmedia script uses them to set the default
+information of uploaded media entries.
+
+.. _Dublin Core: http://wiki.dublincore.org/index.php/User_Guide
+
+- **dc:title** sets a title for your media entry. If this is left blank, the media entry will be named according to the filename of the file being uploaded.
+- **dc:description** sets a description of your media entry. If this is left blank, the media entry's description will not be filled in.
+- **dc:rights** will set a license for your media entry *if* the data provided is a valid URI. If this is left blank, 'All Rights Reserved' will be selected.
+
+You can, of course, change these values later.
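+
+For instance, a metadata.csv that fills in these columns explicitly might look
+like this (the path, text, and license URI below are only example values)::
+
+ media:location,dc:title,dc:description,dc:rights
+ "/path/to/nap.png","Goblin taking a nap","A goblin, mid-nap","http://creativecommons.org/licenses/by-sa/4.0/"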
--- /dev/null
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import copy
+import json
+import re
+from pkg_resources import resource_filename
+
+import dateutil.parser
+from pyld import jsonld
+from jsonschema import validate, FormatChecker, draft4_format_checker
+from jsonschema.compat import str_types
+
+from mediagoblin.tools.pluginapi import hook_handle
+
+
+
+########################################################
+## Set up the MediaGoblin format checker for json-schema
+########################################################
+
+URL_REGEX = re.compile(
+ r'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
+ re.IGNORECASE)
+
+def is_uri(instance):
+ """
+ jsonschema uri validator
+ """
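+    # Only strings are checked here; other values pass so that the
+    # schema's own type validation can handle them.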
+ if not isinstance(instance, str_types):
+ return True
+
+ return URL_REGEX.match(instance)
+
+def is_datetime(instance):
+ """
+ Is a date or datetime readable string.
+ """
+ if not isinstance(instance, str_types):
+ return True
+
+ return dateutil.parser.parse(instance)
+
+
+class DefaultChecker(FormatChecker):
+ """
+ Default MediaGoblin format checker... extended to include a few extra things
+ """
+ checkers = copy.deepcopy(draft4_format_checker.checkers)
+
+
+DefaultChecker.checkers[u"uri"] = (is_uri, ())
+DefaultChecker.checkers[u"date-time"] = (is_datetime, (ValueError, TypeError))
+DEFAULT_CHECKER = DefaultChecker()
+
+# Crappy default schema, checks for things we deem important
+
+DEFAULT_SCHEMA = {
+ "$schema": "http://json-schema.org/schema#",
+
+ "type": "object",
+ "properties": {
+ "license": {
+ "format": "uri",
+ "type": "string",
+ },
+ "dcterms:created": {
+ "format": "date-time",
+ "type": "string",
+ }
+ },
+}
+
+
+def load_resource(package, resource_path):
+ """
+ Load a resource, return it as a string.
+
+ Args:
+ - package: package or module name. Eg "mediagoblin.media_types.audio"
+ - resource_path: path to get to this resource, a list of
+ directories and finally a filename. Will be joined with
+ os.path.sep.
+ """
+ filename = resource_filename(package, os.path.sep.join(resource_path))
+    with open(filename) as f:
+        return f.read()
+
+def load_resource_json(package, resource_path):
+ """
+ Load a resource json file, return a dictionary.
+
+ Args:
+ - package: package or module name. Eg "mediagoblin.media_types.audio"
+ - resource_path: path to get to this resource, a list of
+ directories and finally a filename. Will be joined with
+ os.path.sep.
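+
+    For example, the RDFa 1.1 context bundled with MediaGoblin could be
+    loaded as a dictionary with::
+
+        load_resource_json(
+            "mediagoblin", ["static", "metadata", "rdfa11.jsonld"])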
+ """
+ return json.loads(load_resource(package, resource_path))
+
+
+##################################
+## Load the MediaGoblin core files
+##################################
+
+
+BUILTIN_CONTEXTS = {
+ "http://www.w3.org/2013/json-ld-context/rdfa11": load_resource(
+ "mediagoblin", ["static", "metadata", "rdfa11.jsonld"])}
+
+
+_CONTEXT_CACHE = {}
+
+def load_context(url):
+ """
+ A self-aware document loader. For those contexts MediaGoblin
+ stores internally, load them from disk.
+ """
+ if url in _CONTEXT_CACHE:
+        return _CONTEXT_CACHE[url]
+
+ # See if it's one of our basic ones
+ document = BUILTIN_CONTEXTS.get(url, None)
+
+ # No? See if we have an internal schema for this
+ if document is None:
+ document = hook_handle(("context_url_data", url))
+
+ # Okay, if we've gotten a document by now... let's package it up
+ if document is not None:
+ document = {'contextUrl': None,
+ 'documentUrl': url,
+ 'document': document}
+
+ # Otherwise, use jsonld.load_document
+ else:
+ document = jsonld.load_document(url)
+
+ # cache
+ _CONTEXT_CACHE[url] = document
+ return document
+
+
+DEFAULT_CONTEXT = "http://www.w3.org/2013/json-ld-context/rdfa11"
+
+def compact_json(metadata, context=DEFAULT_CONTEXT):
+ """
+ Compact json with supplied context.
+
+    Note: "Free floating" nodes are removed (eg a key just named
+    "bazzzzzz" which isn't specified in the context); something like
+    bazzzzzz:blerp will stay though.  This is jsonld.compact behavior.
+ """
+ compacted = jsonld.compact(
+ metadata, context,
+ options={
+ "documentLoader": load_context,
+            # This allows for things like "license" etc. to be preserved
+ "expandContext": context,
+ "keepFreeFloatingNodes": False})
+
+ return compacted
+
+
+def compact_and_validate(metadata, context=DEFAULT_CONTEXT,
+ schema=DEFAULT_SCHEMA):
+ """
+ compact json with supplied context, check against schema for errors
+
+ raises an exception (jsonschema.exceptions.ValidationError) if
+ there's an error.
+
+    Note: "Free floating" nodes are removed (eg a key just named
+    "bazzzzzz" which isn't specified in the context); something like
+    bazzzzzz:blerp will stay though.  This is jsonld.compact behavior.
+
+ You may wish to do this validation yourself... this is just for convenience.
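+
+    A rough usage sketch (the values below are only examples)::
+
+        metadata = {
+            "dcterms:title": "Goblin Snoring",
+            "license": "http://creativecommons.org/licenses/by-sa/4.0/",
+            "dcterms:created": "2014-01-01"}
+        compacted = compact_and_validate(metadata)
+
+    A license that isn't a valid URI, or a dcterms:created value that
+    isn't a readable date, raises jsonschema.exceptions.ValidationError.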
+ """
+ compacted = compact_json(metadata, context)
+ validate(metadata, schema, format_checker=DEFAULT_CHECKER)
+
+ return compacted
+
+
+def expand_json(metadata, context=DEFAULT_CONTEXT):
+ """
+ Expand json, but be sure to use our documentLoader.
+
+ By default this expands with DEFAULT_CONTEXT, but if you do not need this,
+ you can safely set this to None.
+
+ # @@: Is the above a good idea? Maybe it should be set to None by
+ # default.
+ """
+ options = {
+ "documentLoader": load_context}
+ if context is not None:
+ options["expandContext"] = context
+ return jsonld.expand(metadata, options=options)
+
+
+def rdfa_to_readable(rdfa_predicate):
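+    """
+    Turn an rdfa-style predicate (eg u"dcterms:title") into a more
+    readable label (u"Title") by dropping the prefix and capitalizing.
+    """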
+ readable = rdfa_predicate.split(u":")[1].capitalize()
+ return readable