Starting to add metadata tools, as well as mediagoblin's schema
author Christopher Allan Webber <cwebber@dustycloud.org>
Wed, 7 May 2014 23:41:34 +0000 (18:41 -0500)
committer Christopher Allan Webber <cwebber@dustycloud.org>
Wed, 7 May 2014 23:41:34 +0000 (18:41 -0500)
mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld [new file with mode: 0644]
mediagoblin/tools/metadata.py [new file with mode: 0644]

diff --git a/mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld b/mediagoblin/static/metadata/mediagoblin-0.1.dev.jsonld
new file mode 100644 (file)
index 0000000..20a71b5
--- /dev/null
@@ -0,0 +1,47 @@
+{
+  "@context": {
+    "qb": "http://purl.org/linked-data/cube#",
+    "grddl": "http://www.w3.org/2003/g/data-view#",
+    "ma": "http://www.w3.org/ns/ma-ont#",
+    "owl": "http://www.w3.org/2002/07/owl#",
+    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+    "rdfa": "http://www.w3.org/ns/rdfa#",
+    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
+    "rif": "http://www.w3.org/2007/rif#",
+    "rr": "http://www.w3.org/ns/r2rml#",
+    "skos": "http://www.w3.org/2004/02/skos/core#",
+    "skosxl": "http://www.w3.org/2008/05/skos-xl#",
+    "wdr": "http://www.w3.org/2007/05/powder#",
+    "void": "http://rdfs.org/ns/void#",
+    "wdrs": "http://www.w3.org/2007/05/powder-s#",
+    "xhv": "http://www.w3.org/1999/xhtml/vocab#",
+    "xml": "http://www.w3.org/XML/1998/namespace",
+    "xsd": "http://www.w3.org/2001/XMLSchema#",
+    "prov": "http://www.w3.org/ns/prov#",
+    "sd": "http://www.w3.org/ns/sparql-service-description#",
+    "org": "http://www.w3.org/ns/org#",
+    "gldp": "http://www.w3.org/ns/people#",
+    "cnt": "http://www.w3.org/2008/content#",
+    "dcat": "http://www.w3.org/ns/dcat#",
+    "earl": "http://www.w3.org/ns/earl#",
+    "ht": "http://www.w3.org/2006/http#",
+    "ptr": "http://www.w3.org/2009/pointers#",
+    "cc": "http://creativecommons.org/ns#",
+    "ctag": "http://commontag.org/ns#",
+    "dc": "http://purl.org/dc/terms/",
+    "dc11": "http://purl.org/dc/elements/1.1/",
+    "dcterms": "http://purl.org/dc/terms/",
+    "foaf": "http://xmlns.com/foaf/0.1/",
+    "gr": "http://purl.org/goodrelations/v1#",
+    "ical": "http://www.w3.org/2002/12/cal/icaltzd#",
+    "og": "http://ogp.me/ns#",
+    "rev": "http://purl.org/stuff/rev#",
+    "sioc": "http://rdfs.org/sioc/ns#",
+    "v": "http://rdf.data-vocabulary.org/#",
+    "vcard": "http://www.w3.org/2006/vcard/ns#",
+    "schema": "http://schema.org/",
+    "describedby": "http://www.w3.org/2007/05/powder-s#describedby",
+    "license": "http://www.w3.org/1999/xhtml/vocab#license",
+    "role": "http://www.w3.org/1999/xhtml/vocab#role"
+  }
+}
diff --git a/mediagoblin/tools/metadata.py b/mediagoblin/tools/metadata.py
new file mode 100644 (file)
index 0000000..428e425
--- /dev/null
@@ -0,0 +1,106 @@
+# GNU MediaGoblin -- federated, autonomous media hosting
+# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+import copy
+import json
+import re
+from pkg_resources import resource_filename
+
+import dateutil.parser
+from pyld import jsonld
+from jsonschema import validate, FormatChecker, draft4_format_checker
+from jsonschema.compat import str_types
+
+
+MEDIAGOBLIN_CONTEXT_PATH = resource_filename(
+    "mediagoblin",
+    os.path.sep.join(["static", "metadata", "mediagoblin-0.1.dev.jsonld"]))
+# Parse the bundled JSON-LD context once at import time.  ``open`` in a
+# ``with`` block closes the handle deterministically, and unlike the
+# Python-2-only ``file`` builtin it is available on every Python version.
+with open(MEDIAGOBLIN_CONTEXT_PATH) as _context_file:
+    MEDIAGOBLIN_CONTEXT = json.load(_context_file)
+
+
+########################################################
+## Set up the MediaGoblin format checker for json-schema
+########################################################
+
+# Scheme, "://", a host (name or dotted quad), then optional port and path.
+URL_REGEX = re.compile(
+    r'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
+    flags=re.IGNORECASE)
+
+def is_uri(instance):
+    """
+    Format check for "uri": does the string match URL_REGEX?
+
+    Values that are not strings are passed (returns True); for strings the
+    result of URL_REGEX.match is returned, i.e. a match object or None.
+    """
+    if isinstance(instance, str_types):
+        return URL_REGEX.match(instance)
+
+    return True
+
+def is_datetime(instance):
+    """
+    Format check for "date-time": can dateutil parse the string?
+
+    Non-string values are passed (returns True).  Strings are handed to
+    dateutil.parser.parse and its result is returned as-is; any exception
+    it raises propagates to the caller.
+    """
+    if isinstance(instance, str_types):
+        return dateutil.parser.parse(instance)
+
+    return True
+
+
+class DefaultChecker(FormatChecker):
+    """
+    Default MediaGoblin format checker... extended to include a few extra things
+
+    Starts from a private copy of the draft4 checkers so the registrations
+    below do not alter jsonschema's own draft4_format_checker.
+    """
+    checkers = copy.deepcopy(draft4_format_checker.checkers)
+
+
+# Each entry is (check function, exceptions that count as a failed check).
+DefaultChecker.checkers[u"uri"] = (is_uri, ())
+# dateutil signals an unparseable string with ValueError/TypeError and an
+# out-of-range date with OverflowError; all of them should mean "not a
+# date-time" rather than a crash in the middle of validation.
+DefaultChecker.checkers[u"date-time"] = (
+    is_datetime, (ValueError, TypeError, OverflowError))
+DEFAULT_CHECKER = DefaultChecker()
+
+# Minimal default schema: only checks the fields we deem important.
+
+DEFAULT_SCHEMA = {
+    "$schema": "http://json-schema.org/schema#",
+    "type": "object",
+    "properties": {
+        # Neither field is required; when present each must be a string
+        # in the named format.
+        "dcterms:rights": {"type": "string", "format": "uri"},
+        "dcterms:created": {"type": "string", "format": "date-time"},
+    },
+}
+
+
+def compact_and_validate(metadata, context=MEDIAGOBLIN_CONTEXT,
+                         schema=DEFAULT_SCHEMA):
+    """
+    Compact a JSON-LD document and run a json-schema check in one call.
+
+    metadata is compacted against context (MEDIAGOBLIN_CONTEXT unless
+    overridden) and validated against schema (DEFAULT_SCHEMA unless
+    overridden) using DEFAULT_CHECKER for "format" keywords.
+
+    Returns the compacted document.  A failed check raises
+    jsonschema.exceptions.ValidationError.
+
+    This is only a convenience wrapper; callers are free to compact and
+    validate on their own instead.
+    """
+    compact_doc = jsonld.compact(metadata, context)
+    # NOTE(review): validation runs on the caller's `metadata`, not on the
+    # compacted result -- confirm this is intended.
+    validate(metadata, schema, format_checker=DEFAULT_CHECKER)
+
+    return compact_doc