1 # GNU MediaGoblin -- federated, autonomous media hosting
2 # Copyright (C) 2011, 2012 MediaGoblin contributors. See AUTHORS.
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
22 from pkg_resources
import resource_filename
24 import dateutil
.parser
25 from pyld
import jsonld
26 from jsonschema
import validate
, FormatChecker
, draft4_format_checker
27 from jsonschema
.compat
import str_types
29 from mediagoblin
.tools
.pluginapi
import hook_handle
33 ########################################################
34 ## Set up the MediaGoblin format checker for json-schema
35 ########################################################
37 URL_REGEX
= re
.compile(
38 r
'^[a-z]+://([^/:]+|([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]+)?(\/.*)?$',
def is_uri(instance):
    """
    Format check used for the json-schema "uri" format.

    Only strings are inspected.  For a string, the result of matching it
    against URL_REGEX is returned: a match object (truthy) when the
    pattern matches, None when it does not.
    """
    if isinstance(instance, str_types):
        return URL_REGEX.match(instance)

    # Anything that is not a string is outside the scope of this check,
    # so it is let through.
    return True
def is_datetime(instance):
    """
    Format check for strings that should hold a date or datetime.

    Only strings are inspected.  A string is handed to
    dateutil.parser.parse and the parsed value is returned; any error
    raised by the parser is left to propagate to the caller.
    """
    if isinstance(instance, str_types):
        return dateutil.parser.parse(instance)

    # Anything that is not a string is outside the scope of this check,
    # so it is let through.
    return True
class DefaultChecker(FormatChecker):
    """
    MediaGoblin's stock json-schema format checker.

    Starts out with the same checks as jsonschema's draft 4 format
    checker, and is the place where a few MediaGoblin extras get added.
    """
    # Deep copy: changes made to this table must not reach the table
    # owned by draft4_format_checker.
    checkers = copy.deepcopy(draft4_format_checker.checkers)
# Register MediaGoblin's own "uri" and "date-time" checks.  Each entry is
# a (check function, exception types) pair; for "date-time" the
# ValueError / TypeError raised by the parser are the listed exceptions.
DefaultChecker.checkers.update({
    u"uri": (is_uri, ()),
    u"date-time": (is_datetime, (ValueError, TypeError)),
})

# Instantiated only after the table above is filled in.
DEFAULT_CHECKER = DefaultChecker()
71 # Crappy default schema, checks for things we deem important
74 "$schema": "http://json-schema.org/schema#",
83 "format": "date-time",
87 "format": "date-time",
def load_resource(package, resource_path):
    """
    Load a resource, return it as a string.

    Args:
    - package: package or module name.  Eg "mediagoblin.media_types.audio"
    - resource_path: path to get to this resource, a list of
      directories and finally a filename.  The components are joined
      with "/" to form the resource name handed to pkg_resources.

    Returns the full contents of the resource file.
    """
    # pkg_resources resource names are always "/"-separated; they are not
    # filesystem paths, so os.path.sep is the wrong separator for them.
    # On POSIX systems this is identical to the previous behaviour.
    filename = resource_filename(package, "/".join(resource_path))

    # open() in a with-block replaces the Python 2-only file() builtin:
    # it works on Python 2 and 3 alike and closes the handle as soon as
    # the contents have been read.
    with open(filename) as resource_file:
        return resource_file.read()
def load_resource_json(package, resource_path):
    """
    Read a JSON resource file and return the decoded data (normally a
    dictionary).

    Args:
    - package: package or module name.  Eg "mediagoblin.media_types.audio"
    - resource_path: path to get to this resource, a list of
      directories and finally a filename.  Passed to load_resource
      unchanged.
    """
    raw_json = load_resource(package, resource_path)
    return json.loads(raw_json)
120 ##################################
121 ## Load the MediaGoblin core files
122 ##################################
126 "http://www.w3.org/2013/json-ld-context/rdfa11": load_resource(
127 "mediagoblin", ["static", "metadata", "rdfa11.jsonld"])}
def load_context(url):
    """
    Document loader that knows about the contexts MediaGoblin ships.

    Lookup order for a context url:
    1. the in-memory cache of earlier lookups
    2. BUILTIN_CONTEXTS (contexts stored with MediaGoblin itself)
    3. the "context_url_data" plugin hook
    4. jsonld.load_document, for everything else

    Whatever is found is stored in the cache under url and returned.
    """
    if url in _CONTEXT_CACHE:
        return _CONTEXT_CACHE[url]

    # One of our basic contexts?  If not, give plugins a chance to
    # supply the data for this url.
    context_data = BUILTIN_CONTEXTS.get(url, None)
    if context_data is None:
        context_data = hook_handle(("context_url_data", url))

    if context_data is None:
        # Nothing known internally: fall back to jsonld.load_document
        loaded = jsonld.load_document(url)
    else:
        # We have the data ourselves, so package it up in the layout a
        # pyld document loader is expected to return.
        # NOTE(review): the 'documentUrl' entry follows that layout --
        # confirm it matches what this module has always returned.
        loaded = {'contextUrl': None,
                  'documentUrl': url,
                  'document': context_data}

    _CONTEXT_CACHE[url] = loaded
    return loaded
DEFAULT_CONTEXT = "http://www.w3.org/2013/json-ld-context/rdfa11"


def compact_json(metadata, context=DEFAULT_CONTEXT):
    """
    Compact a json(-ld) structure against the supplied context.

    Contexts are resolved through load_context, so the contexts that
    MediaGoblin knows about are not fetched over the network.

    Note: "Free floating" nodes are removed (eg a key just named
      "bazzzzzz" which isn't specified in the context)... something like
      bazzzzzz:blerp will stay though.  This is jsonld.compact behavior.
    """
    compact_options = {
        "documentLoader": load_context,
        # This allows for things like "license" and etc to be preserved
        "expandContext": context,
        "keepFreeFloatingNodes": False,
    }
    return jsonld.compact(metadata, context, options=compact_options)
def compact_and_validate(metadata, context=DEFAULT_CONTEXT,
                         schema=DEFAULT_SCHEMA):
    """
    Compact json with the supplied context and check it against a schema.

    Raises jsonschema.exceptions.ValidationError if the schema check
    finds an error; otherwise the compacted json is returned.

    Note: "Free floating" nodes are removed (eg a key just named
      "bazzzzzz" which isn't specified in the context)... something like
      bazzzzzz:blerp will stay though.  This is jsonld.compact behavior.

    You may wish to do this validation yourself... this is just for
    convenience.
    """
    # Compaction runs first, so an error raised while compacting
    # surfaces before any schema check happens.
    result = compact_json(metadata, context)

    # NOTE(review): the schema check is applied to the metadata as it was
    # passed in, not to the compacted result -- confirm this is intended.
    validate(metadata, schema, format_checker=DEFAULT_CHECKER)

    return result
def expand_json(metadata, context=DEFAULT_CONTEXT):
    """
    Expand json with jsonld.expand, always going through our own
    documentLoader (load_context).

    The expansion uses DEFAULT_CONTEXT as its expandContext unless told
    otherwise; pass context=None to expand without an expandContext.

    # @@: Is the above a good idea?  Maybe it should be set to None by
    #   default.
    """
    if context is None:
        expand_options = {"documentLoader": load_context}
    else:
        expand_options = {"documentLoader": load_context,
                          "expandContext": context}

    return jsonld.expand(metadata, options=expand_options)
220 def rdfa_to_readable(rdfa_predicate
):
221 readable
= rdfa_predicate
.split(u
":")[1].capitalize()