From 94fe202d1b633f5f472a237df0875e6207055138 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 25 Oct 2017 11:05:19 -0700 Subject: [PATCH] pull: Add {scheme}/{id}.json hardlinks Putting additional metadata in the licenses.json index is a slippery-slope. This commit strips it down to an array of IDs, and *all* per-license metadata must be fetched via a single-license endpoint. The new per-scheme links allow folks to retrieve per-license metadata using their chosen scheme (assuming they trust the mapping maintained in this API) without having to iterate over licenses.json retrieving {FSF-id}.json until they find a match. The OSI API uses the same approach [1], although they currently have a fatter index [2,3,4,5]. os.link is implemented on Unix and Windows [6]. The ** recursive glob pattern is new in Python 3.5 [7]; for older Pythons I'm falling back to two non-recursive glob calls. [1]: https://github.com/OpenSourceOrg/api/blob/c903651ef26c35202d6561b61b97d29ead1e08c5/doc/endpoints.md#licenseschemeidentifier [2]: https://github.com/OpenSourceOrg/api/blob/c903651ef26c35202d6561b61b97d29ead1e08c5/doc/endpoints.md#licenses [3]: https://github.com/OpenSourceOrg/api/blob/c903651ef26c35202d6561b61b97d29ead1e08c5/api.go#L52 [4]: https://github.com/OpenSourceOrg/api/blob/c903651ef26c35202d6561b61b97d29ead1e08c5/reload.go#L28 [5]: https://github.com/OpenSourceOrg/api/blob/c903651ef26c35202d6561b61b97d29ead1e08c5/license/license.go#L67 [6]: https://docs.python.org/3.6/library/os.html#os.link [7]: https://docs.python.org/3.6/library/glob.html#glob.glob --- README.md | 18 +++++++++++++++++- pull.py | 26 ++++++++++++++++++-------- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ac90c56..0cdc83a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,22 @@ Ideally we'll hand this repository over to the FSF once they're ready to maintai You can pull the set of identifiers from 
[https://wking.github.io/fsf-api/licenses.json](https://wking.github.io/fsf-api/licenses.json). -You can pull an individual license from `https://wking.github.io/fsf-api/{id}.json`, for example [https://wking.github.io/fsf-api/Expat.json](https://wking.github.io/fsf-api/Expat.json). +You can pull an individual license from a few places: + +* Using their FSF ID: + + https://wking.github.io/fsf-api/{id}.json + + For example [https://wking.github.io/fsf-api/Expat.json](https://wking.github.io/fsf-api/Expat.json). + +* Using a non-FSF ID, according to the mapping between the other scheme and the FSF scheme asserted by this API: + + https://wking.github.io/fsf-api/{scheme}/{id}.json + + For example [https://wking.github.io/fsf-api/spdx/MIT.json](https://wking.github.io/fsf-api/spdx/MIT.json). + This API currently [attempts](#caveats) to maintain the following mappings: + + * `spdx`, using [the SPDX identifiers][spdx-list]. ## Caveats @@ -41,3 +56,4 @@ Until these hacks are addressed, license IDs and the `identifiers` field should [osi-api-non-canon-2]: https://github.com/OpenSourceOrg/licenses/issues/47 [osi-api-noncanon-1]: https://github.com/OpenSourceOrg/licenses/tree/f7ff223f9694ca0d5114fc82e43c74b5c5087891#is-this-authoritative [osi-api]: https://api.opensource.org/ +[spdx-list]: https://spdx.org/licenses/ diff --git a/pull.py b/pull.py index 3e0c81c..7ad74fe 100755 --- a/pull.py +++ b/pull.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: MIT import glob +import itertools import json import os import sys @@ -158,23 +159,32 @@ def extract(root, base_uri=None): def save(licenses, dir=os.curdir): os.makedirs(dir, exist_ok=True) - for path in glob.glob(os.path.join(dir, '*.json')): + if sys.version_info >= (3, 5): + paths = glob.glob(os.path.join(dir, '**', '*.json'), recursive=True) + else: + paths = itertools.chain( + glob.glob(os.path.join(dir, '*.json')), + glob.glob(os.path.join(dir, '*', '*.json')), + ) + for path in paths: os.remove(path) - index = {} - for id, license 
in licenses.items(): - index[id] = {'name': license['name']} - if 'identifiers' in license: - index[id]['identifiers'] = license['identifiers'] + index = sorted(licenses.keys()) with open(os.path.join(dir, 'licenses.json'), 'w') as f: - json.dump(obj=index, fp=f, indent=2, sort_keys=True) + json.dump(obj=index, fp=f, indent=2) f.write('\n') for id, license in licenses.items(): license = license.copy() if 'tags' in license: license['tags'] = sorted(license['tags']) - with open(os.path.join(dir, '{}.json'.format(id)), 'w') as f: + license_path = os.path.join(dir, '{}.json'.format(id)) + with open(license_path, 'w') as f: json.dump(obj=license, fp=f, indent=2, sort_keys=True) f.write('\n') + for scheme, identifier in license.get('identifiers', {}).items(): + scheme_dir = os.path.join(dir, scheme) + os.makedirs(scheme_dir, exist_ok=True) + id_path = os.path.join(scheme_dir, '{}.json'.format(identifier)) + os.link(license_path, id_path) if __name__ == '__main__': -- 2.25.1