From 4122acc769c78dbd5d07e72254b4f8b13936d179 Mon Sep 17 00:00:00 2001
From: "C.A.M. Gerlach"
Date: Mon, 6 Sep 2021 18:43:43 -0500
Subject: [PATCH] Add basic CLI and allow passing source and target URIs as args

---
 pull.py | 51 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/pull.py b/pull.py
index 60e2280..f67d2e7 100755
--- a/pull.py
+++ b/pull.py
@@ -4,20 +4,21 @@
 
 """Generate the FSF license API JSON data from the FSF license list page."""
 
+import argparse
 import glob
 import html
 import io
 import json
 import os
 import re
-import sys
 import urllib.parse
 import urllib.request
 
 import lxml.etree
 
 
-URI = 'https://www.gnu.org/licenses/license-list.html'
+SOURCE_URI = 'https://www.gnu.org/licenses/license-list.html'
+API_BASE_URI = 'https://wking.github.io/fsf-api/'
 
 TAGS = {
     'blue': {'viewpoint'},
@@ -387,16 +388,15 @@ def save(licenses, base_uri, output_dir=os.curdir):
             f.write('\n')
 
 
-def main(sys_argv=None):
+def generate_api(
+    output_dir=os.curdir,
+    source_uri=SOURCE_URI,
+    api_base_uri=API_BASE_URI,
+):
     """Load the license list page, parse it and generate the API output."""
-    if sys_argv is None:
-        sys_argv = sys.argv
-    output_dir = os.curdir
-    if sys_argv and len(sys_argv) > 1:
-        output_dir = sys_argv[1]
-    tree = get(uri=URI)
+    tree = get(uri=source_uri)
     root = tree.getroot()
-    licenses = extract(root=root, base_uri=URI)
+    licenses = extract(root=root, base_uri=source_uri)
     unused_identifiers = {key for key in IDENTIFIERS if key not in licenses}
     if unused_identifiers:
         raise ValueError(
@@ -406,10 +406,39 @@ def main(sys_argv=None):
         )
     save(
         licenses=licenses,
-        base_uri='https://wking.github.io/fsf-api/',
+        base_uri=api_base_uri,
         output_dir=output_dir,
     )
 
 
+def generate_arg_parser():
+    """Create the CLI argument parser object for the script."""
+    parser_main = argparse.ArgumentParser(
+        description='Generate the FSF license API JSON data.',
+        argument_default=argparse.SUPPRESS,
+    )
+    parser_main.add_argument(
+        'output_dir',
+        nargs='?',
+        help='The directory to output the API data to, the CWD by default',
+    )
+    parser_main.add_argument(
+        '--source-uri',
+        help='A custom source URI to load the FSF license list page from',
+    )
+    parser_main.add_argument(
+        '--api-base-uri',
+        help='A custom base URL for the output API',
+    )
+    return parser_main
+
+
+def main(sys_argv=None):
+    """Run the API generation script with the specified CLI options."""
+    arg_parser = generate_arg_parser()
+    cli_args = arg_parser.parse_args(sys_argv)
+    generate_api(**vars(cli_args))
+
+
 if __name__ == '__main__':
     main()
-- 
2.25.1