From 53f60487ed3f4fca0097dc81234b62f7007155c3 Mon Sep 17 00:00:00 2001
From: "C.A.M. Gerlach" <CAM.Gerlach@Gerlach.CAM>
Date: Mon, 6 Sep 2021 18:15:02 -0500
Subject: [PATCH] Add basic docstrings to all functions and the module itself

---
 pull.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pull.py b/pull.py
index 97ea77e..60e2280 100755
--- a/pull.py
+++ b/pull.py
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: MIT
 
+"""Generate the FSF license API JSON data from the FSF license list page."""
+
 import glob
 import html
 import io
@@ -229,6 +231,7 @@ IDENTIFIERS = {
 
 
 def convert_html_escapes_to_xml(html_text):
+    """Avoid XML parsing errors by converting HTML escape codes to XML."""
     html_entities = set(
         re.findall(r'&(?!quot|lt|gt|amp|apos)[a-zA-Z]{1,30};', html_text)
     )
@@ -238,6 +241,7 @@ def convert_html_escapes_to_xml(html_text):
 
 
 def get(uri):
+    """Get the license list page data from the FSF web site."""
     parser = lxml.etree.XMLParser(ns_clean=True, resolve_entities=False)
     with urllib.request.urlopen(uri) as response:
         response_data = response.read().decode()
@@ -247,6 +251,7 @@ def get(uri):
 
 
 def extract(root, base_uri=None):
+    """Parse the license list page and extract the needed license data."""
     oids = set()
     licenses = {}
     for dl in root.iter(tag='{http://www.w3.org/1999/xhtml}dl'):
@@ -296,6 +301,7 @@ def extract(root, base_uri=None):
 
 
 def save(licenses, base_uri, output_dir=os.curdir):
+    """Save the license data to files in the appropriate JSON schema."""
     schema_dir = os.path.join(output_dir, 'schema')
     os.makedirs(schema_dir, exist_ok=True)
     paths = glob.glob(os.path.join(output_dir, '**', '*.json'), recursive=True)
@@ -382,6 +388,7 @@ def save(licenses, base_uri, output_dir=os.curdir):
 
 
 def main(sys_argv=None):
+    """Load the license list page, parse it and generate the API output."""
     if sys_argv is None:
         sys_argv = sys.argv
     output_dir = os.curdir
-- 
2.25.1