From be8b5a8aeb1a9fead1ce67bb71deb9fb9765937d Mon Sep 17 00:00:00 2001
From: Marc Mueller <30130371+cdce8p@users.noreply.github.com>
Date: Fri, 25 Oct 2024 12:41:05 +0200
Subject: [PATCH] Add option to extract licenses [ci] (#129095)

---
 .github/workflows/ci.yaml |  16 +++--
 requirements_test.txt     |   1 -
 script/licenses.py        | 142 ++++++++++++++++++++++++++++++--------
 3 files changed, 122 insertions(+), 37 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index e812016bf64..e5b5e1a042d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -615,6 +615,10 @@ jobs:
         && github.event.inputs.mypy-only != 'true'
         || github.event.inputs.audit-licenses-only == 'true')
       && needs.info.outputs.requirements == 'true'
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ${{ fromJson(needs.info.outputs.python_versions) }}
     steps:
       - name: Check out code from GitHub
         uses: actions/checkout@v4.2.2
@@ -633,19 +637,19 @@ jobs:
           key: >-
             ${{ runner.os }}-${{ steps.python.outputs.python-version }}-${{
             needs.info.outputs.python_cache_key }}
-      - name: Run pip-licenses
+      - name: Extract license data
         run: |
           . venv/bin/activate
-          pip-licenses --format=json --output-file=licenses.json
+          python -m script.licenses extract --output-file=licenses-${{ matrix.python-version }}.json
       - name: Upload licenses
         uses: actions/upload-artifact@v4.4.3
         with:
-          name: licenses
-          path: licenses.json
-      - name: Process licenses
+          name: licenses-${{ github.run_number }}-${{ matrix.python-version }}
+          path: licenses-${{ matrix.python-version }}.json
+      - name: Check licenses
         run: |
           . venv/bin/activate
-          python -m script.licenses licenses.json
+          python -m script.licenses check licenses-${{ matrix.python-version }}.json
 
   pylint:
     name: Check pylint
diff --git a/requirements_test.txt b/requirements_test.txt
index 9d63c10c500..2950b178406 100644
--- a/requirements_test.txt
+++ b/requirements_test.txt
@@ -17,7 +17,6 @@ pydantic==1.10.18
 pylint==3.3.1
 pylint-per-file-ignores==1.3.2
 pipdeptree==2.23.4
-pip-licenses==5.0.0
 pytest-asyncio==0.24.0
 pytest-aiohttp==1.0.5
 pytest-cov==5.0.0
diff --git a/script/licenses.py b/script/licenses.py
index 52a4883bfe9..10fcebb7808 100644
--- a/script/licenses.py
+++ b/script/licenses.py
@@ -2,16 +2,28 @@
 
 from __future__ import annotations
 
-from argparse import ArgumentParser
+from argparse import ArgumentParser, Namespace
 from collections.abc import Sequence
 from dataclasses import dataclass
+from importlib import metadata
 import json
 from pathlib import Path
 import sys
+from typing import TypedDict, cast
 
 from awesomeversion import AwesomeVersion
 
 
+class PackageMetadata(TypedDict):
+    """License related metadata collected for one installed distribution."""
+
+    name: str
+    version: str
+    license_expression: str | None  # 'License-Expression' metadata field
+    license_metadata: str | None  # 'License' metadata field
+    license_classifier: list[str]  # names taken from 'License' classifiers
+
+
 @dataclass
 class PackageDefinition:
     """Package definition."""
@@ -21,12 +33,16 @@ class PackageDefinition:
     version: AwesomeVersion
 
     @classmethod
-    def from_dict(cls, data: dict[str, str]) -> PackageDefinition:
-        """Create a package definition from a dictionary."""
+    def from_dict(cls, data: PackageMetadata) -> PackageDefinition:
+        """Build from metadata: classifiers, else License, else License-Expression."""
+        if not (license_str := "; ".join(data["license_classifier"])):
+            license_str = (
+                data["license_metadata"] or data["license_expression"] or "UNKNOWN"
+            )
         return cls(
-            license=data["License"],
-            name=data["Name"],
-            version=AwesomeVersion(data["Version"]),
+            license=license_str,
+            name=data["name"],
+            version=AwesomeVersion(data["version"]),
         )
 
 
@@ -128,7 +144,6 @@ EXCEPTIONS = {
     "aioecowitt",  # https://github.com/home-assistant-libs/aioecowitt/pull/180
     "aioopenexchangerates",  # https://github.com/MartinHjelmare/aioopenexchangerates/pull/94
     "aiooui",  # https://github.com/Bluetooth-Devices/aiooui/pull/8
-    "aioruuvigateway",  # https://github.com/akx/aioruuvigateway/pull/6
     "apple_weatherkit",  # https://github.com/tjhorner/python-weatherkit/pull/3
     "asyncio",  # PSF License
     "chacha20poly1305",  # LGPL
@@ -159,14 +174,10 @@ EXCEPTIONS = {
     "pyvera",  # https://github.com/maximvelichko/pyvera/pull/164
     "pyxeoma",  # https://github.com/jeradM/pyxeoma/pull/11
     "repoze.lru",
-    "ruuvitag-ble",  # https://github.com/Bluetooth-Devices/ruuvitag-ble/pull/10
-    "sensirion-ble",  # https://github.com/akx/sensirion-ble/pull/9
     "sharp_aquos_rc",  # https://github.com/jmoore987/sharp_aquos_rc/pull/14
     "tapsaff",  # https://github.com/bazwilliams/python-taps-aff/pull/5
     "vincenty",  # Public domain
     "zeversolar",  # https://github.com/kvanzuijlen/zeversolar/pull/46
-    # Using License-Expression (with hatchling)
-    "ftfy",  # Apache-2.0
 }
 
 TODO = {
@@ -176,22 +187,9 @@ TODO = {
 }
 
 
-def main(argv: Sequence[str] | None = None) -> int:
-    """Run the main script."""
+def check_licenses(args: CheckArgs) -> int:
+    """Check licenses are OSI approved."""
     exit_code = 0
-
-    parser = ArgumentParser()
-    parser.add_argument(
-        "path",
-        nargs="?",
-        metavar="PATH",
-        default="licenses.json",
-        help="Path to json licenses file",
-    )
-
-    argv = argv or sys.argv[1:]
-    args = parser.parse_args(argv)
-
     raw_licenses = json.loads(Path(args.path).read_text())
     package_definitions = [PackageDefinition.from_dict(data) for data in raw_licenses]
     for package in package_definitions:
@@ -244,8 +242,92 @@ def main(argv: Sequence[str] | None = None) -> int:
     return exit_code
 
 
+def extract_licenses(args: ExtractArgs) -> int:
+    """Write license metadata of all installed packages as JSON, sorted by name."""
+    licenses = sorted(
+        [get_package_metadata(dist) for dist in metadata.distributions()],
+        key=lambda package: package["name"],
+    )
+    Path(args.output_file).write_text(json.dumps(licenses, indent=2))
+    return 0
+
+
+def get_package_metadata(dist: metadata.Distribution) -> PackageMetadata:
+    """Collect name, version and the license fields of a distribution."""
+    meta = dist.metadata
+    classifiers = extract_license_classifier(meta.get_all("Classifier"))
+    return PackageMetadata(
+        name=dist.name,
+        version=dist.version,
+        license_expression=meta.get("License-Expression"),
+        license_metadata=meta.get("License"),
+        license_classifier=classifiers,
+    )
+
+
+def extract_license_classifier(classifiers: list[str] | None) -> list[str]:
+    """Return the license names found in a list of trove classifiers.
+
+    Only the last ' :: ' separated segment is kept, e.g.
+    'License :: OSI Approved :: MIT License' -> 'MIT License'.
+    A bare 'License :: OSI Approved' is filtered out.
+    """
+    found: list[str] = []
+    for classifier in classifiers or ():
+        name = classifier.rpartition(" :: ")[2]
+        if classifier.startswith("License") and name and name != "OSI Approved":
+            found.append(name)
+    return found
+
+
+class ExtractArgs(Namespace):
+    """Typed view of the arguments parsed for the 'extract' subcommand."""
+
+    output_file: str  # value of the --output-file option
+
+
+class CheckArgs(Namespace):
+    """Typed view of the arguments parsed for the 'check' subcommand."""
+
+    path: str  # positional PATH argument, the json licenses file to check
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Run the 'extract' or 'check' subcommand and return its exit code."""
+    parser = ArgumentParser()
+    subparsers = parser.add_subparsers(title="Subcommands", required=True)
+
+    parser_extract = subparsers.add_parser("extract")
+    parser_extract.set_defaults(action="extract")
+    parser_extract.add_argument(
+        "--output-file",
+        default="licenses.json",
+        help="Path to store the licenses file",
+    )
+
+    parser_check = subparsers.add_parser("check")
+    parser_check.set_defaults(action="check")
+    parser_check.add_argument(
+        "path",
+        nargs="?",
+        metavar="PATH",
+        default="licenses.json",
+        help="Path to json licenses file",
+    )
+
+    # parse_args(None) reads sys.argv[1:]; an explicit empty list stays empty.
+    args = parser.parse_args(argv)
+
+    if args.action == "extract":
+        args = cast(ExtractArgs, args)
+        return extract_licenses(args)
+    if args.action == "check":
+        args = cast(CheckArgs, args)
+        if (exit_code := check_licenses(args)) == 0:
+            print("All licenses are approved!")
+        return exit_code
+    return 0
+
+
 if __name__ == "__main__":
-    exit_code = main()
-    if exit_code == 0:
-        print("All licenses are approved!")
-    sys.exit(exit_code)
+    sys.exit(main())
-- 
GitLab