summaryrefslogtreecommitdiff
path: root/lib/python/qmk/cli/license_check.py
blob: 4bda272ec9bb7a9bdeb0a2e9e904d383e8cbcaec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Copyright 2023 Nick Brassel (@tzarc)
# SPDX-License-Identifier: GPL-2.0-or-later
import re
from pathlib import Path
from milc import cli
from qmk.constants import LICENSE_TEXTS

# Patterns used by _simplify_text() to normalise text before license matching.
#
# Any opening bracket -- one of ( [ { < -- is rewritten to '(' and any closing
# bracket -- one of ) ] } > -- is rewritten to ')'.  These must be character
# classes: written as a plain escaped sequence the pattern would only match
# the literal four-character string "([{<" (or ")]}>"), which never occurs in
# real text and would turn the normalisation into a no-op.
L_PAREN = re.compile(r'[(\[{<]')
R_PAREN = re.compile(r'[)\]}>]')
# Runs of '.', ',', ';' and ':' (removed entirely by _simplify_text()).
PUNCTUATION = re.compile(r'[\.,;:]+')
# Leading run of whitespace, '/', '*' or '#' at the start of a line.
TRASH_PREFIX = re.compile(r'^(\s|/|\*|#)+')
# Trailing run of whitespace, '/', '*', '#' or backslash at the end of a line.
TRASH_SUFFIX = re.compile(r'(\s|/|\*|#|\\)+$')
# Any run of whitespace (collapsed to a single space by _simplify_text()).
SPACE = re.compile(r'\s+')
# File suffixes checked by default when no --extension override is given.
SUFFIXES = ['.c', '.h', '.cpp', '.cxx', '.hpp', '.hxx']


def _simplify_text(input):
    """Reduce a block of text to a single normalised line for comparison.

    The text is lower-cased and split on newlines.  Each line then has the
    module-level patterns applied in a fixed order: PUNCTUATION matches are
    removed, TRASH_PREFIX and TRASH_SUFFIX matches are stripped, SPACE matches
    become a single space, and L_PAREN / R_PAREN matches become '(' and ')'.
    Lines that are empty after a final strip are dropped, and the remaining
    lines are joined with single spaces.
    """
    # Order matters: punctuation is removed before the prefix/suffix trimming,
    # and whitespace is collapsed before the bracket substitutions.
    pipeline = (
        (PUNCTUATION, ''),
        (TRASH_PREFIX, ''),
        (TRASH_SUFFIX, ''),
        (SPACE, ' '),
        (L_PAREN, '('),
        (R_PAREN, ')'),
    )

    kept = []
    for text in input.lower().split('\n'):
        for pattern, replacement in pipeline:
            text = pattern.sub(replacement, text)
        text = text.strip()
        if text:
            kept.append(text)
    return ' '.join(kept)


def _detect_license_from_file_contents(filename, absolute=False):
    """Detect and report the license of one file.

    The file is read as UTF-8 with undecodable bytes ignored.  If it contains
    an ``SPDX-License-Identifier:`` marker, the token following the first
    marker is compared case-insensitively against the short names in
    LICENSE_TEXTS; no full-text matching is attempted in that case.  Otherwise
    the simplified file text is searched for each long license text in
    LICENSE_TEXTS, and the first match wins.

    The outcome is printed as ``<path> <license>`` (or ``<path> UNKNOWN``)
    when ``cli.args.short`` is set, and logged through ``cli.log`` otherwise.

    Args:
        filename: Path-like object providing ``read_text()`` and ``absolute()``.
        absolute: When true, report the absolute path instead of ``str(filename)``.

    Returns:
        True if a known license was detected, False otherwise.
    """
    contents = filename.read_text(encoding='utf-8', errors='ignore')
    shown_path = str(filename.absolute()) if absolute else str(filename)

    def _report_unknown():
        # Emit the "no license" result and hand back the failure value.
        if cli.args.short:
            print(f'{shown_path} UNKNOWN')
        else:
            cli.log.error(f'{{fg_cyan}}{shown_path}{{fg_reset}} -- unknown license, or no license detected!')
        return False

    def _report_detected(identifier, method):
        # Emit the detected license and hand back the success value.
        if cli.args.short:
            print(f'{shown_path} {identifier}')
        else:
            cli.log.info(f'{{fg_cyan}}{shown_path}{{fg_reset}} -- license detected: {identifier} ({method})')
        return True

    marker = 'SPDX-License-Identifier:'
    if marker in contents:
        # Only the text after the first marker is inspected; the identifier
        # ends at the first whitespace, '//' or '*'.
        after_marker = contents.split(marker)[1].strip()
        declared = re.split(r'\s|//|\*', after_marker)[0].strip()

        # Map the declared identifier onto the canonical spelling, if known.
        canonical = next(
            (short for short, _ in LICENSE_TEXTS if declared.lower() == short.lower()),
            None,
        )
        if canonical is None:
            return _report_unknown()
        return _report_detected(canonical, 'SPDX License Identifier')

    # No SPDX marker: fall back to searching for a known license body.
    simplified = _simplify_text(contents)
    for short_license, long_licenses in LICENSE_TEXTS:
        if any(long_text in simplified for long_text in long_licenses):
            return _report_detected(short_license, 'Full text')

    return _report_unknown()


@cli.argument('inputs', nargs='*', arg_only=True, type=Path, help='List of input files or directories.')
@cli.argument('-s', '--short', action='store_true', help='Short output.')
@cli.argument('-a', '--absolute', action='store_true', help='Print absolute paths.')
@cli.argument('-e', '--extension', arg_only=True, action='append', default=[], help='Override list of extensions. Can be specified multiple times for multiple extensions.')
@cli.subcommand('File license check.', hidden=False if cli.config.user.developer else True)
def license_check(cli):
    """Check the license of every matching file under the given inputs.

    Directories are searched recursively; plain files are checked directly.
    Only files whose suffix is in SUFFIXES are considered, unless one or more
    ``--extension`` values are given, in which case those replace the default
    list (a leading '.' is added when missing).

    Returns False if any checked file had no recognised license; otherwise
    the function ends without an explicit return value.
    """
    # Decide which suffixes are accepted for this run.
    if len(cli.args.extension) > 0:
        accepted = [ext if ext.startswith('.') else f'.{ext}' for ext in cli.args.extension]
    else:
        accepted = SUFFIXES

    # Pre-format all the licenses.  The lists inside LICENSE_TEXTS are updated
    # in place so the detection helper compares simplified text on both sides.
    for _, long_licenses in LICENSE_TEXTS:
        long_licenses[:] = [_simplify_text(text) for text in long_licenses]

    # Gather the unique set of files to inspect.
    targets = set()
    for entry in sorted(cli.args.inputs):
        if entry.is_dir():
            targets.update(path for path in sorted(entry.rglob('*')) if path.is_file() and path.suffix in accepted)
        elif entry.is_file() and entry.suffix in accepted:
            targets.add(entry)

    # Build the full list first so that every file is checked and reported,
    # even after an earlier one has already failed.
    outcomes = [_detect_license_from_file_contents(path, absolute=cli.args.absolute) for path in sorted(targets)]

    if not all(outcomes):
        return False