monolisa-nerdfont-patch/bin/scripts/name_parser/FontnameTools.py

#!/usr/bin/env python
# coding=utf8

import re
import sys


class FontnameTools:
    """Deconstruct a font filename to get standardized name parts"""

    @staticmethod
    def front_upper(word):
        """Capitalize a string (but keep case of subsequent chars)"""
        return word[:1].upper() + word[1:]

    @staticmethod
    def camel_casify(word):
        """Remove blanks and use CamelCase for the new word"""
        return "".join(map(FontnameTools.front_upper, word.split(" ")))

    @staticmethod
    def camel_explode(word):
        """Explode CamelCase -> Camel Case"""
        # But do not explode "JetBrains" etc at string start...
        excludes = [
            "JetBrains",
            "DejaVu",
            "OpenDyslexicAlta",
            "OpenDyslexicMono",
            "OpenDyslexic",
            "DaddyTimeMono",
            "InconsolataGo",
            "ProFontWindows",
            "ProFont",
            "ProggyClean",
        ]
        m = re.match("(" + "|".join(excludes) + ")(.*)", word)
        (prefix, word) = m.group(1, 2) if m != None else ("", word)
        if len(word) == 0:
            return prefix
        parts = re.split("(?<=[a-z0-9])(?=[A-Z])", word)
        if len(prefix):
            parts.insert(0, prefix)
        return " ".join(parts)

    @staticmethod
    def drop_empty(l):
        """Remove empty strings from list of strings"""
        return [x for x in l if len(x) > 0]

    @staticmethod
    def concat(*all_things):
        """Flatten list of (strings or lists of strings) to a blank-separated string"""
        all = []
        for thing in all_things:
            if type(thing) is not list:
                all.append(thing)
            else:
                all += thing
        return " ".join(FontnameTools.drop_empty(all))

    @staticmethod
    def unify_style_names(style_name):
        """Substitude some known token with standard wording"""
        known_names = {
            # Source of the table is the current sourcefonts
            # Left side needs to be lower case
            "-": "",
            "book": "",
            "text": "",
            "ce": "CE",
            #'semibold':     'Demi',
            "ob": "Oblique",
            "it": "Italic",
            "i": "Italic",
            "b": "Bold",
            "normal": "Regular",
            "c": "Condensed",
            "r": "Regular",
            "m": "Medium",
            "l": "Light",
        }
        if style_name in known_names:
            return known_names[style_name.lower()]
        return style_name

    @staticmethod
    def find_in_dicts(key, dicts):
        """Find an entry in a list of dicts, return entry and in which list it was"""
        for i, d in enumerate(dicts):
            if key in d:
                return (d[key], i)
        return (None, 0)

    @staticmethod
    def get_shorten_form_idx(aggressive, prefix, form_if_prefixed):
        """Get the tuple index of known_* data tables"""
        if aggressive:
            return 0
        if len(prefix):
            return form_if_prefixed
        return 1

    @staticmethod
    def shorten_style_name(name, aggressive):
        """Substitude some known styles to short form"""
        # If aggressive is False create the mild short form
        # aggressive == True: Always use first form of everything
        # aggressive == False:
        #               - has no modifier: use the second form
        #               - has modifier: use second form of mod plus first form of weights2
        #               - has modifier: use second form of mod plus second form of widths
        name_rest = name
        name_pre = ""
        form = FontnameTools.get_shorten_form_idx(aggressive, "", 0)
        for mod in FontnameTools.known_modifiers:
            if name.startswith(mod) and len(name) > len(
                mod
            ):  # Second condition specifically for 'Demi'
                name_pre = FontnameTools.known_modifiers[mod][form]
                name_rest = name[len(mod) :]
                break
        subst, i = FontnameTools.find_in_dicts(
            name_rest, [FontnameTools.known_weights2, FontnameTools.known_widths]
        )
        form = FontnameTools.get_shorten_form_idx(aggressive, name_pre, i)
        if isinstance(subst, tuple):
            return name_pre + subst[form]
        if not len(name_pre):
            # The following sets do not allow modifiers
            subst, _ = FontnameTools.find_in_dicts(
                name_rest, [FontnameTools.known_weights1, FontnameTools.known_slopes]
            )
            if isinstance(subst, tuple):
                return subst[form]
        return name

    @staticmethod
    def short_styles(lists, aggressive):
        """Shorten all style names in a list or a list of lists"""
        if not len(lists) or not isinstance(lists[0], list):
            return list(
                map(lambda x: FontnameTools.shorten_style_name(x, aggressive), lists)
            )
        return [
            list(map(lambda x: FontnameTools.shorten_style_name(x, aggressive), styles))
            for styles in lists
        ]

    @staticmethod
    def make_oblique_style(weights, styles):
        """Move "Oblique" from weights to styles for font naming purposes"""
        if "Oblique" in weights:
            weights = list(weights)
            weights.remove("Oblique")
            styles = list(styles)
            styles.append("Oblique")
        return (weights, styles)

    @staticmethod
    def get_name_token(name, tokens, allow_regex_token=False):
        """Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest"""
        # The default mode (allow_regex_token = False) will try to find any verbatim string in the
        # tokens list (case insensitive matching) and give that tokens list item back with
        # unchanged case (i.e. [ 'Bold' ] will match "bold" and return it as [ 'Bold', ]
        # In the regex mode (allow_regex_token = True) it will use the tokens elements as
        # regexes and return the original (i.e. from name) case.
        #
        # Token are always used in a regex and may not capture, use non capturing
        # grouping if needed (?: ... )
        lower_tokens = [t.lower() for t in tokens]
        not_matched = ""
        all_tokens = []
        j = 1
        regex = re.compile("(.*?)(" + "|".join(tokens) + ")(.*)", re.IGNORECASE)
        while j:
            j = regex.match(name)
            if not j:
                break
            if len(j.groups()) != 3:
                sys.exit("Malformed regex in FontnameTools.get_name_token()")
            not_matched += (
                " " + j.groups()[0]
            )  # Blanc prevents unwanted concatenation of unmatched substrings
            tok = j.groups()[1].lower()
            if tok in lower_tokens:
                tok = tokens[lower_tokens.index(tok)]
            tok = FontnameTools.unify_style_names(tok)
            if len(tok):
                all_tokens.append(tok)
            name = j.groups()[2]  # Recurse rest
        not_matched += " " + name
        return (not_matched.strip(), all_tokens)

    @staticmethod
    def postscript_char_filter(name):
        """Filter out characters that are not allowed in Postscript names"""
        # The name string must be restricted to the printable ASCII subset, codes 33 to 126,
        # except for the 10 characters '[', ']', '(', ')', '{', '}', '<', '>', '/', '%'
        out = ""
        for c in name:
            if c in "[](){}<>/%" or ord(c) < 33 or ord(c) > 126:
                continue
            out += c
        return out

    SIL_TABLE = [
        ("(a)nonymous", r"\1nonymice"),
        ("(b)itstream( ?)(v)era( ?sans ?mono)?", r"\1itstrom\2Wera"),
        ("(s)ource", r"\1auce"),
        ("(h)ermit", r"\1urmit"),
        ("(h)asklig", r"\1asklug"),
        ("(s)hare", r"\1hure"),
        ("IBM[- ]?plex", r"Blex"),  # We do not keep the case here
        ("(t)erminus", r"\1erminess"),
        ("(l)iberation", r"\1iteration"),
        ("iA([- ]?)writer", r"iM\1Writing"),
        ("(a)nka/(c)oder", r"\1na\2onder"),
        ("(c)ascadia( ?)(c)ode", r"\1askaydia\2\3ove"),
        ("(c)ascadia( ?)(m)ono", r"\1askaydia\2\3ono"),
        ("(m)( ?)plus", r"\1+"),  # Added this, because they use a plus symbol :->
        ("Gohufont", r"GohuFont"),  # Correct to CamelCase
        # Noone cares that font names starting with a digit are forbidden:
        ("IBM 3270", r"3270"),  # for historical reasons and 'IBM' is a TM or something
        # Some name parts that are too long for us
        ("(.*sans ?m)ono", r"\1"),  # Various SomenameSansMono fonts
        ("(.*code ?lat)in Expanded", r"\1X"),  # for 'M PLUS Code Latin Expanded'
        ("(.*code ?lat)in", r"\1"),  # for 'M PLUS Code Latin'
        ("(b)ig( ?)(b)lue( ?)(t)erminal", r"\1ig\3lue\5erm"),  # Shorten BigBlueTerminal
        ("(.*)437TT", r"\g<1>437"),  # Shorten BigBlueTerminal 437 TT even further
        ("(.*dyslexic ?alt)a", r"\1"),  # Open Dyslexic Alta -> Open Dyslexic Alt
        ("(.*dyslexic ?m)ono", r"\1"),  # Open Dyslexic Mono -> Open Dyslexic M
        ("(overpass ?m)ono", r"\1"),  # Overpass Mono -> Overpass M
        ("(proggyclean) ?tt", r"\1"),  # Remove TT from ProggyClean
        (
            "(terminess) ?\(ttf\)",
            r"\1",
        ),  # Remove TTF from Terminus (after renamed to Terminess)
        ("(im ?writing ?q)uattro", r"\1uat"),  # Rename iM Writing Quattro to Quat
        (
            "(im ?writing ?(mono|duo|quat)) ?s",
            r"\1",
        ),  # Remove S from all iM Writing styles
    ]

    # From https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf
    # The first short variant is from the linked table.
    # The second (longer) short variant is from diverse fonts like Noto.
    # We can
    # - use the long form
    # - use the very short form (first)
    # - use mild short form:
    #   - has no modifier: use the second form
    #   - has modifier: use second form of mod plus first form of weights2
    #   - has modifier: use second form of mod plus second form of widths
    # This is encoded in get_shorten_form_idx()
    known_weights1 = {  # can not take modifiers
        "Medium": ("Md", "Med"),
        "Nord": ("Nd", "Nord"),
        "Book": ("Bk", "Book"),
        "Poster": ("Po", "Poster"),
        "Demi": (
            "Dm",
            "Demi",
        ),  # Demi is sometimes used as a weight, sometimes as a modifier
        "Regular": ("Rg", "Reg"),
        "Display": ("DS", "Disp"),
        "Super": ("Su", "Sup"),
        "Retina": ("Rt", "Ret"),
    }
    known_weights2 = {  # can take modifiers
        "Black": ("Blk", "Black"),
        "Bold": ("Bd", "Bold"),
        "Heavy": ("Hv", "Heavy"),
        "Thin": ("Th", "Thin"),
        "Light": ("Lt", "Light"),
        " ": (),  # Just for CodeClimate :-/
    }
    known_widths = {  # can take modifiers
        "Compressed": ("Cm", "Comp"),
        "Extended": ("Ex", "Extd"),
        "Condensed": ("Cn", "Cond"),
        "Narrow": ("Nr", "Narrow"),
        "Compact": ("Ct", "Compact"),
    }
    known_slopes = {  # can not take modifiers
        "Inclined": ("Ic", "Incl"),
        "Oblique": ("Obl", "Obl"),
        "Italic": ("It", "Italic"),
        "Upright": ("Up", "Uprght"),
        "Kursiv": ("Ks", "Kurs"),
        "Sloped": ("Sl", "Slop"),
    }
    known_modifiers = {
        "Demi": ("Dm", "Dem"),
        "Ultra": ("Ult", "Ult"),
        "Semi": ("Sm", "Sem"),
        "Extra": ("X", "Ext"),
    }

    @staticmethod
    def is_keep_regular(basename):
        """This has been decided by the font designers, we need to mimic that (for comparison purposes)"""
        KEEP_REGULAR = [
            "Agave",
            "Arimo",
            "Aurulent",
            "Cascadia",
            "Cousine",
            "Fantasque",
            "Fira",
            "Overpass",
            "Lilex",
            "Inconsolata$",  # not InconsolataGo
            "IAWriter",
            "Meslo",
            "Monoid",
            "Mononoki",
            "Hack",
            "JetBrains Mono",
            "Noto Sans",
            "Noto Serif",
            "Victor",
        ]
        for kr in KEEP_REGULAR:
            if (basename.rstrip() + "$").startswith(kr):
                return True
        return False

    @staticmethod
    def _parse_simple_font_name(name):
        """Parse a filename that does not follow the 'FontFamilyName-FontStyle' pattern"""
        # No dash in name, maybe we have blanc separated filename?
        if " " in name:
            return FontnameTools.parse_font_name(name.replace(" ", "-"))
        # Do we have a number-name boundary?
        p = re.split("(?<=[0-9])(?=[a-zA-Z])", name)
        if len(p) > 1:
            return FontnameTools.parse_font_name("-".join(p))
        # Or do we have CamelCase?
        n = FontnameTools.camel_explode(name)
        if n != name:
            return FontnameTools.parse_font_name(n.replace(" ", "-"))
        return (False, FontnameTools.camel_casify(name), [], [], [], "")

    @staticmethod
    def parse_font_name(name):
        """Expects a filename following the 'FontFamilyName-FontStyle' pattern and returns ... parts"""
        name = re.sub(
            r"\bsemi-condensed\b", "SemiCondensed", name, 1, re.IGNORECASE
        )  # Just for "3270 Semi-Condensed" :-/
        name = re.sub("[_\s]+", " ", name)
        matches = re.match(r"([^-]+)(?:-(.*))?", name)
        familyname = FontnameTools.camel_casify(matches.group(1))
        style = matches.group(2)

        if not style:
            return FontnameTools._parse_simple_font_name(name)

        # These are the FontStyle keywords we know, in three categories
        # Weights end up as Typographic Family parts ('after the dash')
        # Styles end up as Family parts (for classic grouping of four)
        # Others also end up in Typographic Family ('before the dash')
        weights = (
            [
                m + s
                for s in list(FontnameTools.known_weights2)
                + list(FontnameTools.known_widths)
                for m in list(FontnameTools.known_modifiers) + [""]
                if m != s
            ]
            + list(FontnameTools.known_weights1)
            + list(FontnameTools.known_slopes)
        )
        styles = [
            "Bold",
            "Italic",
            "Regular",
            "Normal",
        ]
        weights = [w for w in weights if w not in styles]
        # Some font specialities:
        other = [
            "-",
            "Book",
            "For",
            "Powerline",
            "Text",  # Plex
            "IIx",  # Profont IIx
            "LGC",  # Inconsolata LGC
            r"\bCE\b",  # ProggycleanTT CE
            r"[12][cmp]n?",  # MPlus
            r"(?:uni-)?1[14]",  # GohuFont uni
        ]

        # Sometimes used abbreviations
        weight_abbrevs = [
            "ob",
            "c",
            "m",
            "l",
        ]
        style_abbrevs = [
            "it",
            "r",
            "b",
            "i",
        ]

        (style, weight_token) = FontnameTools.get_name_token(style, weights)
        (style, style_token) = FontnameTools.get_name_token(style, styles)
        (style, other_token) = FontnameTools.get_name_token(style, other, True)
        if (
            len(style) < 4 and style.lower() != "pro"
        ):  # Prevent 'r' of Pro to be detected as style_abbrev
            (style, weight_token_abbrevs) = FontnameTools.get_name_token(
                style, weight_abbrevs
            )
            (style, style_token_abbrevs) = FontnameTools.get_name_token(
                style, style_abbrevs
            )
            weight_token += weight_token_abbrevs
            style_token += style_token_abbrevs
        while "Regular" in style_token and len(style_token) > 1:
            # Correct situation where "Regular" and something else is given
            style_token.remove("Regular")

        # Recurse to see if unmatched stuff between dashes can belong to familyname
        matches2 = re.match(r"(\w+)-(.*)", style)
        if matches2:
            return FontnameTools.parse_font_name(
                familyname + matches2.group(1) + "-" + matches2.group(2)
            )

        style = re.sub(
            r"(^|\s)\d+(\.\d+)+(\s|$)", r"\1\3", style
        )  # Remove (free standing) version numbers
        style_parts = FontnameTools.drop_empty(style.split(" "))
        style = " ".join(map(FontnameTools.front_upper, style_parts))
        familyname = FontnameTools.camel_explode(familyname)
        return (True, familyname, weight_token, style_token, other_token, style)
feat: upgrade to nf v3.0.0 2023-05-02 14:57:12 -05:00			`#!/usr/bin/env python`
			`# coding=utf8`

			`import re`
			`import sys`


			`class FontnameTools:`
			`"""Deconstruct a font filename to get standardized name parts"""`

			`@staticmethod`
			`def front_upper(word):`
			`"""Capitalize a string (but keep case of subsequent chars)"""`
			`return word[:1].upper() + word[1:]`

			`@staticmethod`
			`def camel_casify(word):`
			`"""Remove blanks and use CamelCase for the new word"""`
			`return "".join(map(FontnameTools.front_upper, word.split(" ")))`

			`@staticmethod`
			`def camel_explode(word):`
			`"""Explode CamelCase -> Camel Case"""`
			`# But do not explode "JetBrains" etc at string start...`
			`excludes = [`
			`"JetBrains",`
			`"DejaVu",`
			`"OpenDyslexicAlta",`
			`"OpenDyslexicMono",`
			`"OpenDyslexic",`
			`"DaddyTimeMono",`
			`"InconsolataGo",`
			`"ProFontWindows",`
			`"ProFont",`
			`"ProggyClean",`
			`]`
			`m = re.match("(" + "\|".join(excludes) + ")(.*)", word)`
			`(prefix, word) = m.group(1, 2) if m != None else ("", word)`
			`if len(word) == 0:`
			`return prefix`
			`parts = re.split("(?<=[a-z0-9])(?=[A-Z])", word)`
			`if len(prefix):`
			`parts.insert(0, prefix)`
			`return " ".join(parts)`

			`@staticmethod`
			`def drop_empty(l):`
			`"""Remove empty strings from list of strings"""`
			`return [x for x in l if len(x) > 0]`

			`@staticmethod`
			`def concat(*all_things):`
			`"""Flatten list of (strings or lists of strings) to a blank-separated string"""`
			`all = []`
			`for thing in all_things:`
			`if type(thing) is not list:`
			`all.append(thing)`
			`else:`
			`all += thing`
			`return " ".join(FontnameTools.drop_empty(all))`

			`@staticmethod`
			`def unify_style_names(style_name):`
			`"""Substitude some known token with standard wording"""`
			`known_names = {`
			`# Source of the table is the current sourcefonts`
			`# Left side needs to be lower case`
			`"-": "",`
			`"book": "",`
			`"text": "",`
			`"ce": "CE",`
			`#'semibold': 'Demi',`
			`"ob": "Oblique",`
			`"it": "Italic",`
			`"i": "Italic",`
			`"b": "Bold",`
			`"normal": "Regular",`
			`"c": "Condensed",`
			`"r": "Regular",`
			`"m": "Medium",`
			`"l": "Light",`
			`}`
			`if style_name in known_names:`
			`return known_names[style_name.lower()]`
			`return style_name`

			`@staticmethod`
			`def find_in_dicts(key, dicts):`
			`"""Find an entry in a list of dicts, return entry and in which list it was"""`
			`for i, d in enumerate(dicts):`
			`if key in d:`
			`return (d[key], i)`
			`return (None, 0)`

			`@staticmethod`
			`def get_shorten_form_idx(aggressive, prefix, form_if_prefixed):`
			`"""Get the tuple index of known_* data tables"""`
			`if aggressive:`
			`return 0`
			`if len(prefix):`
			`return form_if_prefixed`
			`return 1`

			`@staticmethod`
			`def shorten_style_name(name, aggressive):`
			`"""Substitude some known styles to short form"""`
			`# If aggressive is False create the mild short form`
			`# aggressive == True: Always use first form of everything`
			`# aggressive == False:`
			`# - has no modifier: use the second form`
			`# - has modifier: use second form of mod plus first form of weights2`
			`# - has modifier: use second form of mod plus second form of widths`
			`name_rest = name`
			`name_pre = ""`
			`form = FontnameTools.get_shorten_form_idx(aggressive, "", 0)`
			`for mod in FontnameTools.known_modifiers:`
			`if name.startswith(mod) and len(name) > len(`
			`mod`
			`): # Second condition specifically for 'Demi'`
			`name_pre = FontnameTools.known_modifiers[mod][form]`
			`name_rest = name[len(mod) :]`
			`break`
			`subst, i = FontnameTools.find_in_dicts(`
			`name_rest, [FontnameTools.known_weights2, FontnameTools.known_widths]`
			`)`
			`form = FontnameTools.get_shorten_form_idx(aggressive, name_pre, i)`
			`if isinstance(subst, tuple):`
			`return name_pre + subst[form]`
			`if not len(name_pre):`
			`# The following sets do not allow modifiers`
			`subst, _ = FontnameTools.find_in_dicts(`
			`name_rest, [FontnameTools.known_weights1, FontnameTools.known_slopes]`
			`)`
			`if isinstance(subst, tuple):`
			`return subst[form]`
			`return name`

			`@staticmethod`
			`def short_styles(lists, aggressive):`
			`"""Shorten all style names in a list or a list of lists"""`
			`if not len(lists) or not isinstance(lists[0], list):`
			`return list(`
			`map(lambda x: FontnameTools.shorten_style_name(x, aggressive), lists)`
			`)`
			`return [`
			`list(map(lambda x: FontnameTools.shorten_style_name(x, aggressive), styles))`
			`for styles in lists`
			`]`

			`@staticmethod`
			`def make_oblique_style(weights, styles):`
			`"""Move "Oblique" from weights to styles for font naming purposes"""`
			`if "Oblique" in weights:`
			`weights = list(weights)`
			`weights.remove("Oblique")`
			`styles = list(styles)`
			`styles.append("Oblique")`
			`return (weights, styles)`

			`@staticmethod`
			`def get_name_token(name, tokens, allow_regex_token=False):`
			`"""Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest"""`
			`# The default mode (allow_regex_token = False) will try to find any verbatim string in the`
			`# tokens list (case insensitive matching) and give that tokens list item back with`
			`# unchanged case (i.e. [ 'Bold' ] will match "bold" and return it as [ 'Bold', ]`
			`# In the regex mode (allow_regex_token = True) it will use the tokens elements as`
			`# regexes and return the original (i.e. from name) case.`
			`#`
			`# Token are always used in a regex and may not capture, use non capturing`
			`# grouping if needed (?: ... )`
			`lower_tokens = [t.lower() for t in tokens]`
			`not_matched = ""`
			`all_tokens = []`
			`j = 1`
			`regex = re.compile("(.?)(" + "\|".join(tokens) + ")(.)", re.IGNORECASE)`
			`while j:`
			`j = regex.match(name)`
			`if not j:`
			`break`
			`if len(j.groups()) != 3:`
			`sys.exit("Malformed regex in FontnameTools.get_name_token()")`
			`not_matched += (`
			`" " + j.groups()[0]`
			`) # Blanc prevents unwanted concatenation of unmatched substrings`
			`tok = j.groups()[1].lower()`
			`if tok in lower_tokens:`
			`tok = tokens[lower_tokens.index(tok)]`
			`tok = FontnameTools.unify_style_names(tok)`
			`if len(tok):`
			`all_tokens.append(tok)`
			`name = j.groups()[2] # Recurse rest`
			`not_matched += " " + name`
			`return (not_matched.strip(), all_tokens)`

			`@staticmethod`
			`def postscript_char_filter(name):`
			`"""Filter out characters that are not allowed in Postscript names"""`
			`# The name string must be restricted to the printable ASCII subset, codes 33 to 126,`
			`# except for the 10 characters '[', ']', '(', ')', '{', '}', '<', '>', '/', '%'`
			`out = ""`
			`for c in name:`
			`if c in "[](){}<>/%" or ord(c) < 33 or ord(c) > 126:`
			`continue`
			`out += c`
			`return out`

			`SIL_TABLE = [`
			`("(a)nonymous", r"\1nonymice"),`
			`("(b)itstream( ?)(v)era( ?sans ?mono)?", r"\1itstrom\2Wera"),`
			`("(s)ource", r"\1auce"),`
			`("(h)ermit", r"\1urmit"),`
			`("(h)asklig", r"\1asklug"),`
			`("(s)hare", r"\1hure"),`
			`("IBM[- ]?plex", r"Blex"), # We do not keep the case here`
			`("(t)erminus", r"\1erminess"),`
			`("(l)iberation", r"\1iteration"),`
			`("iA([- ]?)writer", r"iM\1Writing"),`
			`("(a)nka/(c)oder", r"\1na\2onder"),`
			`("(c)ascadia( ?)(c)ode", r"\1askaydia\2\3ove"),`
			`("(c)ascadia( ?)(m)ono", r"\1askaydia\2\3ono"),`
			`("(m)( ?)plus", r"\1+"), # Added this, because they use a plus symbol :->`
			`("Gohufont", r"GohuFont"), # Correct to CamelCase`
			`# Noone cares that font names starting with a digit are forbidden:`
			`("IBM 3270", r"3270"), # for historical reasons and 'IBM' is a TM or something`
			`# Some name parts that are too long for us`
			`("(.*sans ?m)ono", r"\1"), # Various SomenameSansMono fonts`
			`("(.*code ?lat)in Expanded", r"\1X"), # for 'M PLUS Code Latin Expanded'`
			`("(.*code ?lat)in", r"\1"), # for 'M PLUS Code Latin'`
			`("(b)ig( ?)(b)lue( ?)(t)erminal", r"\1ig\3lue\5erm"), # Shorten BigBlueTerminal`
			`("(.*)437TT", r"\g<1>437"), # Shorten BigBlueTerminal 437 TT even further`
			`("(.*dyslexic ?alt)a", r"\1"), # Open Dyslexic Alta -> Open Dyslexic Alt`
			`("(.*dyslexic ?m)ono", r"\1"), # Open Dyslexic Mono -> Open Dyslexic M`
			`("(overpass ?m)ono", r"\1"), # Overpass Mono -> Overpass M`
			`("(proggyclean) ?tt", r"\1"), # Remove TT from ProggyClean`
			`(`
			`"(terminess) ?\(ttf\)",`
			`r"\1",`
			`), # Remove TTF from Terminus (after renamed to Terminess)`
			`("(im ?writing ?q)uattro", r"\1uat"), # Rename iM Writing Quattro to Quat`
			`(`
			`"(im ?writing ?(mono\|duo\|quat)) ?s",`
			`r"\1",`
			`), # Remove S from all iM Writing styles`
			`]`

			`# From https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf`
			`# The first short variant is from the linked table.`
			`# The second (longer) short variant is from diverse fonts like Noto.`
			`# We can`
			`# - use the long form`
			`# - use the very short form (first)`
			`# - use mild short form:`
			`# - has no modifier: use the second form`
			`# - has modifier: use second form of mod plus first form of weights2`
			`# - has modifier: use second form of mod plus second form of widths`
			`# This is encoded in get_shorten_form_idx()`
			`known_weights1 = { # can not take modifiers`
			`"Medium": ("Md", "Med"),`
			`"Nord": ("Nd", "Nord"),`
			`"Book": ("Bk", "Book"),`
			`"Poster": ("Po", "Poster"),`
			`"Demi": (`
			`"Dm",`
			`"Demi",`
			`), # Demi is sometimes used as a weight, sometimes as a modifier`
			`"Regular": ("Rg", "Reg"),`
			`"Display": ("DS", "Disp"),`
			`"Super": ("Su", "Sup"),`
			`"Retina": ("Rt", "Ret"),`
			`}`
			`known_weights2 = { # can take modifiers`
			`"Black": ("Blk", "Black"),`
			`"Bold": ("Bd", "Bold"),`
			`"Heavy": ("Hv", "Heavy"),`
			`"Thin": ("Th", "Thin"),`
			`"Light": ("Lt", "Light"),`
			`" ": (), # Just for CodeClimate :-/`
			`}`
			`known_widths = { # can take modifiers`
			`"Compressed": ("Cm", "Comp"),`
			`"Extended": ("Ex", "Extd"),`
			`"Condensed": ("Cn", "Cond"),`
			`"Narrow": ("Nr", "Narrow"),`
			`"Compact": ("Ct", "Compact"),`
			`}`
			`known_slopes = { # can not take modifiers`
			`"Inclined": ("Ic", "Incl"),`
			`"Oblique": ("Obl", "Obl"),`
			`"Italic": ("It", "Italic"),`
			`"Upright": ("Up", "Uprght"),`
			`"Kursiv": ("Ks", "Kurs"),`
			`"Sloped": ("Sl", "Slop"),`
			`}`
			`known_modifiers = {`
			`"Demi": ("Dm", "Dem"),`
			`"Ultra": ("Ult", "Ult"),`
			`"Semi": ("Sm", "Sem"),`
			`"Extra": ("X", "Ext"),`
			`}`

			`@staticmethod`
			`def is_keep_regular(basename):`
			`"""This has been decided by the font designers, we need to mimic that (for comparison purposes)"""`
			`KEEP_REGULAR = [`
			`"Agave",`
			`"Arimo",`
			`"Aurulent",`
			`"Cascadia",`
			`"Cousine",`
			`"Fantasque",`
			`"Fira",`
			`"Overpass",`
			`"Lilex",`
			`"Inconsolata$", # not InconsolataGo`
			`"IAWriter",`
			`"Meslo",`
			`"Monoid",`
			`"Mononoki",`
			`"Hack",`
			`"JetBrains Mono",`
			`"Noto Sans",`
			`"Noto Serif",`
			`"Victor",`
			`]`
			`for kr in KEEP_REGULAR:`
			`if (basename.rstrip() + "$").startswith(kr):`
			`return True`
			`return False`

			`@staticmethod`
			`def _parse_simple_font_name(name):`
			`"""Parse a filename that does not follow the 'FontFamilyName-FontStyle' pattern"""`
			`# No dash in name, maybe we have blanc separated filename?`
			`if " " in name:`
			`return FontnameTools.parse_font_name(name.replace(" ", "-"))`
			`# Do we have a number-name boundary?`
			`p = re.split("(?<=[0-9])(?=[a-zA-Z])", name)`
			`if len(p) > 1:`
			`return FontnameTools.parse_font_name("-".join(p))`
			`# Or do we have CamelCase?`
			`n = FontnameTools.camel_explode(name)`
			`if n != name:`
			`return FontnameTools.parse_font_name(n.replace(" ", "-"))`
			`return (False, FontnameTools.camel_casify(name), [], [], [], "")`

			`@staticmethod`
			`def parse_font_name(name):`
			`"""Expects a filename following the 'FontFamilyName-FontStyle' pattern and returns ... parts"""`
			`name = re.sub(`
			`r"\bsemi-condensed\b", "SemiCondensed", name, 1, re.IGNORECASE`
			`) # Just for "3270 Semi-Condensed" :-/`
			`name = re.sub("[_\s]+", " ", name)`
			`matches = re.match(r"([^-]+)(?:-(.*))?", name)`
			`familyname = FontnameTools.camel_casify(matches.group(1))`
			`style = matches.group(2)`

			`if not style:`
			`return FontnameTools._parse_simple_font_name(name)`

			`# These are the FontStyle keywords we know, in three categories`
			`# Weights end up as Typographic Family parts ('after the dash')`
			`# Styles end up as Family parts (for classic grouping of four)`
			`# Others also end up in Typographic Family ('before the dash')`
			`weights = (`
			`[`
			`m + s`
			`for s in list(FontnameTools.known_weights2)`
			`+ list(FontnameTools.known_widths)`
			`for m in list(FontnameTools.known_modifiers) + [""]`
			`if m != s`
			`]`
			`+ list(FontnameTools.known_weights1)`
			`+ list(FontnameTools.known_slopes)`
			`)`
			`styles = [`
			`"Bold",`
			`"Italic",`
			`"Regular",`
			`"Normal",`
			`]`
			`weights = [w for w in weights if w not in styles]`
			`# Some font specialities:`
			`other = [`
			`"-",`
			`"Book",`
			`"For",`
			`"Powerline",`
			`"Text", # Plex`
			`"IIx", # Profont IIx`
			`"LGC", # Inconsolata LGC`
			`r"\bCE\b", # ProggycleanTT CE`
			`r"[12][cmp]n?", # MPlus`
			`r"(?:uni-)?1[14]", # GohuFont uni`
			`]`

			`# Sometimes used abbreviations`
			`weight_abbrevs = [`
			`"ob",`
			`"c",`
			`"m",`
			`"l",`
			`]`
			`style_abbrevs = [`
			`"it",`
			`"r",`
			`"b",`
			`"i",`
			`]`

			`(style, weight_token) = FontnameTools.get_name_token(style, weights)`
			`(style, style_token) = FontnameTools.get_name_token(style, styles)`
			`(style, other_token) = FontnameTools.get_name_token(style, other, True)`
			`if (`
			`len(style) < 4 and style.lower() != "pro"`
			`): # Prevent 'r' of Pro to be detected as style_abbrev`
			`(style, weight_token_abbrevs) = FontnameTools.get_name_token(`
			`style, weight_abbrevs`
			`)`
			`(style, style_token_abbrevs) = FontnameTools.get_name_token(`
			`style, style_abbrevs`
			`)`
			`weight_token += weight_token_abbrevs`
			`style_token += style_token_abbrevs`
			`while "Regular" in style_token and len(style_token) > 1:`
			`# Correct situation where "Regular" and something else is given`
			`style_token.remove("Regular")`

			`# Recurse to see if unmatched stuff between dashes can belong to familyname`
			`matches2 = re.match(r"(\w+)-(.*)", style)`
			`if matches2:`
			`return FontnameTools.parse_font_name(`
			`familyname + matches2.group(1) + "-" + matches2.group(2)`
			`)`

			`style = re.sub(`
			`r"(^\|\s)\d+(\.\d+)+(\s\|$)", r"\1\3", style`
			`) # Remove (free standing) version numbers`
			`style_parts = FontnameTools.drop_empty(style.split(" "))`
			`style = " ".join(map(FontnameTools.front_upper, style_parts))`
			`familyname = FontnameTools.camel_explode(familyname)`
			`return (True, familyname, weight_token, style_token, other_token, style)`