mirror of
https://github.com/daylinmorgan/monolisa-nerdfont-patch.git
synced 2024-11-09 16:43:14 -06:00
442 lines
16 KiB
Python
442 lines
16 KiB
Python
|
#!/usr/bin/env python
|
||
|
# coding=utf8
|
||
|
|
||
|
import re
|
||
|
import sys
|
||
|
|
||
|
|
||
|
class FontnameTools:
|
||
|
"""Deconstruct a font filename to get standardized name parts"""
|
||
|
|
||
|
@staticmethod
|
||
|
def front_upper(word):
|
||
|
"""Capitalize a string (but keep case of subsequent chars)"""
|
||
|
return word[:1].upper() + word[1:]
|
||
|
|
||
|
@staticmethod
|
||
|
def camel_casify(word):
|
||
|
"""Remove blanks and use CamelCase for the new word"""
|
||
|
return "".join(map(FontnameTools.front_upper, word.split(" ")))
|
||
|
|
||
|
@staticmethod
|
||
|
def camel_explode(word):
|
||
|
"""Explode CamelCase -> Camel Case"""
|
||
|
# But do not explode "JetBrains" etc at string start...
|
||
|
excludes = [
|
||
|
"JetBrains",
|
||
|
"DejaVu",
|
||
|
"OpenDyslexicAlta",
|
||
|
"OpenDyslexicMono",
|
||
|
"OpenDyslexic",
|
||
|
"DaddyTimeMono",
|
||
|
"InconsolataGo",
|
||
|
"ProFontWindows",
|
||
|
"ProFont",
|
||
|
"ProggyClean",
|
||
|
]
|
||
|
m = re.match("(" + "|".join(excludes) + ")(.*)", word)
|
||
|
(prefix, word) = m.group(1, 2) if m != None else ("", word)
|
||
|
if len(word) == 0:
|
||
|
return prefix
|
||
|
parts = re.split("(?<=[a-z0-9])(?=[A-Z])", word)
|
||
|
if len(prefix):
|
||
|
parts.insert(0, prefix)
|
||
|
return " ".join(parts)
|
||
|
|
||
|
@staticmethod
|
||
|
def drop_empty(l):
|
||
|
"""Remove empty strings from list of strings"""
|
||
|
return [x for x in l if len(x) > 0]
|
||
|
|
||
|
@staticmethod
|
||
|
def concat(*all_things):
|
||
|
"""Flatten list of (strings or lists of strings) to a blank-separated string"""
|
||
|
all = []
|
||
|
for thing in all_things:
|
||
|
if type(thing) is not list:
|
||
|
all.append(thing)
|
||
|
else:
|
||
|
all += thing
|
||
|
return " ".join(FontnameTools.drop_empty(all))
|
||
|
|
||
|
@staticmethod
|
||
|
def unify_style_names(style_name):
|
||
|
"""Substitude some known token with standard wording"""
|
||
|
known_names = {
|
||
|
# Source of the table is the current sourcefonts
|
||
|
# Left side needs to be lower case
|
||
|
"-": "",
|
||
|
"book": "",
|
||
|
"text": "",
|
||
|
"ce": "CE",
|
||
|
#'semibold': 'Demi',
|
||
|
"ob": "Oblique",
|
||
|
"it": "Italic",
|
||
|
"i": "Italic",
|
||
|
"b": "Bold",
|
||
|
"normal": "Regular",
|
||
|
"c": "Condensed",
|
||
|
"r": "Regular",
|
||
|
"m": "Medium",
|
||
|
"l": "Light",
|
||
|
}
|
||
|
if style_name in known_names:
|
||
|
return known_names[style_name.lower()]
|
||
|
return style_name
|
||
|
|
||
|
@staticmethod
|
||
|
def find_in_dicts(key, dicts):
|
||
|
"""Find an entry in a list of dicts, return entry and in which list it was"""
|
||
|
for i, d in enumerate(dicts):
|
||
|
if key in d:
|
||
|
return (d[key], i)
|
||
|
return (None, 0)
|
||
|
|
||
|
@staticmethod
|
||
|
def get_shorten_form_idx(aggressive, prefix, form_if_prefixed):
|
||
|
"""Get the tuple index of known_* data tables"""
|
||
|
if aggressive:
|
||
|
return 0
|
||
|
if len(prefix):
|
||
|
return form_if_prefixed
|
||
|
return 1
|
||
|
|
||
|
@staticmethod
|
||
|
def shorten_style_name(name, aggressive):
|
||
|
"""Substitude some known styles to short form"""
|
||
|
# If aggressive is False create the mild short form
|
||
|
# aggressive == True: Always use first form of everything
|
||
|
# aggressive == False:
|
||
|
# - has no modifier: use the second form
|
||
|
# - has modifier: use second form of mod plus first form of weights2
|
||
|
# - has modifier: use second form of mod plus second form of widths
|
||
|
name_rest = name
|
||
|
name_pre = ""
|
||
|
form = FontnameTools.get_shorten_form_idx(aggressive, "", 0)
|
||
|
for mod in FontnameTools.known_modifiers:
|
||
|
if name.startswith(mod) and len(name) > len(
|
||
|
mod
|
||
|
): # Second condition specifically for 'Demi'
|
||
|
name_pre = FontnameTools.known_modifiers[mod][form]
|
||
|
name_rest = name[len(mod) :]
|
||
|
break
|
||
|
subst, i = FontnameTools.find_in_dicts(
|
||
|
name_rest, [FontnameTools.known_weights2, FontnameTools.known_widths]
|
||
|
)
|
||
|
form = FontnameTools.get_shorten_form_idx(aggressive, name_pre, i)
|
||
|
if isinstance(subst, tuple):
|
||
|
return name_pre + subst[form]
|
||
|
if not len(name_pre):
|
||
|
# The following sets do not allow modifiers
|
||
|
subst, _ = FontnameTools.find_in_dicts(
|
||
|
name_rest, [FontnameTools.known_weights1, FontnameTools.known_slopes]
|
||
|
)
|
||
|
if isinstance(subst, tuple):
|
||
|
return subst[form]
|
||
|
return name
|
||
|
|
||
|
@staticmethod
|
||
|
def short_styles(lists, aggressive):
|
||
|
"""Shorten all style names in a list or a list of lists"""
|
||
|
if not len(lists) or not isinstance(lists[0], list):
|
||
|
return list(
|
||
|
map(lambda x: FontnameTools.shorten_style_name(x, aggressive), lists)
|
||
|
)
|
||
|
return [
|
||
|
list(map(lambda x: FontnameTools.shorten_style_name(x, aggressive), styles))
|
||
|
for styles in lists
|
||
|
]
|
||
|
|
||
|
@staticmethod
|
||
|
def make_oblique_style(weights, styles):
|
||
|
"""Move "Oblique" from weights to styles for font naming purposes"""
|
||
|
if "Oblique" in weights:
|
||
|
weights = list(weights)
|
||
|
weights.remove("Oblique")
|
||
|
styles = list(styles)
|
||
|
styles.append("Oblique")
|
||
|
return (weights, styles)
|
||
|
|
||
|
@staticmethod
|
||
|
def get_name_token(name, tokens, allow_regex_token=False):
|
||
|
"""Try to find any case insensitive token from tokens in the name, return tuple with found token-list and rest"""
|
||
|
# The default mode (allow_regex_token = False) will try to find any verbatim string in the
|
||
|
# tokens list (case insensitive matching) and give that tokens list item back with
|
||
|
# unchanged case (i.e. [ 'Bold' ] will match "bold" and return it as [ 'Bold', ]
|
||
|
# In the regex mode (allow_regex_token = True) it will use the tokens elements as
|
||
|
# regexes and return the original (i.e. from name) case.
|
||
|
#
|
||
|
# Token are always used in a regex and may not capture, use non capturing
|
||
|
# grouping if needed (?: ... )
|
||
|
lower_tokens = [t.lower() for t in tokens]
|
||
|
not_matched = ""
|
||
|
all_tokens = []
|
||
|
j = 1
|
||
|
regex = re.compile("(.*?)(" + "|".join(tokens) + ")(.*)", re.IGNORECASE)
|
||
|
while j:
|
||
|
j = regex.match(name)
|
||
|
if not j:
|
||
|
break
|
||
|
if len(j.groups()) != 3:
|
||
|
sys.exit("Malformed regex in FontnameTools.get_name_token()")
|
||
|
not_matched += (
|
||
|
" " + j.groups()[0]
|
||
|
) # Blanc prevents unwanted concatenation of unmatched substrings
|
||
|
tok = j.groups()[1].lower()
|
||
|
if tok in lower_tokens:
|
||
|
tok = tokens[lower_tokens.index(tok)]
|
||
|
tok = FontnameTools.unify_style_names(tok)
|
||
|
if len(tok):
|
||
|
all_tokens.append(tok)
|
||
|
name = j.groups()[2] # Recurse rest
|
||
|
not_matched += " " + name
|
||
|
return (not_matched.strip(), all_tokens)
|
||
|
|
||
|
@staticmethod
|
||
|
def postscript_char_filter(name):
|
||
|
"""Filter out characters that are not allowed in Postscript names"""
|
||
|
# The name string must be restricted to the printable ASCII subset, codes 33 to 126,
|
||
|
# except for the 10 characters '[', ']', '(', ')', '{', '}', '<', '>', '/', '%'
|
||
|
out = ""
|
||
|
for c in name:
|
||
|
if c in "[](){}<>/%" or ord(c) < 33 or ord(c) > 126:
|
||
|
continue
|
||
|
out += c
|
||
|
return out
|
||
|
|
||
|
SIL_TABLE = [
|
||
|
("(a)nonymous", r"\1nonymice"),
|
||
|
("(b)itstream( ?)(v)era( ?sans ?mono)?", r"\1itstrom\2Wera"),
|
||
|
("(s)ource", r"\1auce"),
|
||
|
("(h)ermit", r"\1urmit"),
|
||
|
("(h)asklig", r"\1asklug"),
|
||
|
("(s)hare", r"\1hure"),
|
||
|
("IBM[- ]?plex", r"Blex"), # We do not keep the case here
|
||
|
("(t)erminus", r"\1erminess"),
|
||
|
("(l)iberation", r"\1iteration"),
|
||
|
("iA([- ]?)writer", r"iM\1Writing"),
|
||
|
("(a)nka/(c)oder", r"\1na\2onder"),
|
||
|
("(c)ascadia( ?)(c)ode", r"\1askaydia\2\3ove"),
|
||
|
("(c)ascadia( ?)(m)ono", r"\1askaydia\2\3ono"),
|
||
|
("(m)( ?)plus", r"\1+"), # Added this, because they use a plus symbol :->
|
||
|
("Gohufont", r"GohuFont"), # Correct to CamelCase
|
||
|
# Noone cares that font names starting with a digit are forbidden:
|
||
|
("IBM 3270", r"3270"), # for historical reasons and 'IBM' is a TM or something
|
||
|
# Some name parts that are too long for us
|
||
|
("(.*sans ?m)ono", r"\1"), # Various SomenameSansMono fonts
|
||
|
("(.*code ?lat)in Expanded", r"\1X"), # for 'M PLUS Code Latin Expanded'
|
||
|
("(.*code ?lat)in", r"\1"), # for 'M PLUS Code Latin'
|
||
|
("(b)ig( ?)(b)lue( ?)(t)erminal", r"\1ig\3lue\5erm"), # Shorten BigBlueTerminal
|
||
|
("(.*)437TT", r"\g<1>437"), # Shorten BigBlueTerminal 437 TT even further
|
||
|
("(.*dyslexic ?alt)a", r"\1"), # Open Dyslexic Alta -> Open Dyslexic Alt
|
||
|
("(.*dyslexic ?m)ono", r"\1"), # Open Dyslexic Mono -> Open Dyslexic M
|
||
|
("(overpass ?m)ono", r"\1"), # Overpass Mono -> Overpass M
|
||
|
("(proggyclean) ?tt", r"\1"), # Remove TT from ProggyClean
|
||
|
(
|
||
|
"(terminess) ?\(ttf\)",
|
||
|
r"\1",
|
||
|
), # Remove TTF from Terminus (after renamed to Terminess)
|
||
|
("(im ?writing ?q)uattro", r"\1uat"), # Rename iM Writing Quattro to Quat
|
||
|
(
|
||
|
"(im ?writing ?(mono|duo|quat)) ?s",
|
||
|
r"\1",
|
||
|
), # Remove S from all iM Writing styles
|
||
|
]
|
||
|
|
||
|
# From https://adobe-type-tools.github.io/font-tech-notes/pdfs/5088.FontNames.pdf
|
||
|
# The first short variant is from the linked table.
|
||
|
# The second (longer) short variant is from diverse fonts like Noto.
|
||
|
# We can
|
||
|
# - use the long form
|
||
|
# - use the very short form (first)
|
||
|
# - use mild short form:
|
||
|
# - has no modifier: use the second form
|
||
|
# - has modifier: use second form of mod plus first form of weights2
|
||
|
# - has modifier: use second form of mod plus second form of widths
|
||
|
# This is encoded in get_shorten_form_idx()
|
||
|
known_weights1 = { # can not take modifiers
|
||
|
"Medium": ("Md", "Med"),
|
||
|
"Nord": ("Nd", "Nord"),
|
||
|
"Book": ("Bk", "Book"),
|
||
|
"Poster": ("Po", "Poster"),
|
||
|
"Demi": (
|
||
|
"Dm",
|
||
|
"Demi",
|
||
|
), # Demi is sometimes used as a weight, sometimes as a modifier
|
||
|
"Regular": ("Rg", "Reg"),
|
||
|
"Display": ("DS", "Disp"),
|
||
|
"Super": ("Su", "Sup"),
|
||
|
"Retina": ("Rt", "Ret"),
|
||
|
}
|
||
|
known_weights2 = { # can take modifiers
|
||
|
"Black": ("Blk", "Black"),
|
||
|
"Bold": ("Bd", "Bold"),
|
||
|
"Heavy": ("Hv", "Heavy"),
|
||
|
"Thin": ("Th", "Thin"),
|
||
|
"Light": ("Lt", "Light"),
|
||
|
" ": (), # Just for CodeClimate :-/
|
||
|
}
|
||
|
known_widths = { # can take modifiers
|
||
|
"Compressed": ("Cm", "Comp"),
|
||
|
"Extended": ("Ex", "Extd"),
|
||
|
"Condensed": ("Cn", "Cond"),
|
||
|
"Narrow": ("Nr", "Narrow"),
|
||
|
"Compact": ("Ct", "Compact"),
|
||
|
}
|
||
|
known_slopes = { # can not take modifiers
|
||
|
"Inclined": ("Ic", "Incl"),
|
||
|
"Oblique": ("Obl", "Obl"),
|
||
|
"Italic": ("It", "Italic"),
|
||
|
"Upright": ("Up", "Uprght"),
|
||
|
"Kursiv": ("Ks", "Kurs"),
|
||
|
"Sloped": ("Sl", "Slop"),
|
||
|
}
|
||
|
known_modifiers = {
|
||
|
"Demi": ("Dm", "Dem"),
|
||
|
"Ultra": ("Ult", "Ult"),
|
||
|
"Semi": ("Sm", "Sem"),
|
||
|
"Extra": ("X", "Ext"),
|
||
|
}
|
||
|
|
||
|
@staticmethod
|
||
|
def is_keep_regular(basename):
|
||
|
"""This has been decided by the font designers, we need to mimic that (for comparison purposes)"""
|
||
|
KEEP_REGULAR = [
|
||
|
"Agave",
|
||
|
"Arimo",
|
||
|
"Aurulent",
|
||
|
"Cascadia",
|
||
|
"Cousine",
|
||
|
"Fantasque",
|
||
|
"Fira",
|
||
|
"Overpass",
|
||
|
"Lilex",
|
||
|
"Inconsolata$", # not InconsolataGo
|
||
|
"IAWriter",
|
||
|
"Meslo",
|
||
|
"Monoid",
|
||
|
"Mononoki",
|
||
|
"Hack",
|
||
|
"JetBrains Mono",
|
||
|
"Noto Sans",
|
||
|
"Noto Serif",
|
||
|
"Victor",
|
||
|
]
|
||
|
for kr in KEEP_REGULAR:
|
||
|
if (basename.rstrip() + "$").startswith(kr):
|
||
|
return True
|
||
|
return False
|
||
|
|
||
|
@staticmethod
|
||
|
def _parse_simple_font_name(name):
|
||
|
"""Parse a filename that does not follow the 'FontFamilyName-FontStyle' pattern"""
|
||
|
# No dash in name, maybe we have blanc separated filename?
|
||
|
if " " in name:
|
||
|
return FontnameTools.parse_font_name(name.replace(" ", "-"))
|
||
|
# Do we have a number-name boundary?
|
||
|
p = re.split("(?<=[0-9])(?=[a-zA-Z])", name)
|
||
|
if len(p) > 1:
|
||
|
return FontnameTools.parse_font_name("-".join(p))
|
||
|
# Or do we have CamelCase?
|
||
|
n = FontnameTools.camel_explode(name)
|
||
|
if n != name:
|
||
|
return FontnameTools.parse_font_name(n.replace(" ", "-"))
|
||
|
return (False, FontnameTools.camel_casify(name), [], [], [], "")
|
||
|
|
||
|
@staticmethod
|
||
|
def parse_font_name(name):
|
||
|
"""Expects a filename following the 'FontFamilyName-FontStyle' pattern and returns ... parts"""
|
||
|
name = re.sub(
|
||
|
r"\bsemi-condensed\b", "SemiCondensed", name, 1, re.IGNORECASE
|
||
|
) # Just for "3270 Semi-Condensed" :-/
|
||
|
name = re.sub("[_\s]+", " ", name)
|
||
|
matches = re.match(r"([^-]+)(?:-(.*))?", name)
|
||
|
familyname = FontnameTools.camel_casify(matches.group(1))
|
||
|
style = matches.group(2)
|
||
|
|
||
|
if not style:
|
||
|
return FontnameTools._parse_simple_font_name(name)
|
||
|
|
||
|
# These are the FontStyle keywords we know, in three categories
|
||
|
# Weights end up as Typographic Family parts ('after the dash')
|
||
|
# Styles end up as Family parts (for classic grouping of four)
|
||
|
# Others also end up in Typographic Family ('before the dash')
|
||
|
weights = (
|
||
|
[
|
||
|
m + s
|
||
|
for s in list(FontnameTools.known_weights2)
|
||
|
+ list(FontnameTools.known_widths)
|
||
|
for m in list(FontnameTools.known_modifiers) + [""]
|
||
|
if m != s
|
||
|
]
|
||
|
+ list(FontnameTools.known_weights1)
|
||
|
+ list(FontnameTools.known_slopes)
|
||
|
)
|
||
|
styles = [
|
||
|
"Bold",
|
||
|
"Italic",
|
||
|
"Regular",
|
||
|
"Normal",
|
||
|
]
|
||
|
weights = [w for w in weights if w not in styles]
|
||
|
# Some font specialities:
|
||
|
other = [
|
||
|
"-",
|
||
|
"Book",
|
||
|
"For",
|
||
|
"Powerline",
|
||
|
"Text", # Plex
|
||
|
"IIx", # Profont IIx
|
||
|
"LGC", # Inconsolata LGC
|
||
|
r"\bCE\b", # ProggycleanTT CE
|
||
|
r"[12][cmp]n?", # MPlus
|
||
|
r"(?:uni-)?1[14]", # GohuFont uni
|
||
|
]
|
||
|
|
||
|
# Sometimes used abbreviations
|
||
|
weight_abbrevs = [
|
||
|
"ob",
|
||
|
"c",
|
||
|
"m",
|
||
|
"l",
|
||
|
]
|
||
|
style_abbrevs = [
|
||
|
"it",
|
||
|
"r",
|
||
|
"b",
|
||
|
"i",
|
||
|
]
|
||
|
|
||
|
(style, weight_token) = FontnameTools.get_name_token(style, weights)
|
||
|
(style, style_token) = FontnameTools.get_name_token(style, styles)
|
||
|
(style, other_token) = FontnameTools.get_name_token(style, other, True)
|
||
|
if (
|
||
|
len(style) < 4 and style.lower() != "pro"
|
||
|
): # Prevent 'r' of Pro to be detected as style_abbrev
|
||
|
(style, weight_token_abbrevs) = FontnameTools.get_name_token(
|
||
|
style, weight_abbrevs
|
||
|
)
|
||
|
(style, style_token_abbrevs) = FontnameTools.get_name_token(
|
||
|
style, style_abbrevs
|
||
|
)
|
||
|
weight_token += weight_token_abbrevs
|
||
|
style_token += style_token_abbrevs
|
||
|
while "Regular" in style_token and len(style_token) > 1:
|
||
|
# Correct situation where "Regular" and something else is given
|
||
|
style_token.remove("Regular")
|
||
|
|
||
|
# Recurse to see if unmatched stuff between dashes can belong to familyname
|
||
|
matches2 = re.match(r"(\w+)-(.*)", style)
|
||
|
if matches2:
|
||
|
return FontnameTools.parse_font_name(
|
||
|
familyname + matches2.group(1) + "-" + matches2.group(2)
|
||
|
)
|
||
|
|
||
|
style = re.sub(
|
||
|
r"(^|\s)\d+(\.\d+)+(\s|$)", r"\1\3", style
|
||
|
) # Remove (free standing) version numbers
|
||
|
style_parts = FontnameTools.drop_empty(style.split(" "))
|
||
|
style = " ".join(map(FontnameTools.front_upper, style_parts))
|
||
|
familyname = FontnameTools.camel_explode(familyname)
|
||
|
return (True, familyname, weight_token, style_token, other_token, style)
|