refactor: add tomllib preference

This commit is contained in:
Daylin Morgan 2023-10-17 09:38:32 -05:00
parent f44f401410
commit 13acde3417
Signed by: daylin
GPG key ID: C1E52E7DD81DF79F

View file

@ -57,63 +57,64 @@ __version__ = "2023.1003-pep723"
##### START VENDORED TOMLI #####
# MODIFIED FROM https://github.com/hukkin/tomli
# see below for original license
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
# Licensed to PSF under a Contributor Agreement.
try:
from tomllib import loads as toml_loads
except ImportError:
# MODIFIED FROM https://github.com/hukkin/tomli
# see below for original license
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
# Licensed to PSF under a Contributor Agreement.
import string # noqa
from collections.abc import Iterable # noqa
from functools import lru_cache # noqa
from datetime import date, datetime, time, timedelta, timezone, tzinfo # noqa
from types import MappingProxyType # noqa
from typing import IO, Any, Callable, NamedTuple # noqa
import string # noqa
from collections.abc import Iterable # noqa
from functools import lru_cache # noqa
from datetime import date, datetime, time, timedelta, timezone, tzinfo # noqa
from types import MappingProxyType # noqa
from typing import IO, Any, Callable, NamedTuple # noqa
# Type aliases used throughout the vendored parser.
ParseFloat = Callable[[str], Any]  # callable that turns a float literal into a value
Key = Tuple[str, ...]  # a (possibly dotted) key as a tuple of its parts
Pos = int  # index into the source string

# Local time pattern; captures hour, minute, second and up to six
# fractional-second digits (further digits are matched but not captured).
# E.g.
# - 00:32:00.999999
# - 00:32:00
__tomli___TIME_RE_STR = (
    r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?"
)

# Number pattern: prefixed hex/bin/oct integers, or a decimal integer part
# followed by the named group "floatpart" (empty for plain integers).
__tomli__RE_NUMBER = re.compile(
    r"""
0
(?:
x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
|
b[01](?:_?[01])* # bin
|
o[0-7](?:_?[0-7])* # oct
)
|
[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
(?P<floatpart>
(?:\.[0-9](?:_?[0-9])*)? # optional fractional part
(?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
)
""",
    flags=re.VERBOSE,
)
__tomli__RE_LOCALTIME = re.compile(__tomli___TIME_RE_STR)

# Date pattern with an optional time part (reusing the local time pattern)
# and an optional offset ("Z"/"z" or a signed hour:minute pair).
__tomli__RE_DATETIME = re.compile(
    rf"""
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
(?:
[Tt ]
{__tomli___TIME_RE_STR}
(?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
)?
""",
    flags=re.VERBOSE,
)
def __tomli__match_to_datetime(match: re.Match) -> datetime | date:
"""Convert a `__tomli__RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
def __tomli__match_to_datetime(match: re.Match) -> datetime | date:
"""Convert a `__tomli__RE_DATETIME` match to `datetime.datetime`
or `datetime.date`.
Raises ValueError if the match does not correspond to a valid date
or datetime.
"""
@ -145,9 +146,8 @@ def __tomli__match_to_datetime(match: re.Match) -> datetime | date:
tz = None
return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
@lru_cache(maxsize=None)
def __tomli__cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
@lru_cache(maxsize=None)
def __tomli__cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
sign = 1 if sign_str == "+" else -1
return timezone(
timedelta(
@ -156,35 +156,32 @@ def __tomli__cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezon
)
)
def __tomli__match_to_localtime(match: re.Match) -> time:
    """Convert a local-time regex match to `datetime.time`.

    The match must expose exactly four groups: hour, minute, second and
    an optional fractional-second digit string (presumably a match of
    `__tomli__RE_LOCALTIME` -- confirm against callers).
    """
    hour_str, minute_str, sec_str, micros_str = match.groups()
    # Right-pad the fraction to six digits so that e.g. "5" means
    # 500000 microseconds; a missing fraction means zero.
    micros = int(micros_str.ljust(6, "0")) if micros_str else 0
    return time(int(hour_str), int(minute_str), int(sec_str), micros)
def __tomli__match_to_number(match: re.Match, parse_float: ParseFloat) -> Any:
    """Convert a number regex match to a Python value.

    If the match's "floatpart" group is non-empty, the whole matched text
    is handed to `parse_float` and its result is returned; otherwise the
    matched text is parsed as an integer.
    """
    if match.group("floatpart"):
        return parse_float(match.group())
    # Base 0 makes int() interpret 0x / 0o / 0b prefixes itself.
    return int(match.group(), 0)
__tomli__ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
__tomli__ILLEGAL_BASIC_STR_CHARS = __tomli__ASCII_CTRL - frozenset("\t")
__tomli__ILLEGAL_MULTILINE_BASIC_STR_CHARS = __tomli__ASCII_CTRL - frozenset("\t\n")
__tomli__ILLEGAL_LITERAL_STR_CHARS = __tomli__ILLEGAL_BASIC_STR_CHARS
__tomli__ILLEGAL_MULTILINE_LITERAL_STR_CHARS = (
__tomli__ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
__tomli__ILLEGAL_BASIC_STR_CHARS = __tomli__ASCII_CTRL - frozenset("\t")
__tomli__ILLEGAL_MULTILINE_BASIC_STR_CHARS = __tomli__ASCII_CTRL - frozenset("\t\n")
__tomli__ILLEGAL_LITERAL_STR_CHARS = __tomli__ILLEGAL_BASIC_STR_CHARS
__tomli__ILLEGAL_MULTILINE_LITERAL_STR_CHARS = (
__tomli__ILLEGAL_MULTILINE_BASIC_STR_CHARS
)
__tomli__ILLEGAL_COMMENT_CHARS = __tomli__ILLEGAL_BASIC_STR_CHARS
__tomli__TOML_WS = frozenset(" \t")
__tomli__TOML_WS_AND_NEWLINE = __tomli__TOML_WS | frozenset("\n")
__tomli__BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
__tomli__KEY_INITIAL_CHARS = __tomli__BARE_KEY_CHARS | frozenset("\"'")
__tomli__HEXDIGIT_CHARS = frozenset(string.hexdigits)
__tomli__BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
)
__tomli__ILLEGAL_COMMENT_CHARS = __tomli__ILLEGAL_BASIC_STR_CHARS
__tomli__TOML_WS = frozenset(" \t")
__tomli__TOML_WS_AND_NEWLINE = __tomli__TOML_WS | frozenset("\n")
__tomli__BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
__tomli__KEY_INITIAL_CHARS = __tomli__BARE_KEY_CHARS | frozenset("\"'")
__tomli__HEXDIGIT_CHARS = frozenset(string.hexdigits)
__tomli__BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
{
"\\b": "\u0008", # backspace
"\\t": "\u0009", # tab
@ -194,16 +191,14 @@ __tomli__BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
'\\"': "\u0022", # quote
"\\\\": "\u005C", # backslash
}
)
)
class TOMLDecodeError(ValueError):
    """An error raised if a document is not valid TOML."""
def __tomli__load(
def __tomli__load(
__fp: IO[bytes], *, parse_float: ParseFloat = float
) -> dict[str, Any]:
) -> dict[str, Any]:
"""Parse TOML from a binary file object."""
b = __fp.read()
try:
@ -214,10 +209,9 @@ def __tomli__load(
) from None
return __tomli__loads(s, parse_float=parse_float)
def __tomli__loads(
def __tomli__loads(
__s: str, *, parse_float: ParseFloat = float
) -> dict[str, Any]: # noqa: C901
) -> dict[str, Any]: # noqa: C901
"""Parse TOML from a string."""
# The spec allows converting "\r\n" to "\n", even in string
# literals. Let's do so to simplify parsing.
@ -276,8 +270,7 @@ def __tomli__loads(
pos += 1
return out.data.dict
class Flags:
class Flags:
"""Flags that map to parsed keys/namespaces."""
# Marks an immutable namespace (inline array or inline table).
@ -314,7 +307,11 @@ class Flags:
cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
cont = cont[k]["nested"]
if key_stem not in cont:
cont[key_stem] = {"flags": set(), "recursive_flags": set(), "nested": {}}
cont[key_stem] = {
"flags": set(),
"recursive_flags": set(),
"nested": {},
}
cont[key_stem]["recursive_flags" if recursive else "flags"].add(flag)
def is_(self, key: Key, flag: int) -> bool:
@ -334,8 +331,7 @@ class Flags:
return flag in cont["flags"] or flag in cont["recursive_flags"]
return False
class NestedDict:
class NestedDict:
def __init__(self) -> None:
# The parsed content of the TOML document
self.dict: dict[str, Any] = {}
@ -368,13 +364,11 @@ class NestedDict:
else:
cont[last_key] = [{}]
class Output(NamedTuple):
    """Pair of the nested dict being built and the flags tracked alongside it."""

    # Container whose `.dict` attribute holds the parsed document content.
    data: NestedDict
    # Flags that map to parsed keys/namespaces.
    flags: Flags
def __tomli__skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
def __tomli__skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
try:
while src[pos] in chars:
pos += 1
@ -382,29 +376,31 @@ def __tomli__skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
pass
return pos
def __tomli__skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of `expect` at or after `pos`.

    If `expect` is not found, the end of `src` is used as the result, unless
    `error_on_eof` is true, in which case an error built by
    `__tomli__suffixed_err` is raised instead.

    An error is also raised when any character between `pos` and the
    resulting index is a member of `error_on`; it points at the first
    such character.
    """
    try:
        new_pos = src.index(expect, pos)
    except ValueError:
        new_pos = len(src)
        if error_on_eof:
            raise __tomli__suffixed_err(
                src, new_pos, f"Expected {expect!r}"
            ) from None

    if not error_on.isdisjoint(src[pos:new_pos]):
        # Walk forward to the first offending character so the error
        # reports its exact position.
        while src[pos] not in error_on:
            pos += 1
        raise __tomli__suffixed_err(
            src, pos, f"Found invalid character {src[pos]!r}"
        )
    return new_pos
def __tomli__skip_comment(src: str, pos: Pos) -> Pos:
def __tomli__skip_comment(src: str, pos: Pos) -> Pos:
try:
char: str | None = src[pos]
except IndexError:
@ -419,8 +415,7 @@ def __tomli__skip_comment(src: str, pos: Pos) -> Pos:
)
return pos
def __tomli__skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
def __tomli__skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
while True:
pos_before_skip = pos
pos = __tomli__skip_chars(src, pos, __tomli__TOML_WS_AND_NEWLINE)
@ -428,8 +423,7 @@ def __tomli__skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
if pos == pos_before_skip:
return pos
def __tomli__create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
def __tomli__create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
pos += 1 # Skip "["
pos = __tomli__skip_chars(src, pos, __tomli__TOML_WS)
pos, key = __tomli__parse_key(src, pos)
@ -446,8 +440,7 @@ def __tomli__create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key
)
return pos + 1, key
def __tomli__create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
def __tomli__create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
pos += 2 # Skip "[["
pos = __tomli__skip_chars(src, pos, __tomli__TOML_WS)
pos, key = __tomli__parse_key(src, pos)
@ -469,10 +462,9 @@ def __tomli__create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key
)
return pos + 2, key
def __tomli__key_value_rule(
def __tomli__key_value_rule(
src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
) -> Pos:
pos, key, value = __tomli__parse_key_value_pair(src, pos, parse_float)
key_parent, key_stem = key[:-1], key[-1]
abs_key_parent = header + key_parent
@ -502,10 +494,9 @@ def __tomli__key_value_rule(
nest[key_stem] = value
return pos
def __tomli__parse_key_value_pair(
def __tomli__parse_key_value_pair(
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Key, Any]:
) -> tuple[Pos, Key, Any]:
pos, key = __tomli__parse_key(src, pos)
try:
char: str | None = src[pos]
@ -520,8 +511,7 @@ def __tomli__parse_key_value_pair(
pos, value = __tomli__parse_value(src, pos, parse_float)
return pos, key, value
def __tomli__parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
def __tomli__parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
pos, key_part = __tomli__parse_key_part(src, pos)
key: Key = (key_part,)
pos = __tomli__skip_chars(src, pos, __tomli__TOML_WS)
@ -538,8 +528,7 @@ def __tomli__parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
key += (key_part,)
pos = __tomli__skip_chars(src, pos, __tomli__TOML_WS)
def __tomli__parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
def __tomli__parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
try:
char: str | None = src[pos]
except IndexError:
@ -552,17 +541,17 @@ def __tomli__parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
return __tomli__parse_literal_str(src, pos)
if char == '"':
return __tomli__parse_one_line_basic_str(src, pos)
raise __tomli__suffixed_err(src, pos, "Invalid initial character for a key part")
raise __tomli__suffixed_err(
src, pos, "Invalid initial character for a key part"
)
def __tomli__parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening character is at `pos`.

    Advances past that one opening character and delegates the rest to
    `__tomli__parse_basic_str` with `multiline=False`, returning its result.
    """
    pos += 1  # Skip the opening character of the string
    return __tomli__parse_basic_str(src, pos, multiline=False)
def __tomli__parse_array(
def __tomli__parse_array(
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, list]:
) -> tuple[Pos, list]:
pos += 1
array: list = []
pos = __tomli__skip_comments_and_array_ws(src, pos)
@ -582,10 +571,9 @@ def __tomli__parse_array(
if src.startswith("]", pos):
return pos + 1, array
def __tomli__parse_inline_table(
def __tomli__parse_inline_table(
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, dict]:
) -> tuple[Pos, dict]:
pos += 1
nested_dict = NestedDict()
flags = Flags()
@ -602,7 +590,9 @@ def __tomli__parse_inline_table(
try:
nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
except KeyError:
raise __tomli__suffixed_err(src, pos, "Cannot overwrite a value") from None
raise __tomli__suffixed_err(
src, pos, "Cannot overwrite a value"
) from None
if key_stem in nest:
raise __tomli__suffixed_err(
src, pos, f"Duplicate inline table key {key_stem!r}"
@ -619,10 +609,9 @@ def __tomli__parse_inline_table(
pos += 1
pos = __tomli__skip_chars(src, pos, __tomli__TOML_WS)
def __tomli__parse_basic_str_escape(
def __tomli__parse_basic_str_escape(
src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
) -> tuple[Pos, str]:
escape_id = src[pos : pos + 2]
pos += 2
if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
@ -646,14 +635,16 @@ def __tomli__parse_basic_str_escape(
try:
return pos, __tomli__BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
except KeyError:
raise __tomli__suffixed_err(src, pos, "Unescaped '\\' in a string") from None
raise __tomli__suffixed_err(
src, pos, "Unescaped '\\' in a string"
) from None
def __tomli__parse_basic_str_escape_multiline(
    src: str, pos: Pos
) -> tuple[Pos, str]:
    """Parse an escape sequence at `pos` using the multiline rules.

    Thin wrapper that calls `__tomli__parse_basic_str_escape` with
    `multiline=True` and returns its result unchanged.
    """
    return __tomli__parse_basic_str_escape(src, pos, multiline=True)
def __tomli__parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
def __tomli__parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
hex_str = src[pos : pos + hex_len]
if len(hex_str) != hex_len or not __tomli__HEXDIGIT_CHARS.issuperset(hex_str):
raise __tomli__suffixed_err(src, pos, "Invalid hex value")
@ -665,19 +656,21 @@ def __tomli__parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]
)
return pos, chr(hex_int)
def __tomli__parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal (apostrophe-delimited) string.

    `pos` must point at the opening apostrophe. Returns the position just
    past the closing apostrophe together with the raw text between the two
    apostrophes. `__tomli__skip_until` raises if the closing apostrophe is
    missing or if a character from `__tomli__ILLEGAL_LITERAL_STR_CHARS`
    appears before it.
    """
    pos += 1  # Skip starting apostrophe
    start_pos = pos
    pos = __tomli__skip_until(
        src,
        pos,
        "'",
        error_on=__tomli__ILLEGAL_LITERAL_STR_CHARS,
        error_on_eof=True,
    )
    return pos + 1, src[start_pos:pos]  # Skip ending apostrophe
def __tomli__parse_multiline_str(
def __tomli__parse_multiline_str(
src: str, pos: Pos, *, literal: bool
) -> tuple[Pos, str]:
) -> tuple[Pos, str]:
pos += 3
if src.startswith("\n", pos):
pos += 1
@ -705,8 +698,9 @@ def __tomli__parse_multiline_str(
pos += 1
return pos, result + (delim * 2)
def __tomli__parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
def __tomli__parse_basic_str(
src: str, pos: Pos, *, multiline: bool
) -> tuple[Pos, str]:
if multiline:
error_on = __tomli__ILLEGAL_MULTILINE_BASIC_STR_CHARS
parse_escapes = __tomli__parse_basic_str_escape_multiline
@ -737,10 +731,9 @@ def __tomli__parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Po
raise __tomli__suffixed_err(src, pos, f"Illegal character {char!r}")
pos += 1
def __tomli__parse_value( # noqa: C901
def __tomli__parse_value( # noqa: C901
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Any]:
) -> tuple[Pos, Any]:
try:
char: str | None = src[pos]
except IndexError:
@ -785,7 +778,9 @@ def __tomli__parse_value( # noqa: C901
# char, so needs to be located after handling of dates and times.
number_match = __tomli__RE_NUMBER.match(src, pos)
if number_match:
return number_match.end(), __tomli__match_to_number(number_match, parse_float)
return number_match.end(), __tomli__match_to_number(
number_match, parse_float
)
# Special floats
first_three = src[pos : pos + 3]
if first_three in {"inf", "nan"}:
@ -795,8 +790,7 @@ def __tomli__parse_value( # noqa: C901
return pos + 4, parse_float(first_four)
raise __tomli__suffixed_err(src, pos, "Invalid value")
def __tomli__suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
def __tomli__suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
"""Return a `TOMLDecodeError` where error message is suffixed with
coordinates in source."""
@ -812,12 +806,10 @@ def __tomli__suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
return TOMLDecodeError(f"{msg} (at {coord_repr(src, pos)})")
def __tomli__is_unicode_scalar_value(codepoint: int) -> bool:
    """Return True if `codepoint` is a Unicode scalar value.

    Accepted ranges are 0..55295 (U+0000..U+D7FF) and 57344..1114111
    (U+E000..U+10FFFF); the gap between them (U+D800..U+DFFF, the
    surrogate code points) and anything outside is rejected.
    """
    return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
def __tomli__make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
def __tomli__make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
"""A decorator to make `parse_float` safe.
`parse_float` must not return dicts or lists, because these types
would be mixed with parsed TOML tables and arrays, thus confusing
@ -836,16 +828,11 @@ def __tomli__make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
return safe_parse_float
toml_loads = __tomli__loads
##### END VENDORED TOMLI #####
# fmt: on
# fmt: on
class Spinner:
"""spinner modified from:
https://raw.githubusercontent.com/Tagar/stuff/master/spinner.py
@ -2012,7 +1999,7 @@ METADATA_BLOCK = (
)
def read_metadata_block(script: str) -> dict | None:
def read_metadata_block(script: str) -> dict:
name = "pyproject"
matches = list(
filter(lambda m: m.group("type") == name, re.finditer(METADATA_BLOCK, script))
@ -2020,11 +2007,11 @@ def read_metadata_block(script: str) -> dict | None:
if len(matches) > 1:
raise ValueError(f"Multiple {name} blocks found")
elif len(matches) == 1:
return __tomli__loads(
return toml_loads(
"\n".join((line[2:] for line in matches[0].group(0).splitlines()[1:-1]))
)
else:
return None
return {}
# DEPENDENCY_BLOCK_MARKER = r"(?i)^#\s+script\s+dependencies:\s*$"