"""
Contains logic for formatting statically / dynamically extracted information
into the final product.
"""
from os.path import join, exists
import textwrap
import logging
from mkinit import static_analysis as static
logger = logging.getLogger(__name__)
[docs]
def _ensure_options(given_options=None):
"""
Ensures dict contains all formatting options.
Defaults are:
with_attrs (bool): if True, generate module attribute from imports
(Default: True)
with_mods (bool): if True, generate module imports
(Default: True)
with_all (bool): if True, generate an __all__ variable
(Default: True)
relative (bool): if True, generate relative `.` imports
(Default: False)
"""
if given_options is None:
given_options = {}
default_options = {
"with_attrs": True,
"with_mods": True,
"with_all": True,
"relative": False,
"lazy_import": False,
"lazy_loader": False,
"lazy_loader_typed": False,
"lazy_boilerplate": None,
"use_black": False,
}
options = default_options.copy()
for k in given_options.keys():
if k not in default_options:
raise KeyError("options got bad key={}".format(k))
options.update(given_options)
return options
[docs]
def _insert_autogen_text(modpath, initstr, interface=False):
    """
    Compute the new contents of a package init file with `initstr` spliced in.

    If the init file already exists in `modpath`, its current lines are read
    and the autogenerated code replaces only the region selected by
    `_find_insert_points`; otherwise the region is the whole (empty) file.
    Nothing is written to disk here.

    Args:
        modpath (str): directory of the package.
        initstr (str): autogenerated code to insert.
        interface (bool): if True target ``__init__.pyi`` instead of
            ``__init__.py``.

    Returns:
        Tuple[str, str]: path of the init file and its new text, which always
            ends with exactly one trailing newline.
    """
    init_name = "__init__.pyi" if interface else "__init__.py"
    init_fpath = join(modpath, init_name)
    logger.debug("inserting initstr into: {!r}".format(init_fpath))

    lines = []
    if exists(init_fpath):
        with open(init_fpath, "r") as file_:
            lines = file_.readlines()

    startline, endline, init_indent = _find_insert_points(lines)

    # Indent to the level of the insertion point, then drop leading newlines
    # so the generated text starts right at the insertion point.
    block = (_indent(initstr, init_indent) + "\n").lstrip("\n")

    head, tail = lines[:startline], lines[endline:]
    new_text = "".join(head + [block] + tail).rstrip() + "\n"
    return init_fpath, new_text
[docs]
def _find_insert_points(lines):
    r"""
    Searches for the points to insert autogenerated text between.

    If the `# <AUTOGEN_INIT>` directive exists, then it is preserved and new
    text is inserted after it. This text clobbers all other text until the
    `# </AUTOGEN_INIT>` directive is reached (or the end of the file if there
    is no closing directive). The indentation in front of the opening
    directive is returned so inserted text can be indented to match.

    If the explicit tags are not specified, mkinit will only clobber text
    after the last line that begins (ignoring leading whitespace) with one of
    these patterns:

        * A (#) comment
        * A multiline (triple-quote) comment
        * "from __future__"
        * "__version__"
        * "__submodules__", "__external__", "__private__" or "__protected__"

    If neither explicit tags or implicit patterns exist, all text is
    clobbered.

    Args:
        lines (List[str]): lines of an `__init__.py` file.

    Returns:
        Tuple[int, int, str]:
            insert points as starting line, ending line, and any required
            indentation.

    Examples:
        >>> from mkinit.formatting import *  # NOQA
        >>> lines = textwrap.dedent(
            '''
            preserved1 = True
            if True:
                # <AUTOGEN_INIT>
                clobbered2 = True
                # </AUTOGEN_INIT>
            preserved3 = True
            ''').strip('\n').split('\n')
        >>> start, end, indent = _find_insert_points(lines)
        >>> print(repr((start, end, indent)))
        (3, 4, '    ')

    Examples:
        >>> from mkinit.formatting import *  # NOQA
        >>> lines = textwrap.dedent(
            '''
            preserved1 = True
            __version__ = '1.0'
            clobbered2 = True
            ''').strip('\n').split('\n')
        >>> start, end, indent = _find_insert_points(lines)
        >>> print(repr((start, end, indent)))
        (2, 3, '')
    """
    # Defaults: with no tags and no implicit patterns the whole file is
    # replaced.
    startline = 0
    endline = len(lines)
    explicit_flag = False
    init_indent = ""
    # NOTE: this shadows the builtin ``print`` with a no-op for the rest of
    # the function, so every ``print(...)`` below is a disabled debug trace.
    print = lambda x: None  # NOQA
    # print = logging.debug
    print('Searching for points to insert autogenerated code')

    # co-opt the xdoctest parser to break apart lines in the init file
    # This lets us correctly skip to the end of a multiline expression
    # A better solution might be to use the line-number aware parser
    # to search for AUTOGEN_INIT comments and other relevant structures.
    # Each line is prefixed with ">>> " so it looks like doctest input.
    # NOTE(review): presumably ``ps1_lines`` is the sorted list of line
    # indexes on which a new statement starts; confirm against
    # ``static._locate_ps1_linenos``.
    source_lines = [">>> " + p.rstrip("\n") for p in lines]
    try:
        ps1_lines, _ = static._locate_ps1_linenos(source_lines)
        print('ps1_lines = {!r}'.format(ps1_lines))
    except IndexError:
        # The only expected way for the locator to fail is an empty file.
        assert len(lines) == 0
        ps1_lines = []

    # Algorithm is similar to the old version, but we skip to the next PS1
    # line if we encounter an implicit code pattern.
    # ``skipto`` is the index of the next line to examine in implicit mode;
    # lines before it are ignored by the implicit-pattern logic.
    skipto = None

    def _tryskip(lineno):
        """ returns the next line to skip to if possible """
        # NOTE(review): stub with no body; it is never called in this
        # function.

    implicit_patterns = (
        "from __future__",
        "__version__",
        "__submodules__",
        "__external__",
        "__private__",
        "__protected__",
        "#",
        '"""',
        "'''",
    )
    for lineno, line in enumerate(lines):
        # Check explicit modes regardless of skipping
        if line.strip().startswith("# <AUTOGEN_INIT>"):  # allow tags too
            print('[mkinit] FOUND START TAG ON LINE {}: {}'.format(lineno, line))
            # Everything before the "#" is the indentation of the tag.
            init_indent = line[: line.find("#")]
            explicit_flag = True
            startline = lineno + 1
        if explicit_flag and line.strip().startswith("# </AUTOGEN_INIT>"):
            print('[mkinit] FOUND END TAG ON LINE {}: {}'.format(lineno, line))
            endline = lineno

        # Implicit mode only applies until an explicit start tag is seen.
        if not explicit_flag:
            if skipto is not None:
                if lineno != skipto:
                    continue
                else:
                    print('SKIPPED TO = {!r}'.format(lineno))
                    skipto = None
            if line.strip().startswith(implicit_patterns):
                print('[mkinit] RESPECTING LINE {}: {}'.format(lineno, line.rstrip('\n')))
                # Preserve this line: insertion may start right after it.
                startline = lineno + 1
                try:
                    # Try and skip to the end of the expression
                    # (if it is a multiline case)
                    idx = ps1_lines.index(lineno)
                    skipto = ps1_lines[idx + 1]
                    startline = skipto
                    print('SKIPTO = {!r}'.format(skipto))
                except ValueError:
                    # ``lineno`` is not in ``ps1_lines``; keep lineno + 1.
                    print('NOT ON A PS1 LINE KEEP {}'.format(startline))
                except IndexError:
                    # ``lineno`` is the last entry of ``ps1_lines``, so the
                    # respected text runs to the end of the file.
                    print('LAST LINE MOVING TO END {}'.format(startline))
                    startline = endline
            else:
                # Even if we dont respect the lines, try not to end between
                # PS1 lines.
                try:
                    # Try and skip to the end of the expression
                    # (if it is a multiline case)
                    idx = ps1_lines.index(lineno)
                    skipto = ps1_lines[idx + 1]
                except ValueError:
                    ...
                except IndexError:
                    ...
    # print('startline = {}'.format(startline))
    # print('endline = {}'.format(endline))
    assert startline <= endline
    return startline, endline, init_indent
[docs]
def _indent(text, indent=" "):
new_text = indent + text.replace("\n", "\n" + indent)
# remove whitespace on blank lines
new_text = "\n".join([line.rstrip() for line in new_text.split("\n")])
return new_text
[docs]
def _initstr(
    modname,
    imports,
    from_imports,
    explicit=None,
    protected=None,
    private=None,
    options=None,
):
    r"""
    Build the autogenerated source text for a package ``__init__`` file.

    Depending on ``options`` the text contains either eager import
    statements or a lazy-loading stanza, optionally followed by an
    ``__all__`` definition. Parts are separated by blank lines.

    How names are filtered:

        * An entry of ``from_imports`` is dropped entirely when its module
          name (leading dots stripped) equals, or glob-matches, an entry of
          ``protected`` or ``private``.
        * Submodules listed in ``protected`` are exposed even when
          ``options['with_mods']`` is false.
        * When ``options['with_attrs']`` is false, only attributes named
          exactly in ``protected`` are imported.
        * Attributes that equal or glob-match an entry of ``private`` are
          still imported but are left out of ``__all__``.

    CommandLine:
        python -m mkinit.formatting _initstr

    Args:
        modname (str): the name of the module to generate the init str for.
            Replaced by ``'.'`` when ``options['relative']`` is true.
        imports (List[str]): list of module-level imports
        from_imports (List[Tuple[str, List[str]]]):
            List of submodules and their imported attributes
        explicit (Iterable[str] | None): extra names added to ``__all__``.
        protected (Iterable[str] | None): protected names; entries
            containing ``*`` are treated as glob patterns.
        private (Iterable[str] | None): private names; entries containing
            ``*`` are treated as glob patterns.
        options (dict | None): customize output, see ``_ensure_options``.

    Returns:
        str: the generated text.

    Example:
        >>> modname = 'foo'
        >>> imports = ['.bar', '.baz']
        >>> from_imports = [('.bar', ['func1', 'func2'])]
        >>> initstr = _initstr(modname, imports, from_imports)
        >>> print(initstr)
        from foo import bar
        from foo import baz
        <BLANKLINE>
        from foo.bar import (func1, func2,)
        <BLANKLINE>
        __all__ = ['bar', 'baz', 'func1', 'func2']

    Example:
        >>> modname = 'foo'
        >>> imports = ['.bar', '.baz']
        >>> from_imports = [('.bar', list(map(chr, range(97, 123))))]
        >>> initstr = _initstr(modname, imports, from_imports)
        >>> print(initstr)
        from foo import bar
        from foo import baz
        <BLANKLINE>
        from foo.bar import (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s,
                             t, u, v, w, x, y, z,)
        <BLANKLINE>
        __all__ = ['a', 'b', 'bar', 'baz', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
                   'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
                   'y', 'z']

    Example:
        >>> import pytest
        >>> import sys
        >>> if sys.version_info < (3, 7):
        >>>     pytest.skip('lazy only works on 3.7+')
        >>> modname = 'foo'
        >>> imports = ['.bar', '.baz']
        >>> from_imports = [('.bar', ['func1', 'func2'])]
        >>> options = {'lazy_import': 1, 'lazy_boilerplate': None}
        >>> initstr = _initstr(modname, imports, from_imports, options=options)
        >>> print(initstr)
        ...
        >>> options = {'lazy_import': 1, 'lazy_boilerplate': 'from importlib import lazy_import'}
        >>> initstr = _initstr(modname, imports, from_imports, options=options)
        >>> print(initstr.replace('\n\n', '\n'))
        from importlib import lazy_import
        __getattr__ = lazy_import(
            __name__,
            submodules={
                'bar',
                'baz',
            },
            submod_attrs={
                'bar': [
                    'func1',
                    'func2',
                ],
            },
        )
        def __dir__():
            return __all__
        __all__ = ['bar', 'baz', 'func1', 'func2']
    """
    from fnmatch import fnmatch

    options = _ensure_options(options)
    if options["relative"]:
        modname = "."

    # Work on private copies so caller-owned collections are never touched
    # and one-shot iterables are consumed exactly once.
    explicit = set() if explicit is None else set(explicit)
    protected = set() if protected is None else set(protected)
    private = set() if private is None else set(private)

    parts = []

    def append_part(new_part):
        """ appends a new part if it is nonempty """
        if new_part:
            if parts:
                # separate from previous parts with a newline
                parts.append("")
            parts.append(new_part)

    # map each submodule (leading dots stripped) to its import statement
    submod_to_import = {e.lstrip("."): e for e in imports}
    submodules = set(submod_to_import)

    exposed_submodules = set()
    exposed_all = set()
    if options["with_mods"]:
        exposed_submodules.update(submodules)
        exposed_all.update(submodules)
    # Protected submodules are exposed regardless of ``with_mods``.
    protected_submodules = submodules & protected
    exposed_submodules.update(protected_submodules)
    exposed_all.update(protected_submodules)

    # separate into explicit names vs glob-pattern strings
    private_pats = {p for p in private if "*" in p}
    private_set = private - private_pats
    protected_pats = {p for p in protected if "*" in p}
    protected_set = protected - protected_pats
    _pp_pats = protected_pats | private_pats
    _pp_set = private_set | protected_set

    def _private_matches(x):
        x = x.lstrip(".")
        return x in private_set or any(fnmatch(x, pat) for pat in private_pats)

    def _pp_matches(x):
        # TODO: standardize how explicit vs submodules are handled
        x = x.lstrip(".")
        return x in _pp_set or any(fnmatch(x, pat) for pat in _pp_pats)

    raw_from_imports = [(m, sub) for m, sub in from_imports if not _pp_matches(m)]

    exposed_from_imports = []
    if options["with_attrs"]:
        exposed_from_imports = raw_from_imports
    elif protected:
        # Keep only explicitly protected attributes. Filtering in place
        # (instead of intersecting sets) keeps the original order, so the
        # generated text does not depend on string hash order.
        for m, sub in raw_from_imports:
            kept = [n for n in sub if n in protected]
            if kept:
                exposed_from_imports.append((m, kept))

    exposed_all.update(
        n for m, sub in exposed_from_imports for n in sub if not _private_matches(n)
    )
    exposed_all.update(explicit)
    exposed_all = sorted(exposed_all)
    exposed_submodules = sorted(exposed_submodules)

    if options["lazy_loader"] or options["lazy_import"]:
        # ``lazy_loader`` takes precedence when both flags are set.
        if options["lazy_loader"]:
            default_lazy_boilerplate = textwrap.dedent(
                r"""
                import lazy_loader
                """
            ).rstrip("\n")
            if options["lazy_loader_typed"]:
                template = textwrap.dedent(
                    """
                    __getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__)
                    """
                ).rstrip("\n")
            else:
                template = textwrap.dedent(
                    """
                    __getattr__, __dir__, __all__ = lazy_loader.attach(
                        __name__,
                        submodules={submodules},
                        submod_attrs={submod_attrs},
                    )
                    """
                ).rstrip("\n")
        else:
            # NOTE: We are not using f-strings so the code can still be parsed
            # in older versions of python.
            # NOTE: We differentiate between submodule and submodule_attrs, as
            # the keys in submodule_attrs aren't added by default.
            default_lazy_boilerplate = textwrap.dedent(
                r"""
                def lazy_import(module_name, submodules, submod_attrs):
                    import importlib
                    import os
                    name_to_submod = {
                        func: mod for mod, funcs in submod_attrs.items()
                        for func in funcs
                    }
                    def __getattr__(name):
                        if name in submodules:
                            attr = importlib.import_module(
                                '{module_name}.{name}'.format(
                                    module_name=module_name, name=name)
                            )
                        elif name in name_to_submod:
                            submodname = name_to_submod[name]
                            module = importlib.import_module(
                                '{module_name}.{submodname}'.format(
                                    module_name=module_name, submodname=submodname)
                            )
                            attr = getattr(module, name)
                        else:
                            raise AttributeError(
                                'No {module_name} attribute {name}'.format(
                                    module_name=module_name, name=name))
                        globals()[name] = attr
                        return attr
                    if os.environ.get('EAGER_IMPORT', ''):
                        for name in submodules:
                            __getattr__(name)
                        for attrs in submod_attrs.values():
                            for attr in attrs:
                                __getattr__(attr)
                    return __getattr__
                """
            ).rstrip("\n")
            template = textwrap.dedent(
                """
                __getattr__ = lazy_import(
                    __name__,
                    submodules={submodules},
                    submod_attrs={submod_attrs},
                )
                """
            ).rstrip("\n")

        submod_attrs = {
            submod.lstrip("."): attrs for submod, attrs in exposed_from_imports
        }

        # Currently this is the only use of ubelt, but urepr
        # is easier to use in testing than pprint, so perhaps
        # we can remove complexity and just use ubelt elsewhere
        import ubelt as ub

        # The reprs are substituted one level deep inside the call, so their
        # continuation lines are shifted by the template's 4-space indent.
        nested_newline = "\n" + " " * 4
        submodules_repr = ub.urepr(exposed_submodules).replace("\n", nested_newline)
        # The sorted list is rendered with list brackets; emit it with
        # braces instead (hack for python <3.7 tests).
        submodules_repr = submodules_repr.replace("[", "{").replace("]", "}")
        initstr = template.format(
            submodules=submodules_repr,
            submod_attrs=ub.urepr(submod_attrs).replace("\n", nested_newline),
        )

        if options["lazy_boilerplate"] is None:
            append_part(default_lazy_boilerplate)
        else:
            # Customize lazy boilerplate
            append_part(options["lazy_boilerplate"])
        append_part(initstr.rstrip())
    else:
        if exposed_submodules:
            exposed_imports = [submod_to_import[k] for k in exposed_submodules]
            append_part(_make_imports_str(exposed_imports, modname))
        if exposed_from_imports:
            append_part(_make_fromimport_str(exposed_from_imports, modname))

    if options["with_all"]:
        if options["lazy_import"]:
            append_part(
                textwrap.dedent(
                    """
                    def __dir__():
                        return __all__
                    """
                ).rstrip()
            )
        exports_repr = ["'{}'".format(e) for e in exposed_all]
        rhs_body = ", ".join(exports_repr)
        append_part(_packed_rhs_text("__all__ = [", rhs_body + "]"))

    initstr = "\n".join(parts)

    if options["use_black"]:
        # Formatting is best-effort: fall back to the unformatted text when
        # black is not installed, but say so instead of failing silently.
        try:
            import black
        except ImportError:
            logger.warning("use_black was requested but black is not installed")
        else:
            initstr = black.format_str(
                initstr, mode=black.Mode(string_normalization=True)
            )
    return initstr
[docs]
def _make_imports_str(imports, rootmodname="."):
if False:
imports_fmtstr = "from {rootmodname} import %s".format(rootmodname=rootmodname)
return "\n".join([imports_fmtstr % (name,) for name in imports])
else:
imports_fmtstr = "from {rootmodname} import %s".format(rootmodname=rootmodname)
return "\n".join(
[
imports_fmtstr % (name.lstrip("."))
if name.startswith(".")
else "import %s" % (name,)
for name in imports
]
)
[docs]
def _packed_rhs_text(lhs_text, rhs_text):
"""
packs rhs text to have indentation that agrees with lhs text
Example:
>>> normname = 'this.is.a.module'
>>> fromlist = ['func{}'.format(d) for d in range(10)]
>>> indent = ''
>>> lhs_text = indent + 'from {normname} import ('.format(
>>> normname=normname)
>>> rhs_text = ', '.join(fromlist) + ',)'
>>> packstr = _packed_rhs_text(lhs_text, rhs_text)
>>> print(packstr)
>>> normname = 'this.is.a.very.long.modnamethatwilkeepgoingandgoing'
>>> fromlist = ['func{}'.format(d) for d in range(10)]
>>> indent = ''
>>> lhs_text = indent + 'from {normname} import ('.format(
>>> normname=normname)
>>> rhs_text = ', '.join(fromlist) + ',)'
>>> packstr = _packed_rhs_text(lhs_text, rhs_text)
>>> print(packstr)
>>> normname = 'this.is.a.very.long.modnamethatwilkeepgoingandgoingandgoingandgoingandgoingandgoing'
>>> fromlist = ['func{}'.format(d) for d in range(10)]
>>> indent = ''
>>> lhs_text = indent + 'from {normname} import ('.format(
>>> normname=normname)
>>> rhs_text = ', '.join(fromlist) + ',)'
>>> packstr = _packed_rhs_text(lhs_text, rhs_text)
>>> print(packstr)
"""
# FIXME: the parens get broken up wrong
# filler = '-' * (len(lhs_text) - 1) + ' '
# fill_text = filler + rhs_text
if 0:
# options['use_black']:
import black
raw_text = lhs_text + rhs_text
packstr = black.format_str(
raw_text, mode=black.Mode(string_normalization=False)
)
return packstr
else:
import re
# not sure why this isn't 76? >= maybe?
max_width = 79
# This is a hacky heuristic that could perhaps be more robust?
if len(lhs_text) > max_width * 0.7:
newline_prefix = " " * 4
else:
newline_prefix = " " * len(lhs_text)
raw_text = lhs_text + rhs_text
wrapped_lines = textwrap.wrap(
raw_text,
break_long_words=False,
width=79,
initial_indent="",
subsequent_indent=newline_prefix,
)
packstr = "\n".join(wrapped_lines)
FIX_FORMAT = 1
if FIX_FORMAT:
regex = r"\s*".join(list(map(re.escape, lhs_text.split(" "))))
assert re.match(regex, lhs_text)
match = re.search(regex, packstr)
span = match.span()
assert span[0] == 0
wrapped_lhs = match.string[: span[1]]
# If textwrap broke the LHS then do something slightly different
if "\n" in wrapped_lhs:
new_rhs = packstr[span[1] :]
new_packstr = lhs_text + "\n" + newline_prefix + new_rhs
packstr = new_packstr
return packstr
[docs]
def _make_fromimport_str(from_imports, rootmodname=".", indent=""):
"""
Args:
from_imports (list): each item is a tuple with module and a list of
imported with_attrs.
rootmodname (str): name of root module
indent (str): initial indentation
Example:
>>> from_imports = [
... ('.foo', list(map(chr, range(97, 123)))),
... ('.bar', []),
... ('.a_longer_package', list(map(chr, range(65, 91)))),
... ]
>>> from_str = _make_fromimport_str(from_imports, indent=' ' * 8)
>>> print(from_str)
from .foo import (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r,
s, t, u, v, w, x, y, z,)
from .a_longer_package import (A, B, C, D, E, F, G, H, I, J, K, L, M,
N, O, P, Q, R, S, T, U, V, W, X, Y, Z,)
"""
if rootmodname == ".": # nocover
# dot is already taken care of in fmtstr
rootmodname = ""
def _pack_fromimport(tup):
name, fromlist = tup[0], tup[1]
if name.startswith("."):
normname = rootmodname + name
else:
normname = name
if len(fromlist) > 0:
lhs_text = indent + "from {normname} import (".format(normname=normname)
rhs_text = ", ".join(fromlist) + ",)"
packstr = _packed_rhs_text(lhs_text, rhs_text)
else:
packstr = ""
return packstr
parts = [_pack_fromimport(t) for t in from_imports]
from_str = "\n".join([p for p in parts if p])
# Return unindented version for now
from_str = textwrap.dedent(from_str)
return from_str