Source code for mkinit.formatting

"""
Contains logic for formatting statically / dynamically extracted information
into the final product.
"""
from os.path import join, exists
import textwrap
import logging
from mkinit import static_analysis as static


logger = logging.getLogger(__name__)


[docs] def _ensure_options(given_options=None): """ Ensures dict contains all formatting options. Defaults are: with_attrs (bool): if True, generate module attribute from imports (Default: True) with_mods (bool): if True, generate module imports (Default: True) with_all (bool): if True, generate an __all__ variable (Default: True) relative (bool): if True, generate relative `.` imports (Default: False) """ if given_options is None: given_options = {} default_options = { "with_attrs": True, "with_mods": True, "with_all": True, "relative": False, "lazy_import": False, "lazy_loader": False, "lazy_loader_typed": False, "lazy_boilerplate": None, "use_black": False, } options = default_options.copy() for k in given_options.keys(): if k not in default_options: raise KeyError("options got bad key={}".format(k)) options.update(given_options) return options
def _insert_autogen_text(modpath, initstr, interface=False):
    """
    Creates new text for `__init__.py` containing the autogenerated code.

    If an `__init__.py` already exists in `modpath`, the existing lines are
    read and the new code replaces only the region chosen by
    `_find_insert_points`; everything outside that region is kept. Nothing
    is written to disk here.

    Args:
        modpath (str): directory of the package
        initstr (str): autogenerated code to insert
        interface (bool): if True, target `__init__.pyi` instead of
            `__init__.py`

    Returns:
        Tuple[str, str]: path of the init file and its new full text
    """
    init_name = "__init__.pyi" if interface else "__init__.py"
    init_fpath = join(modpath, init_name)
    logger.debug("inserting initstr into: {!r}".format(init_fpath))

    existing = []
    if exists(init_fpath):
        with open(init_fpath, "r") as file_:
            existing = file_.readlines()

    startline, endline, init_indent = _find_insert_points(existing)

    # Indent to match the insertion point, then drop leading newlines so the
    # generated block starts directly after the preserved header.
    block = (_indent(initstr, init_indent) + "\n").lstrip("\n")

    head = existing[:startline]
    tail = existing[endline:]
    # Normalize the end of the file to exactly one trailing newline.
    new_text = "".join(head + [block] + tail).rstrip() + "\n"
    return init_fpath, new_text
def _find_insert_points(lines):
    r"""
    Searches for the points to insert autogenerated text between.

    If the `# <AUTOGEN_INIT>` directive exists, then it is preserved and new
    text is inserted after it. This text clobbers all other text until the
    `# </AUTOGEN_INIT>` is reached.

    If the explicit tags are not specified, mkinit will only clobber text
    after one of these patterns:
        * A line beginning with a (#) comment
        * A multiline (triple-quote) comment
        * A line beginning with "from __future__"
        * A line beginning with "__version__"

    If neither explicit tags or implicit patterns exist, all text is
    clobbered.

    Args:
        lines (List[str]): lines of an `__init__.py` file.

    Returns:
        Tuple[int, int, str]:
            insert points as starting line, ending line, and any required
            indentation.

    Examples:
        >>> from mkinit.formatting import *  # NOQA
        >>> lines = textwrap.dedent(
            '''
            preserved1 = True
            if True:
                # <AUTOGEN_INIT>
                clobbered2 = True
                # </AUTOGEN_INIT>
            preserved3 = True
            ''').strip('\n').split('\n')
        >>> start, end, indent = _find_insert_points(lines)
        >>> print(repr((start, end, indent)))
        (3, 4, '    ')

    Examples:
        >>> from mkinit.formatting import *  # NOQA
        >>> lines = textwrap.dedent(
            '''
            preserved1 = True
            __version__ = '1.0'
            clobbered2 = True
            ''').strip('\n').split('\n')
        >>> start, end, indent = _find_insert_points(lines)
        >>> print(repr((start, end, indent)))
        (2, 3, '')
    """
    startline = 0
    endline = len(lines)
    explicit_flag = False
    init_indent = ""

    def _trace(msg):
        """ No-op debug hook; swap in a real logger call to trace the search """
        return None

    _trace('Searching for points to insert autogenerated code')

    # co-opt the xdoctest parser to break appart lines in the init file
    # This lets us correctly skip to the end of a multiline expression
    # A better solution might be to use the line-number aware parser
    # to search for AUTOGEN_INIT comments and other relevant structures.
    source_lines = [">>> " + p.rstrip("\n") for p in lines]
    try:
        ps1_lines, _ = static._locate_ps1_linenos(source_lines)
        _trace('ps1_lines = {!r}'.format(ps1_lines))
    except IndexError:
        # The only case handled here is an empty file.
        assert len(lines) == 0
        ps1_lines = []

    # Algorithm is similar to the old version, but we skip to the next PS1
    # line if we encounter an implicit code pattern.
    skipto = None

    # Line prefixes that are preserved when no explicit tags are present.
    implicit_patterns = (
        "from __future__",
        "__version__",
        "__submodules__",
        "__external__",
        "__private__",
        "__protected__",
        "#",
        '"""',
        "'''",
    )

    for lineno, line in enumerate(lines):
        stripped = line.strip()

        # Check explicit modes regardless of skipping
        if stripped.startswith("# <AUTOGEN_INIT>"):
            # allow tags too
            _trace('[mkinit] FOUND START TAG ON LINE {}: {}'.format(lineno, line))
            # Everything before the "#" is the indentation to reuse.
            init_indent = line[: line.find("#")]
            explicit_flag = True
            startline = lineno + 1
        if explicit_flag and stripped.startswith("# </AUTOGEN_INIT>"):
            _trace('[mkinit] FOUND END TAG ON LINE {}: {}'.format(lineno, line))
            endline = lineno

        if not explicit_flag:
            if skipto is not None:
                if lineno != skipto:
                    # Still inside a multiline statement; ignore this line.
                    continue
                else:
                    _trace('SKIPPED TO = {!r}'.format(lineno))
                    skipto = None

            if stripped.startswith(implicit_patterns):
                _trace('[mkinit] RESPECTING LINE {}: {}'.format(
                    lineno, line.rstrip('\n')))
                startline = lineno + 1
                try:
                    # Try and skip to the end of the expression
                    # (if it is a multiline case)
                    idx = ps1_lines.index(lineno)
                    skipto = ps1_lines[idx + 1]
                    startline = skipto
                    _trace('SKIPTO = {!r}'.format(skipto))
                except ValueError:
                    _trace('NOT ON A PS1 LINE KEEP {}'.format(startline))
                except IndexError:
                    _trace('LAST LINE MOVING TO END {}'.format(startline))
                    startline = endline
            else:
                # Even if we dont respect the lines, try not to end between
                # PS1 lines.
                try:
                    # Try and skip to the end of the expression
                    # (if it is a multiline case)
                    idx = ps1_lines.index(lineno)
                    skipto = ps1_lines[idx + 1]
                except (ValueError, IndexError):
                    # Not the start of a statement, or the last statement:
                    # there is nothing to skip over.
                    pass

    assert startline <= endline
    return startline, endline, init_indent
[docs] def _indent(text, indent=" "): new_text = indent + text.replace("\n", "\n" + indent) # remove whitespace on blank lines new_text = "\n".join([line.rstrip() for line in new_text.split("\n")]) return new_text
def _format_lazy_call(template, exposed_submodules, exposed_from_imports):
    """
    Fill a lazy-import call template with submodule / attribute listings.

    Args:
        template (str): text that may contain ``{submodules}`` and
            ``{submod_attrs}`` placeholders
        exposed_submodules (List[str]): sorted names of exposed submodules
        exposed_from_imports (List[Tuple[str, Iterable[str]]]): submodules
            paired with the attributes exposed from them

    Returns:
        str: the formatted template
    """
    # Currently this is the only use of ubelt, but urepr
    # is easier to use in testing than pprint, so perhaps
    # we can remove complexity and just use ubelt elsewhere
    import ubelt as ub

    submod_attrs = {
        submod.lstrip("."): attrs for submod, attrs in exposed_from_imports
    }

    # Continuation lines are shifted so that multi-line listings sit under
    # the keyword argument they belong to in the template.
    submodules_repr = ub.urepr(exposed_submodules).replace("\n", "\n    ")
    # hack for python <3.7 tests
    # (the names are held in a sorted list; swapping the brackets emits them
    # with set-literal braces)
    submodules_repr = submodules_repr.replace('[', '{').replace(']', '}')

    return template.format(
        submodules=submodules_repr,
        submod_attrs=ub.urepr(submod_attrs).replace("\n", "\n    "),
    )


def _initstr(
    modname,
    imports,
    from_imports,
    explicit=set(),
    protected=set(),
    private=set(),
    options=None,
):
    r"""
    Calls the other string makers

    CommandLine:
        python -m mkinit.static_autogen _initstr

    Args:
        modname (str): the name of the module to generate the init str for

        imports (List[str]): list of module-level imports

        from_imports (List[Tuple[str, List[str]]]):
            List of submodules and their imported attributes

        explicit (set): extra names that are always added to `__all__`

        protected (set): names (or `*` glob patterns); a protected submodule
            is still imported, but its attributes are not pulled in

        private (set): names (or `*` glob patterns) that are excluded from
            the from-imports and from `__all__`

        options (dict): customize output

    Returns:
        str: the text of the autogenerated section

    Raises:
        KeyError: if `options` contains an unknown key

    CommandLine:
        python -m mkinit.formatting _initstr

    Example:
        >>> modname = 'foo'
        >>> imports = ['.bar', '.baz']
        >>> from_imports = [('.bar', ['func1', 'func2'])]
        >>> initstr = _initstr(modname, imports, from_imports)
        >>> print(initstr)
        from foo import bar
        from foo import baz
        <BLANKLINE>
        from foo.bar import (func1, func2,)
        <BLANKLINE>
        __all__ = ['bar', 'baz', 'func1', 'func2']

    Example:
        >>> modname = 'foo'
        >>> imports = ['.bar', '.baz']
        >>> from_imports = [('.bar', list(map(chr, range(97, 123))))]
        >>> initstr = _initstr(modname, imports, from_imports)
        >>> print(initstr)
        from foo import bar
        from foo import baz
        <BLANKLINE>
        from foo.bar import (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s,
                             t, u, v, w, x, y, z,)
        <BLANKLINE>
        __all__ = ['a', 'b', 'bar', 'baz', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
                   'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
                   'y', 'z']

    Example:
        >>> import pytest
        >>> import sys
        >>> if sys.version_info < (3, 7):
        >>>     pytest.skip('lazy only works on 3.7+')
        >>> modname = 'foo'
        >>> imports = ['.bar', '.baz']
        >>> from_imports = [('.bar', ['func1', 'func2'])]
        >>> options = {'lazy_import': 1, 'lazy_boilerplate': None}
        >>> initstr = _initstr(modname, imports, from_imports, options=options)
        >>> print(initstr)
        ...
        >>> options = {'lazy_import': 1, 'lazy_boilerplate': 'from importlib import lazy_import'}
        >>> initstr = _initstr(modname, imports, from_imports, options=options)
        >>> print(initstr.replace('\n\n', '\n'))
        from importlib import lazy_import
        __getattr__ = lazy_import(
            __name__,
            submodules={
                'bar',
                'baz',
            },
            submod_attrs={
                'bar': [
                    'func1',
                    'func2',
                ],
            },
        )
        def __dir__():
            return __all__
        __all__ = ['bar', 'baz', 'func1', 'func2']
    """
    # NOTE: the set() defaults in the signature are only ever read or copied
    # below, never mutated, so sharing them between calls is harmless.
    options = _ensure_options(options)

    if options["relative"]:
        modname = "."

    exposed_from_imports = []
    parts = []
    # if options.get('with_header', False):
    #     parts.append(_make_module_header())

    # map each submodule to its import statement
    submod_to_import = {e.lstrip("."): e for e in imports}
    submodules = set(submod_to_import.keys())

    protected = set(protected)
    private = set(private)

    exposed_submodules = set()
    exposed_all = set()

    protected_submodules = submodules & protected

    if options.get("with_mods", True):
        exposed_submodules.update(submodules)
        exposed_all.update(submodules)

    # Protected submodules are exposed even when with_mods is disabled.
    exposed_submodules.update(protected_submodules)
    exposed_all.update(protected_submodules)

    from fnmatch import fnmatch

    # TODO: allow pattern matching here
    # step1: separate into explicit vs glob-pattern strings
    private_pats = {p for p in private if "*" in p}
    private_set = private - private_pats

    protected_pats = {p for p in protected if "*" in p}
    protected_set = protected - protected_pats

    _pp_pats = protected_pats | private_pats
    _pp_set = private_set | protected_set

    def _private_matches(x):
        """ True if the name is private, either exactly or by pattern """
        x = x.lstrip(".")
        return x in private_set or any(fnmatch(x, pat) for pat in private_pats)

    def _pp_matches(x):
        """ True if the name is private or protected """
        # TODO: standardize how explicit vs submodules are handled
        x = x.lstrip(".")
        return x in _pp_set or any(fnmatch(x, pat) for pat in _pp_pats)

    raw_from_imports = [(m, sub) for m, sub in from_imports if not _pp_matches(m)]
    if options.get("with_attrs", True):
        exposed_from_imports = raw_from_imports
    elif protected:
        exposed_from_imports = [
            (m, set(sub) & protected) for m, sub in raw_from_imports
        ]
    # Drop submodules that end up exposing no attributes.
    exposed_from_imports = [(m, sub) for m, sub in exposed_from_imports if sub]

    exposed_all.update(
        {n for m, sub in exposed_from_imports for n in sub if not _private_matches(n)}
    )
    exposed_all.update(explicit)

    # Sort once so the generated text is deterministic.
    exposed_all = sorted(exposed_all)
    exposed_submodules = sorted(exposed_submodules)

    def append_part(new_part):
        """ appends a new part if it is nonempty """
        if new_part:
            if parts:
                # separate from previous parts with a newline
                parts.append("")
            parts.append(new_part)

    if options["lazy_loader"]:
        default_lazy_boilerplate = textwrap.dedent(
            r"""
            import lazy_loader
            """
        ).rstrip("\n")
        template = textwrap.dedent(
            """
            __getattr__, __dir__, __all__ = lazy_loader.attach_stub(__name__, __file__)
            """
            if options["lazy_loader_typed"]
            else """
            __getattr__, __dir__, __all__ = lazy_loader.attach(
                __name__,
                submodules={submodules},
                submod_attrs={submod_attrs},
            )
            """
        ).rstrip("\n")
        initstr = _format_lazy_call(
            template, exposed_submodules, exposed_from_imports
        )
        if options["lazy_boilerplate"] is None:
            append_part(default_lazy_boilerplate)
        else:
            # Customize lazy boilerplate
            append_part(options["lazy_boilerplate"])
        append_part(initstr.rstrip())
    elif options["lazy_import"]:
        # NOTE: We are not using f-strings so the code can still be parsed
        # in older versions of python.
        # NOTE: We differentiate between submodule and submodule_attrs, as
        # the keys in submodule_attrs aren't added by default.
        default_lazy_boilerplate = textwrap.dedent(
            r"""
            def lazy_import(module_name, submodules, submod_attrs):
                import importlib
                import os
                name_to_submod = {
                    func: mod for mod, funcs in submod_attrs.items()
                    for func in funcs
                }

                def __getattr__(name):
                    if name in submodules:
                        attr = importlib.import_module(
                            '{module_name}.{name}'.format(
                                module_name=module_name, name=name)
                        )
                    elif name in name_to_submod:
                        submodname = name_to_submod[name]
                        module = importlib.import_module(
                            '{module_name}.{submodname}'.format(
                                module_name=module_name, submodname=submodname)
                        )
                        attr = getattr(module, name)
                    else:
                        raise AttributeError(
                            'No {module_name} attribute {name}'.format(
                                module_name=module_name, name=name))
                    globals()[name] = attr
                    return attr

                if os.environ.get('EAGER_IMPORT', ''):
                    for name in submodules:
                        __getattr__(name)

                    for attrs in submod_attrs.values():
                        for attr in attrs:
                            __getattr__(attr)
                return __getattr__
            """
        ).rstrip("\n")
        template = textwrap.dedent(
            """
            __getattr__ = lazy_import(
                __name__,
                submodules={submodules},
                submod_attrs={submod_attrs},
            )
            """
        ).rstrip("\n")
        initstr = _format_lazy_call(
            template, exposed_submodules, exposed_from_imports
        )
        if options["lazy_boilerplate"] is None:
            append_part(default_lazy_boilerplate)
        else:
            # Customize lazy boilerplate
            append_part(options["lazy_boilerplate"])
        append_part(initstr.rstrip())
    else:
        if exposed_submodules:
            exposed_imports = [submod_to_import[k] for k in exposed_submodules]
            append_part(_make_imports_str(exposed_imports, modname))

        if exposed_from_imports:
            attr_part = _make_fromimport_str(exposed_from_imports, modname)
            append_part(attr_part)

    if options.get("with_all", True):
        if options["lazy_import"]:
            # The lazy_import boilerplate only provides __getattr__, so a
            # __dir__ is emitted separately.
            append_part(
                textwrap.dedent(
                    """
                    def __dir__():
                        return __all__
                    """
                ).rstrip()
            )
        exports_repr = ["'{}'".format(e) for e in exposed_all]
        rhs_body = ", ".join(exports_repr)
        packed = _packed_rhs_text("__all__ = [", rhs_body + "]")
        append_part(packed)

    initstr = "\n".join(parts)

    if options["use_black"]:
        # Best effort: formatting is skipped when black is not installed.
        try:
            import black
        except ImportError:
            black = None
        if black is not None:
            initstr = black.format_str(
                initstr, mode=black.Mode(string_normalization=True)
            )
    return initstr
[docs] def _make_imports_str(imports, rootmodname="."): if False: imports_fmtstr = "from {rootmodname} import %s".format(rootmodname=rootmodname) return "\n".join([imports_fmtstr % (name,) for name in imports]) else: imports_fmtstr = "from {rootmodname} import %s".format(rootmodname=rootmodname) return "\n".join( [ imports_fmtstr % (name.lstrip(".")) if name.startswith(".") else "import %s" % (name,) for name in imports ] )
[docs] def _packed_rhs_text(lhs_text, rhs_text): """ packs rhs text to have indentation that agrees with lhs text Example: >>> normname = 'this.is.a.module' >>> fromlist = ['func{}'.format(d) for d in range(10)] >>> indent = '' >>> lhs_text = indent + 'from {normname} import ('.format( >>> normname=normname) >>> rhs_text = ', '.join(fromlist) + ',)' >>> packstr = _packed_rhs_text(lhs_text, rhs_text) >>> print(packstr) >>> normname = 'this.is.a.very.long.modnamethatwilkeepgoingandgoing' >>> fromlist = ['func{}'.format(d) for d in range(10)] >>> indent = '' >>> lhs_text = indent + 'from {normname} import ('.format( >>> normname=normname) >>> rhs_text = ', '.join(fromlist) + ',)' >>> packstr = _packed_rhs_text(lhs_text, rhs_text) >>> print(packstr) >>> normname = 'this.is.a.very.long.modnamethatwilkeepgoingandgoingandgoingandgoingandgoingandgoing' >>> fromlist = ['func{}'.format(d) for d in range(10)] >>> indent = '' >>> lhs_text = indent + 'from {normname} import ('.format( >>> normname=normname) >>> rhs_text = ', '.join(fromlist) + ',)' >>> packstr = _packed_rhs_text(lhs_text, rhs_text) >>> print(packstr) """ # FIXME: the parens get broken up wrong # filler = '-' * (len(lhs_text) - 1) + ' ' # fill_text = filler + rhs_text if 0: # options['use_black']: import black raw_text = lhs_text + rhs_text packstr = black.format_str( raw_text, mode=black.Mode(string_normalization=False) ) return packstr else: import re # not sure why this isn't 76? >= maybe? max_width = 79 # This is a hacky heuristic that could perhaps be more robust? 
if len(lhs_text) > max_width * 0.7: newline_prefix = " " * 4 else: newline_prefix = " " * len(lhs_text) raw_text = lhs_text + rhs_text wrapped_lines = textwrap.wrap( raw_text, break_long_words=False, width=79, initial_indent="", subsequent_indent=newline_prefix, ) packstr = "\n".join(wrapped_lines) FIX_FORMAT = 1 if FIX_FORMAT: regex = r"\s*".join(list(map(re.escape, lhs_text.split(" ")))) assert re.match(regex, lhs_text) match = re.search(regex, packstr) span = match.span() assert span[0] == 0 wrapped_lhs = match.string[: span[1]] # If textwrap broke the LHS then do something slightly different if "\n" in wrapped_lhs: new_rhs = packstr[span[1] :] new_packstr = lhs_text + "\n" + newline_prefix + new_rhs packstr = new_packstr return packstr
[docs] def _make_fromimport_str(from_imports, rootmodname=".", indent=""): """ Args: from_imports (list): each item is a tuple with module and a list of imported with_attrs. rootmodname (str): name of root module indent (str): initial indentation Example: >>> from_imports = [ ... ('.foo', list(map(chr, range(97, 123)))), ... ('.bar', []), ... ('.a_longer_package', list(map(chr, range(65, 91)))), ... ] >>> from_str = _make_fromimport_str(from_imports, indent=' ' * 8) >>> print(from_str) from .foo import (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z,) from .a_longer_package import (A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z,) """ if rootmodname == ".": # nocover # dot is already taken care of in fmtstr rootmodname = "" def _pack_fromimport(tup): name, fromlist = tup[0], tup[1] if name.startswith("."): normname = rootmodname + name else: normname = name if len(fromlist) > 0: lhs_text = indent + "from {normname} import (".format(normname=normname) rhs_text = ", ".join(fromlist) + ",)" packstr = _packed_rhs_text(lhs_text, rhs_text) else: packstr = "" return packstr parts = [_pack_fromimport(t) for t in from_imports] from_str = "\n".join([p for p in parts if p]) # Return unindented version for now from_str = textwrap.dedent(from_str) return from_str