Source code for mkinit.static_analysis

"""
A pared down version of static_analysis from xdoctest
"""
import os
import sys
import ast
from collections import OrderedDict
from mkinit.util import util_import
from os.path import join, exists, splitext, isfile


# Compare version tuples rather than the major / minor fields independently:
# the field-wise test would be False for any major version above 3 whose
# minor number is below the threshold (e.g. a hypothetical 4.0).
IS_PY_GE_308 = sys.version_info >= (3, 8)
IS_PY_GE_312 = sys.version_info >= (3, 12)

# Pick the tokenizer implementation by interpreter version: on Python 3.12+
# the ``_tokenize`` module shipped inside the mkinit package is bound to the
# name ``tokenize``; otherwise the standard-library module is used.
# NOTE(review): presumably this works around behaviour changes in the stdlib
# tokenizer in 3.12 -- confirm.
if IS_PY_GE_312:
    from mkinit import _tokenize as tokenize
else:
    import tokenize


[docs] def _parse_static_node_value(node): """ Extract a constant value from a node if possible """ # TODO: ast.Constant for 3.8 if (isinstance(node, ast.Constant) and isinstance(node.value, (int, float)) if IS_PY_GE_308 else isinstance(node, ast.Num)): value = node.value if IS_PY_GE_308 else node.n elif (isinstance(node, ast.Constant) and isinstance(node.value, str) if IS_PY_GE_308 else isinstance(node, ast.Str)): value = node.value if IS_PY_GE_308 else node.s elif isinstance(node, ast.List): value = list(map(_parse_static_node_value, node.elts)) elif isinstance(node, ast.Tuple): value = tuple(map(_parse_static_node_value, node.elts)) elif isinstance(node, (ast.Dict)): keys = map(_parse_static_node_value, node.keys) values = map(_parse_static_node_value, node.values) value = OrderedDict(zip(keys, values)) # value = dict(zip(keys, values)) elif isinstance(node, (ast.NameConstant)): value = node.value else: print(node.__dict__) raise TypeError('Cannot parse a static value from non-static node ' 'of type: {!r}'.format(type(node))) return value
def parse_static_value(key, source=None, fpath=None):
    """
    Statically parse a constant variable's value from python code.

    TODO: This does not belong here. Move this to an external static analysis
    library.

    Both plain assignments (``foo = 1``) and annotated assignments with a
    value (``foo: int = 1``) are recognized.  The whole syntax tree is
    walked, and when several assignments to ``key`` can be parsed the last
    one visited wins.

    Args:
        key (str): name of the variable
        source (str | None): python text
        fpath (str | None): filepath to read if source is not specified

    Returns:
        object: the parsed literal value assigned to ``key``

    Raises:
        NameError: if no assignment to ``key`` with a statically parsable
            value is found.  A warning is emitted for each matching
            assignment whose value is not a static literal.

    Example:
        >>> key = 'foo'
        >>> source = 'foo = 123'
        >>> assert parse_static_value(key, source=source) == 123
        >>> source = 'foo = "123"'
        >>> assert parse_static_value(key, source=source) == '123'
        >>> source = 'foo = [1, 2, 3]'
        >>> assert parse_static_value(key, source=source) == [1, 2, 3]
        >>> source = 'foo = (1, 2, "3")'
        >>> assert parse_static_value(key, source=source) == (1, 2, "3")
        >>> source = 'foo = {1: 2, 3: 4}'
        >>> assert parse_static_value(key, source=source) == {1: 2, 3: 4}
        >>> source = 'foo: int = 5'
        >>> assert parse_static_value(key, source=source) == 5
        >>> #parse_static_value('bar', source=source)
        >>> #parse_static_value('bar', source='foo=1; bar = [1, foo]')
    """
    if source is None:  # pragma: no branch
        with open(fpath, "rb") as file_:
            source = file_.read().decode("utf-8")
    pt = ast.parse(source)

    class AssignmentVisitor(ast.NodeVisitor):
        def _record(self, value_node):
            # A non-literal right-hand side is reported but does not abort
            # the search; another assignment may still be parsable.
            try:
                self.value = _parse_static_node_value(value_node)
            except TypeError as ex:
                import warnings

                warnings.warn(repr(ex))

        def visit_Assign(self, node):
            for target in node.targets:
                # Only simple names carry ``id``; tuple / attribute targets
                # are ignored.
                target_id = getattr(target, "id", None)
                if target_id == key:
                    self._record(node.value)

        def visit_AnnAssign(self, node):
            # ``foo: int`` without a value has ``node.value is None``.
            if node.value is None:
                return
            if getattr(node.target, "id", None) == key:
                self._record(node.value)

    sentinel = object()
    visitor = AssignmentVisitor()
    visitor.value = sentinel
    visitor.visit(pt)
    if visitor.value is sentinel:
        raise NameError("No static variable named {!r}".format(key))
    return visitor.value
def package_modpaths(
    pkgpath,
    with_pkg=False,
    with_mod=True,
    followlinks=True,
    recursive=True,
    with_libs=False,
    check=True,
):
    r"""
    Finds sub-packages and sub-modules belonging to a package.

    Args:
        pkgpath (str): path to a module or package
        with_pkg (bool): if True includes package __init__ files
            (default = False)
        with_mod (bool): if True includes module files (default = True)
        followlinks (bool): forwarded to :func:`os.walk` (default = True)
        recursive (bool): if False, then only child modules are included
        with_libs (bool): if True then compiled shared libs will be returned
            as well
        check (bool): if False, then pkgpath is considered a package even if
            it does not contain an __init__ file. Sub-directories are always
            checked.

    Yields:
        str: paths of the modules / package inits belonging to the package

    References:
        http://stackoverflow.com/questions/1707709/list-modules-in-py-package

    Example:
        >>> from mkinit.static_analysis import *
        >>> pkgpath = util_import.modname_to_modpath('mkinit')
        >>> paths = list(package_modpaths(pkgpath))
        >>> print('\n'.join(paths))
        >>> names = list(map(util_import.modpath_to_modname, paths))
        >>> assert 'mkinit.static_mkinit' in names
        >>> assert 'mkinit.__main__' in names
        >>> assert 'mkinit' not in names
        >>> print('\n'.join(names))
    """
    if isfile(pkgpath):
        # A plain file is its own (only) module path
        yield pkgpath
        return

    if with_pkg:
        top_init = join(pkgpath, "__init__.py")
        if not check or exists(top_init):
            yield top_init

    extensions = [".py"]
    if with_libs:
        extensions += util_import._platform_pylib_exts()

    for dirpath, subdirs, filenames in os.walk(pkgpath, followlinks=followlinks):
        has_init = exists(join(dirpath, "__init__.py"))
        if not (has_init or not check):
            # Outside of the package: prune the walk below this directory
            del subdirs[:]
        else:
            # Only the top directory may skip the check; subdirs never do
            check = True
            if with_mod:
                for filename in filenames:
                    # Inits are reported by the package loops, not here
                    if filename == "__init__.py":
                        continue
                    if splitext(filename)[1] in extensions:
                        yield join(dirpath, filename)
            if with_pkg:
                for subdir in subdirs:
                    sub_init = join(dirpath, subdir, "__init__.py")
                    if exists(sub_init):
                        yield sub_init
        if not recursive:
            break
def is_balanced_statement(lines):
    """
    Checks if the lines have balanced parens, brackets, curlies and strings

    The lines are joined with newlines and run through the tokenizer; a
    ``TokenError`` whose message starts with "EOF in multi-line" means the
    text stops in the middle of a statement or string.

    Args:
        lines (list): list of strings

    Returns:
        bool: False if the statement is not balanced

    Raises:
        tokenize.TokenError: any tokenizer error other than the
            "EOF in multi-line" one is propagated unchanged.

    Doctest:
        >>> assert is_balanced_statement(['print(foobar)'])
        >>> assert is_balanced_statement(['foo = bar']) is True
        >>> assert is_balanced_statement(['foo = (']) is False
        >>> assert is_balanced_statement(['foo = (', "')(')"]) is True
        >>> assert is_balanced_statement(
        ...     ['foo = (', "'''", ")]'''", ')']) is True
        >>> #assert is_balanced_statement(['foo = ']) is False
        >>> #assert is_balanced_statement(['== ']) is False
    """
    from io import StringIO

    readline = StringIO("\n".join(lines)).readline
    try:
        # Exhaust the token stream; only the absence of an error matters.
        for _token in tokenize.generate_tokens(readline):
            pass
    except tokenize.TokenError as ex:
        if ex.args[0].startswith("EOF in multi-line"):
            return False
        raise
    # Note: ast.parse cannot be used for this check because it rejects
    # partial compound statements such as a lone try / except / else.
    return True
def _locate_ps1_linenos(source_lines):
    """
    Determines which lines in the source begin a "logical block" of code.

    Note:
        implementation taken from xdoctest.parser

    Args:
        source_lines (list): lines belonging only to the doctest src
            these will be unindented, prefixed, and without any want.

    Returns:
        (list, bool): a list of indices indicating which lines
            are considered "PS1" and a flag indicating if the final line
            should be considered for a got/want assertion.

    Raises:
        SyntaxError: if the stripped source does not parse. When the error
            has a line number but no ``text``, the offending line is
            attached before re-raising.

    Example:
        >>> source_lines = ['>>> def foo():', '>>>     return 0', '>>> 3']
        >>> linenos, eval_final = _locate_ps1_linenos(source_lines)
        >>> assert linenos == [0, 2]
        >>> assert eval_final is True

    Example:
        >>> source_lines = ['>>> x = [1, 2, ', '>>> 3, 4]', '>>> print(len(x))']
        >>> linenos, eval_final = _locate_ps1_linenos(source_lines)
        >>> assert linenos == [0, 2]
        >>> assert eval_final is True
    """
    # Drop the 4-character prompt prefix from every line.  Lines that are
    # only a comment are swapped for a dummy assignment so they show up as
    # statements in the parse tree; the substitution never leaves this
    # function because only line numbers are returned.
    stripped_lines = []
    for raw_line in source_lines:
        text = raw_line[4:]
        stripped_lines.append("_._ = None" if text.startswith("#") else text)

    joined_source = "\n".join(stripped_lines)
    try:
        tree = ast.parse(joined_source, filename="<source_block>")
    except SyntaxError as syn_ex:
        # The filename does not point at a real file, so the error may come
        # without the offending text; fill it in from our own copy.
        if syn_ex.text is None and syn_ex.lineno is not None:
            syn_ex.text = joined_source.split("\n")[syn_ex.lineno - 1] + "\n"
        raise syn_ex

    statements = tree.body

    # Zero-based start line of each top-level statement, corrected by the
    # workaround for python issue 16806 (which returns a set).
    candidate_linenos = _workaround_16806(
        [stmt.lineno - 1 for stmt in statements], stripped_lines
    )

    # Respect any line explicitly defined as PS2 (via its prefix)
    explicit_ps2 = {
        index for index, raw_line in enumerate(source_lines)
        if raw_line[:4] != ">>> "
    }
    ps1_linenos = sorted(candidate_linenos.difference(explicit_ps2))

    # The final statement is only "evaluatable" when it is an expression.
    if not statements:
        eval_final = False
    elif sys.version_info.major == 2:  # nocover
        eval_final = isinstance(statements[-1], (ast.Expr, ast.Print))
    else:
        # This should just be an Expr in python3
        # (todo: ensure this is true)
        eval_final = isinstance(statements[-1], ast.Expr)

    return ps1_linenos, eval_final
def _workaround_16806(ps1_linenos, exec_source_lines):
    """
    workaround for python issue 16806 (https://bugs.python.org/issue16806)

    Issue causes lineno for multiline strings to give the line they end on,
    not the line they start on.  A patch for this issue exists
    `https://github.com/python/cpython/pull/1800`

    Note:
        Starting from the end look at consecutive pairs of indices to
        inspect the statement it corresponds to.  (the first statement goes
        from ps1_linenos[-1] to the end of the line list.

        Implementation taken from xdoctest.parser

    Args:
        ps1_linenos (list): zero-based line index reported for each
            statement, in source order
        exec_source_lines (list): the source lines those indices refer to

    Returns:
        set: the corrected zero-based start line of each statement
    """
    new_ps1_lines = []
    b = len(exec_source_lines)
    for a in ps1_linenos[::-1]:
        # the position of `b` is correct, but `a` may be wrong
        # is_balanced_statement will be False iff `a` is wrong.
        while not is_balanced_statement(exec_source_lines[a:b]):
            if a <= 0:
                # Never step before the first line: a negative index would
                # slice from the end of the list (or loop forever).
                break
            # shift `a` down until it becomes correct
            a -= 1
        # push the new correct value back into the list
        new_ps1_lines.append(a)
        # set the end position of the next string to be `a` ,
        # note, because this `a` is correct, the next `b` is
        # must also be correct.
        b = a
    return set(new_ps1_lines)