"""
A pared-down version of static_analysis from xdoctest
"""
import os
import sys
import ast
from collections import OrderedDict
from mkinit.util import util_import
from os.path import join, exists, splitext, isfile
# Interpreter version flags. Tuple comparison is used so that the result stays
# correct for any (major, minor) pair; comparing the two fields independently
# would report False for a future major release with a small minor number.
IS_PY_GE_308 = sys.version_info >= (3, 8)
IS_PY_GE_312 = sys.version_info >= (3, 12)
if IS_PY_GE_312:
    # NOTE(review): presumably a vendored tokenizer that keeps the older
    # stdlib behavior -- confirm against mkinit/_tokenize.py
    from mkinit import _tokenize as tokenize
else:
    import tokenize
[docs]
def _parse_static_node_value(node):
    """
    Extract a constant value from a node if possible

    Args:
        node (ast.AST): expression node to convert into a Python value

    Returns:
        object: ``None``, a bool, int, float or str for constant nodes, a
        list / tuple for list / tuple displays, and an OrderedDict for dict
        displays. Container items are converted recursively.

    Raises:
        TypeError: if the node (or any nested node) is not one of the
            supported static forms.

    Example:
        >>> import ast
        >>> node = ast.parse('[1, "a", (None, True)]').body[0].value
        >>> _parse_static_node_value(node)
        [1, 'a', (None, True)]
    """
    constant_cls = getattr(ast, "Constant", None)
    if constant_cls is not None and isinstance(node, constant_cls):
        value = node.value
        # bool is a subclass of int, so True / False are accepted here too.
        # Other constant kinds (bytes, complex, ...) fall through to the
        # TypeError below.
        if value is None or isinstance(value, (int, float, str)):
            return value
    elif isinstance(node, ast.List):
        return list(map(_parse_static_node_value, node.elts))
    elif isinstance(node, ast.Tuple):
        return tuple(map(_parse_static_node_value, node.elts))
    elif isinstance(node, ast.Dict):
        keys = map(_parse_static_node_value, node.keys)
        values = map(_parse_static_node_value, node.values)
        return OrderedDict(zip(keys, values))
    elif sys.version_info < (3, 8):
        # Older parsers emit dedicated node classes for literals. These
        # classes are deprecated since 3.8 and absent from newer interpreters,
        # so they are only looked up (by name) on interpreters that need them.
        legacy_kinds = (("Num", "n"), ("Str", "s"), ("NameConstant", "value"))
        for cls_name, attr_name in legacy_kinds:
            legacy_cls = getattr(ast, cls_name, None)
            if legacy_cls is not None and isinstance(node, legacy_cls):
                return getattr(node, attr_name)
    raise TypeError('Cannot parse a static value from non-static node '
                    'of type: {!r}'.format(type(node)))
[docs]
def parse_static_value(key, source=None, fpath=None):
    """
    Statically parse a constant variable's value from python code.

    Both plain assignments (``foo = 1``) and annotated assignments with a
    value (``foo: int = 1``) are considered. Every assignment node in the
    parsed tree is visited, and the last one whose value can be parsed wins.
    An assignment whose value is not static only emits a warning.

    TODO: This does not belong here. Move this to an external static analysis
        library.

    Args:
        key (str): name of the variable
        source (str | None): python text
        fpath (str | None): filepath to read if source is not specified

    Returns:
        object: the statically parsed value assigned to ``key``

    Raises:
        NameError: if no assignment to ``key`` yields a static value

    Example:
        >>> key = 'foo'
        >>> source = 'foo = 123'
        >>> assert parse_static_value(key, source=source) == 123
        >>> source = 'foo = "123"'
        >>> assert parse_static_value(key, source=source) == '123'
        >>> source = 'foo = [1, 2, 3]'
        >>> assert parse_static_value(key, source=source) == [1, 2, 3]
        >>> source = 'foo = (1, 2, "3")'
        >>> assert parse_static_value(key, source=source) == (1, 2, "3")
        >>> source = 'foo = {1: 2, 3: 4}'
        >>> assert parse_static_value(key, source=source) == {1: 2, 3: 4}
        >>> source = 'foo: int = 7'
        >>> assert parse_static_value(key, source=source) == 7
        >>> #parse_static_value('bar', source=source)
        >>> #parse_static_value('bar', source='foo=1; bar = [1, foo]')
    """
    import warnings

    if source is None:  # pragma: no branch
        with open(fpath, "rb") as file_:
            source = file_.read().decode("utf-8")
    pt = ast.parse(source)

    class AssignmentVisitor(ast.NodeVisitor):
        def _record(self, targets, value_node):
            # Only simple name targets carry an ``id``; attribute, subscript
            # and tuple targets never match.
            for target in targets:
                if getattr(target, "id", None) == key:
                    try:
                        self.value = _parse_static_node_value(value_node)
                    except TypeError as ex:
                        warnings.warn(repr(ex))

        def visit_Assign(self, node):
            self._record(node.targets, node.value)

        def visit_AnnAssign(self, node):
            # A bare annotation (``foo: int``) has no value to parse.
            if node.value is not None:
                self._record([node.target], node.value)

    # A unique sentinel distinguishes "never assigned" from a parsed None.
    sentinel = object()
    visitor = AssignmentVisitor()
    visitor.value = sentinel
    visitor.visit(pt)
    if visitor.value is sentinel:
        raise NameError("No static variable named {!r}".format(key))
    return visitor.value
[docs]
def package_modpaths(
    pkgpath,
    with_pkg=False,
    with_mod=True,
    followlinks=True,
    recursive=True,
    with_libs=False,
    check=True,
):
    r"""
    Finds the files of sub-packages and sub-modules belonging to a package.

    Args:
        pkgpath (str): path to a module or package. If it is a file, it is
            yielded as-is and nothing else is produced.
        with_pkg (bool): if True includes package __init__ files (default =
            False)
        with_mod (bool): if True includes module files (default = True)
        followlinks (bool): forwarded to :func:`os.walk` (default = True)
        recursive (bool): if False, then only the top directory is walked
        with_libs (bool): if True then files with a platform shared-library
            extension are returned as well
        check (bool): if False, then pkgpath is treated as a package even
            if it does not contain an __init__ file. Sub-directories are
            always required to contain one.

    Yields:
        str: paths of the modules / package inits belonging to the package

    References:
        http://stackoverflow.com/questions/1707709/list-modules-in-py-package

    Example:
        >>> from mkinit.static_analysis import *
        >>> pkgpath = util_import.modname_to_modpath('mkinit')
        >>> paths = list(package_modpaths(pkgpath))
        >>> print('\n'.join(paths))
        >>> names = list(map(util_import.modpath_to_modname, paths))
        >>> assert 'mkinit.static_mkinit' in names
        >>> assert 'mkinit.__main__' in names
        >>> assert 'mkinit' not in names
        >>> print('\n'.join(names))
    """
    if isfile(pkgpath):
        # A plain file is its own (and only) result.
        yield pkgpath
        return

    init_name = "__init__.py"

    if with_pkg:
        root_init = join(pkgpath, init_name)
        if exists(root_init) or not check:
            yield root_init

    module_exts = [".py"]
    if with_libs:
        module_exts += util_import._platform_pylib_exts()

    # Only the top directory may skip the __init__ requirement.
    require_init = check
    for dirpath, subdirs, filenames in os.walk(pkgpath, followlinks=followlinks):
        in_package = exists(join(dirpath, init_name)) or not require_init
        if in_package:
            require_init = True
            if with_mod:
                for filename in filenames:
                    # Inits are reported by the package loops, not here.
                    if filename == init_name:
                        continue
                    if splitext(filename)[1] in module_exts:
                        yield join(dirpath, filename)
            if with_pkg:
                for subdir in subdirs:
                    sub_init = join(dirpath, subdir, init_name)
                    if exists(sub_init):
                        yield sub_init
        else:
            # Prune in place so os.walk does not descend out of the package.
            subdirs[:] = []
        if not recursive:
            break
[docs]
def is_balanced_statement(lines):
    """
    Checks if the lines have balanced parens, brackets, curlies and strings

    The lines are joined with newlines and run through the tokenizer. The
    statement counts as unbalanced only when the tokenizer reports that the
    input ended inside a multi-line construct.

    Args:
        lines (list): list of strings

    Returns:
        bool: False if the statement is not balanced

    Raises:
        tokenize.TokenError: for any tokenizer error other than the
            "EOF in multi-line" family.

    Doctest:
        >>> assert is_balanced_statement(['print(foobar)'])
        >>> assert is_balanced_statement(['foo = bar']) is True
        >>> assert is_balanced_statement(['foo = (']) is False
        >>> assert is_balanced_statement(['foo = (', "')(')"]) is True
        >>> assert is_balanced_statement(
        ...     ['foo = (', "'''", ")]'''", ')']) is True
        >>> #assert is_balanced_statement(['foo = ']) is False
        >>> #assert is_balanced_statement(['== ']) is False
    """
    from io import StringIO

    reader = StringIO("\n".join(lines))
    try:
        # Exhaust the token stream; only the possible error matters.
        for _ in tokenize.generate_tokens(reader.readline):
            pass
    except tokenize.TokenError as ex:
        if not ex.args[0].startswith("EOF in multi-line"):
            raise
        return False
    # Note: trying to use ast.parse(block) will not work
    # here because it breaks in try, except, else
    return True
[docs]
def _locate_ps1_linenos(source_lines):
    """
    Determines which lines in the source begin a "logical block" of code.

    Note:
        implementation taken from xdoctest.parser

    Args:
        source_lines (list): lines belonging only to the doctest src
            these will be unindented, prefixed, and without any want.

    Returns:
        (list, bool): a list of indices indicating which lines
            are considered "PS1" and a flag indicating if the final line
            should be considered for a got/want assertion.

    Raises:
        SyntaxError: if the un-prefixed source does not parse. When the
            error carries a line number but no text, the offending line is
            attached to it before it is re-raised.

    Example:
        >>> source_lines = ['>>> def foo():', '>>>     return 0', '>>> 3']
        >>> linenos, eval_final = _locate_ps1_linenos(source_lines)
        >>> assert linenos == [0, 2]
        >>> assert eval_final is True

    Example:
        >>> source_lines = ['>>> x = [1, 2, ', '>>> 3, 4]', '>>> print(len(x))']
        >>> linenos, eval_final = _locate_ps1_linenos(source_lines)
        >>> assert linenos == [0, 2]
        >>> assert eval_final is True

    Example:
        >>> source_lines = ['>>> @decorator', '... def foo():',
        ...                 '...     return 0', '>>> foo()']
        >>> linenos, eval_final = _locate_ps1_linenos(source_lines)
        >>> assert linenos == [0, 3]
        >>> assert eval_final is True
    """
    # Strip indentation (and PS1 / PS2 from source)
    exec_source_lines = [p[4:] for p in source_lines]

    # Hack to make comments appear like executable statements
    # note, this hack never leaves this function because we only are
    # returning line numbers.
    exec_source_lines = [
        "_._ = None" if p.startswith("#") else p for p in exec_source_lines
    ]

    source_block = "\n".join(exec_source_lines)
    try:
        pt = ast.parse(source_block, filename="<source_block>")
    except SyntaxError as syn_ex:
        # Assign missing information to the syntax error.
        if syn_ex.text is None and syn_ex.lineno is not None:
            block_lines = source_block.split("\n")
            # Only back-fill when the reported line actually exists, so a
            # bad line number cannot replace the SyntaxError with an
            # IndexError.
            if 1 <= syn_ex.lineno <= len(block_lines):
                syn_ex.text = block_lines[syn_ex.lineno - 1] + "\n"
        raise syn_ex

    statement_nodes = pt.body

    ps1_linenos = []
    for node in statement_nodes:
        first_lineno = node.lineno
        # A decorated def / class may report the line of its def / class
        # keyword; the statement really starts at its first decorator.
        for decorator in getattr(node, "decorator_list", ()):
            first_lineno = min(first_lineno, decorator.lineno)
        ps1_linenos.append(first_lineno - 1)

    # Multi-line strings may report the line they end on; shift those back
    # to the line the statement starts on.
    ps1_linenos = _workaround_16806(ps1_linenos, exec_source_lines)

    # Respect any line explicitly defined as PS2 (via its prefix)
    ps2_linenos = {x for x, p in enumerate(source_lines) if p[:4] != ">>> "}
    ps1_linenos = sorted(ps1_linenos.difference(ps2_linenos))

    if len(statement_nodes) == 0:
        eval_final = False
    else:
        # Is the last statement evaluatable?
        if sys.version_info.major == 2:  # nocover
            eval_final = isinstance(statement_nodes[-1], (ast.Expr, ast.Print))
        else:
            # This should just be an Expr in python3
            # (todo: ensure this is true)
            eval_final = isinstance(statement_nodes[-1], ast.Expr)

    return ps1_linenos, eval_final
[docs]
def _workaround_16806(ps1_linenos, exec_source_lines):
    """
    workaround for python issue 16806 (https://bugs.python.org/issue16806)

    Issue causes lineno for multiline strings to give the line they end on,
    not the line they start on.  A patch for this issue exists
    `https://github.com/python/cpython/pull/1800`

    Args:
        ps1_linenos (list): candidate start indices of each statement
        exec_source_lines (list): the source lines those indices refer to

    Returns:
        set: the corrected start indices

    Note:
        The statements are walked from last to first. Each one spans from
        its candidate start up to the (already corrected) start of the
        statement after it; the last statement runs to the end of the
        line list. A candidate start is moved to earlier lines until that
        span is balanced.

        Implementation taken from xdoctest.parser
    """
    corrected = set()
    stop = len(exec_source_lines)
    for start in reversed(ps1_linenos):
        # `stop` is known to be right, so an unbalanced span means `start`
        # points past the true beginning of the statement.
        while not is_balanced_statement(exec_source_lines[start:stop]):
            start -= 1
        corrected.add(start)
        # The corrected start is the end of the statement before it.
        stop = start
    return corrected