From 3c6e0e401523673c1531951a0d88e90e8770cef9 Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Fri, 7 Apr 2017 11:46:57 -0700 Subject: [PATCH] Start altering the pydocstyle AST parser for our needs This forks some of the pydocstyle AST parser into our mapper. Eventually, some of the other operations such as parsing arguments and performing full name lookup can be moved in as well. For now, this is not doing any extra assignment tracking/etc, several of these operations will just throw this information out. Refs #99 --- .travis.yml | 19 +++-- autoapi/mappers/python.py | 147 ++++++++++++++++++++++++++++++++++-- tests/test_python_parser.py | 68 +++++++++++++++++ tox.ini | 8 +- 4 files changed, 226 insertions(+), 16 deletions(-) create mode 100644 tests/test_python_parser.py diff --git a/.travis.yml b/.travis.yml index dd3055d..660e047 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,20 @@ language: python python: - - 2.7 + - 2.7 + - 3.4 + - 3.5 + - 3.6 sudo: false -env: - - TOX_ENV=py27 - - TOX_ENV=docs - - TOX_ENV=lint +matrix: + include: + - python: 2.7 + script: tox -e docs + - python: 2.7 + script: tox -e lint install: - - pip install tox + - pip install tox-travis script: - - tox -e $TOX_ENV + - tox notifications: slack: rooms: diff --git a/autoapi/mappers/python.py b/autoapi/mappers/python.py index cf5639f..1e1aa60 100644 --- a/autoapi/mappers/python.py +++ b/autoapi/mappers/python.py @@ -2,8 +2,10 @@ import sys import os import textwrap import ast +import tokenize as tk from collections import defaultdict -from pydocstyle.parser import Parser + +from pydocstyle import parser from .base import PythonMapperBase, SphinxMapperBase from ..utils import slugify @@ -42,7 +44,7 @@ class PythonSphinxMapper(SphinxMapperBase): :param path: Path of file to read """ try: - parsed_data = Parser()(open(path), path) + parsed_data = ParserExtra()(open(path), path) return parsed_data except (IOError, TypeError, ImportError): self.app.warn('Error reading file: 
{0}'.format(path)) @@ -86,9 +88,7 @@ class PythonPythonMapper(PythonMapperBase): self.args = [] if self.is_callable: self.args = self._get_arguments(obj) - self.docstring = obj.docstring or '' - self.docstring = textwrap.dedent(self.docstring) - self.docstring = self.docstring.replace("'''", '').replace('"""', '') + self.docstring = obj.docstring if getattr(obj, 'parent'): self.inheritance = [obj.parent.name] else: @@ -99,15 +99,18 @@ class PythonPythonMapper(PythonMapperBase): @property def is_undoc_member(self): - return self.docstring == '' + return bool(self.docstring) @property def is_private_member(self): - return self.short_name[0] == '_' + return not self.obj.is_public @property def is_special_member(self): - return self.short_name[0:2] == '__' + return ( + (isinstance(self.obj, parser.Method) and self.obj.is_magic) or + (self.obj.name.startswith('__') and self.obj.name.endswith('__')) + ) @property def display(self): @@ -260,3 +263,131 @@ class PythonPackage(PythonPythonMapper): class PythonClass(PythonPythonMapper): type = 'class' + + +# Parser +class ParserExtra(parser.Parser): + + """Extend Parser object to provide customized return""" + + def parse_object_identifier(self): + """Parse object identifier""" + assert self.current.kind == tk.NAME + identifier = '' + while True: + is_identifier = ( + self.current.kind == tk.NAME or + ( + self.current.kind == tk.OP and + self.current.value == '.' 
+ ) + ) + if is_identifier: + identifier += self.current.value + self.stream.move() + else: + break + return identifier + + def parse_string(self): + """Clean up STRING nodes""" + val = self.current.value + self.consume(tk.STRING) + return val.lstrip('\'"').rstrip('\'"') + + def parse_number(self): + """Parse a NUMBER node to either a ``float`` or ``int``""" + val = self.current.value + self.consume(tk.NUMBER) + normalized_val = float(val) + try: + normalized_val = int(val) + except ValueError: + pass + return normalized_val + + def parse_iterable(self): + """Recursively parse an iterable object + + This will return a local representation of the parsed data, except for + NAME nodes. This does not currently attempt to perform lookup on the + object names defined in an iterable. + + This is mostly a naive implementation and won't handle complex + structures. This is only currently meant to parse simple iterables, such + as ``__all__`` and class parent classes on class definition. + """ + content = None + while self.current is not None: + if self.current.kind == tk.OP and self.current.value in '[(': + self.stream.move() + if content is None: + content = [] + else: + content.append(self.parse_iterable()) + continue + elif self.current.kind == tk.OP and self.current.value in '])': + self.stream.move() + return content + elif self.current.kind == tk.STRING: + content.append(self.parse_string()) + elif self.current.kind == tk.NUMBER: + content.append(self.parse_number()) + elif self.current.kind == tk.NAME: + # TODO this is dropped for now, but can be handled with an + # object lookup in the future, if we decide to track assignment. 
+ # content.append(self.parse_object_identifier()) + pass + else: + self.stream.move() + + def parse_docstring(self): + """Clean up object docstring""" + docstring = super(ParserExtra, self).parse_docstring() + if not docstring: + docstring = '' + docstring = textwrap.dedent(docstring) + docstring = docstring.replace("'''", '').replace('"""', '') + return docstring + + def parse_all(self): + """Parse __all__ assignment + + This differs from the default __all__ assignment processing by: + + * Accepting multiple __all__ assignments + * Doesn't throw exceptions on edge cases + * Parses NAME nodes (but throws them out for now) + """ + assert self.current.value == '__all__' + self.consume(tk.NAME) + if self.current.kind != tk.OP or self.current.value not in ['=', '+=']: + return + assign_op = self.current.value + self.consume(tk.OP) + + if self.all is None: + self.all = [] + + all_content = [] + # Support [], [] + [], and [] + foo.__all__ by iterating over list + # assignments + while True: + if self.current.kind == tk.OP and self.current.value in '([': + content = self.parse_iterable() + all_content.extend(content) + elif self.current.kind == tk.NAME: + name = self.parse_object_identifier() + # TODO Skip these for now. In the future, this name should be + # converted to an object that will be resolved after we've + # parsed at a later stage in the mapping process. 
+ #all_content.append(name) + if self.current.kind == tk.OP and self.current.value == '+': + self.stream.move() + else: + break + + if assign_op == '=': + self.all = all_content + elif assign_op == '+=': + self.all += all_content diff --git a/tests/test_python_parser.py b/tests/test_python_parser.py new file mode 100644 index 0000000..c0f5705 --- /dev/null +++ b/tests/test_python_parser.py @@ -0,0 +1,68 @@ +# coding=utf8 + +"""Test Python parser""" + +import sys +import unittest +from textwrap import dedent + +from autoapi.mappers.python import ParserExtra + +if sys.version_info < (3, 0): + from StringIO import StringIO +else: + from io import StringIO + + +class PythonParserTests(unittest.TestCase): + + def parse(self, source): + in_h = StringIO(dedent(source)) + return ParserExtra()(in_h, '/dev/null') + + def test_parses_basic_file(self): + source = """ + def foo(bar): + pass + """ + self.assertIsNone(self.parse(source).all) + + def test_parses_all(self): + source = """ + __all__ = ['Foo', 5.0] + """ + self.assertEqual(self.parse(source).all, ['Foo', 5.0]) + + def test_parses_all_with_list_addition(self): + source = """ + __all__ = ['Foo'] + [] + """ + self.assertEqual(self.parse(source).all, ['Foo']) + + def test_parses_all_with_name_addtion(self): + source = """ + __all__ = ['Foo'] + bar.__all__ + """ + self.assertEqual(self.parse(source).all, ['Foo']) + + def test_parses_all_with_multiple_name_addtions(self): + source = """ + __all__ = foo + bar + __all__ += boop + __all__ += ['foo'] + """ + self.assertEqual(self.parse(source).all, ['foo']) + source = """ + __all__ = ['foo'] + __all__ = foo + """ + self.assertEqual(self.parse(source).all, []) + + def test_parses_all_multiline(self): + source = """ + __all__ = [ + 'foo', + 'bar', + ] + """ + self.assertEqual(self.parse(source).all, ['foo', 'bar']) diff --git a/tox.ini b/tox.ini index afbd558..7e45488 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,8 @@ [tox] -envlist = py27,py35,lint,docs +envlist = + 
py{27,34,35}-sphinx{13,14,15} + py27-lint + py27-docs [testenv] setenv = @@ -7,11 +10,14 @@ setenv = deps = -r{toxinidir}/requirements.txt pytest mock + sphinx14: Sphinx<1.5 + sphinx15: Sphinx<1.6 commands = py.test {posargs} [testenv:docs] deps = + Sphinx==1.5 sphinx_rtd_theme {[testenv]deps} changedir = {toxinidir}/docs