From 3c6e0e401523673c1531951a0d88e90e8770cef9 Mon Sep 17 00:00:00 2001 From: Anthony Johnson Date: Fri, 7 Apr 2017 11:46:57 -0700 Subject: [PATCH] Start altering the pydocstyle AST parser for our needs This forks some of the pydocstyle AST parser into our mapper. Eventually, some of the other operations such as parsing arguments and performing full name lookup can be moved in as well. For now, this is not doing any extra assignment tracking/etc, several of these operations will just throw this information out. Refs #99 --- .travis.yml | 19 +++-- autoapi/mappers/python.py | 147 ++++++++++++++++++++++++++++++++++-- tests/test_python_parser.py | 68 +++++++++++++++++ tox.ini | 8 +- 4 files changed, 226 insertions(+), 16 deletions(-) create mode 100644 tests/test_python_parser.py diff --git a/.travis.yml b/.travis.yml index dd3055d..660e047 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,20 @@ language: python python: - - 2.7 + - 2.7 + - 3.4 + - 3.5 + - 3.6 sudo: false -env: - - TOX_ENV=py27 - - TOX_ENV=docs - - TOX_ENV=lint +matrix: + include: + - python: 2.7 + script: tox -e docs + - python: 2.7 + script: tox -e lint install: - - pip install tox + - pip install tox-travis script: - - tox -e $TOX_ENV + - tox notifications: slack: rooms: diff --git a/autoapi/mappers/python.py b/autoapi/mappers/python.py index cf5639f..1e1aa60 100644 --- a/autoapi/mappers/python.py +++ b/autoapi/mappers/python.py @@ -2,8 +2,10 @@ import sys import os import textwrap import ast +import tokenize as tk from collections import defaultdict -from pydocstyle.parser import Parser + +from pydocstyle import parser from .base import PythonMapperBase, SphinxMapperBase from ..utils import slugify @@ -42,7 +44,7 @@ class PythonSphinxMapper(SphinxMapperBase): :param path: Path of file to read """ try: - parsed_data = Parser()(open(path), path) + parsed_data = ParserExtra()(open(path), path) return parsed_data except (IOError, TypeError, ImportError): self.app.warn('Error reading file: 
{0}'.format(path)) @@ -86,9 +88,7 @@ class PythonPythonMapper(PythonMapperBase): self.args = [] if self.is_callable: self.args = self._get_arguments(obj) - self.docstring = obj.docstring or '' - self.docstring = textwrap.dedent(self.docstring) - self.docstring = self.docstring.replace("'''", '').replace('"""', '') + self.docstring = obj.docstring if getattr(obj, 'parent'): self.inheritance = [obj.parent.name] else: @@ -99,15 +99,18 @@ class PythonPythonMapper(PythonMapperBase): @property def is_undoc_member(self): - return self.docstring == '' + return bool(self.docstring) @property def is_private_member(self): - return self.short_name[0] == '_' + return not self.obj.is_public @property def is_special_member(self): - return self.short_name[0:2] == '__' + return ( + (isinstance(self.obj, parser.Method) and self.obj.is_magic) or + (self.obj.name.startswith('__') and self.obj.name.endswith('__')) + ) @property def display(self): @@ -260,3 +263,131 @@ class PythonPackage(PythonPythonMapper): class PythonClass(PythonPythonMapper): type = 'class' + + +# Parser +class ParserExtra(parser.Parser): + + """Extend Parser object to provide customized return""" + + def parse_object_identifier(self): + """Parse object identifier""" + assert self.current.kind == tk.NAME + identifier = '' + while True: + is_identifier = ( + self.current.kind == tk.NAME or + ( + self.current.kind == tk.OP and + self.current.value == '.' 
+ ) + ) + if is_identifier: + identifier += self.current.value + self.stream.move() + else: + break + return identifier + + def parse_string(self): + """Clean up STRING nodes""" + val = self.current.value + self.consume(tk.STRING) + return val.lstrip('\'"').rstrip('\'"') + + def parse_number(self): + """Parse a NUMBER node to either a ``float`` or ``int``""" + val = self.current.value + self.consume(tk.NUMBER) + normalized_val = float(val) + try: + normalized_val = int(val) + except ValueError: + pass + return normalized_val + + def parse_iterable(self): + """Recursively parse an iterable object + + This will return a local representation of the parsed data, except for + NAME nodes. This does not currently attempt to perform lookup on the + object names defined in an iterable. + + This is mostly a naive implementation and won't handle complex + structures. This is only currently meant to parse simple iterables, such + as ``__all__`` and class parent classes on class definition. + """ + content = None + while self.current is not None: + if self.current.kind == tk.OP and self.current.value in '[(': + self.stream.move() + if content is None: + content = [] + else: + content.append(self.parse_iterable()) + continue + elif self.current.kind == tk.OP and self.current.value in '])': + self.stream.move() + return content + elif self.current.kind == tk.STRING: + content.append(self.parse_string()) + elif self.current.kind == tk.NUMBER: + content.append(self.parse_number()) + elif self.current.kind == tk.NAME: + # TODO this is dropped for now, but can be handled with an + # object lookup in the future, if we decide to track assignment. 
+ # content.append(self.parse_object_identifier()) + pass + else: + self.stream.move() + + def parse_docstring(self): + """Clean up object docstring""" + docstring = super(ParserExtra, self).parse_docstring() + if not docstring: + docstring = '' + docstring = textwrap.dedent(docstring) + docstring = docstring.replace("'''", '').replace('"""', '') + return docstring + + def parse_all(self): + """Parse __all__ assignment + + This differs from the default __all__ assignment processing by: + + * Accepting multiple __all__ assignments + * Doesn't throw exceptions on edge cases + * Parses NAME nodes (but throws them out for now) + """ + assert self.current.value == '__all__' + self.consume(tk.NAME) + if self.current.kind != tk.OP or self.current.value not in ['=', '+=']: + return + assign_op = self.current.value + self.consume(tk.OP) + + if self.all is None: + self.all = [] + + all_content = [] + # Support [], [] + [], and [] + foo.__all__ by iterating over list + # assignments + while True: + if self.current.kind == tk.OP and self.current.value in '([': + content = self.parse_iterable() + all_content.extend(content) + elif self.current.kind == tk.NAME: + name = self.parse_object_identifier() + # TODO Skip these for now. In the future, this name should be + # converted to an object that will be resolved after we've + # parsed at a later stage in the mapping process. 
+ #all_content.append(name) + if self.current.kind == tk.OP and self.current.value == '+': + self.stream.move() + else: + break + + if assign_op == '=': + self.all = all_content + elif assign_op == '+=': + self.all += all_content diff --git a/tests/test_python_parser.py b/tests/test_python_parser.py new file mode 100644 index 0000000..c0f5705 --- /dev/null +++ b/tests/test_python_parser.py @@ -0,0 +1,68 @@ +# coding=utf8 + +"""Test Python parser""" + +import sys +import unittest +from textwrap import dedent + +from autoapi.mappers.python import ParserExtra + +if sys.version_info < (3, 0): + from StringIO import StringIO +else: + from io import StringIO + + +class PythonParserTests(unittest.TestCase): + + def parse(self, source): + in_h = StringIO(dedent(source)) + return ParserExtra()(in_h, '/dev/null') + + def test_parses_basic_file(self): + source = """ + def foo(bar): + pass + """ + self.assertIsNone(self.parse(source).all) + + def test_parses_all(self): + source = """ + __all__ = ['Foo', 5.0] + """ + self.assertEqual(self.parse(source).all, ['Foo', 5.0]) + + def test_parses_all_with_list_addition(self): + source = """ + __all__ = ['Foo'] + [] + """ + self.assertEqual(self.parse(source).all, ['Foo']) + + def test_parses_all_with_name_addtion(self): + source = """ + __all__ = ['Foo'] + bar.__all__ + """ + self.assertEqual(self.parse(source).all, ['Foo']) + + def test_parses_all_with_multiple_name_addtions(self): + source = """ + __all__ = foo + bar + __all__ += boop + __all__ += ['foo'] + """ + self.assertEqual(self.parse(source).all, ['foo']) + source = """ + __all__ = ['foo'] + __all__ = foo + """ + self.assertEqual(self.parse(source).all, []) + + def test_parses_all_multiline(self): + source = """ + __all__ = [ + 'foo', + 'bar', + ] + """ + self.assertEqual(self.parse(source).all, ['foo', 'bar']) diff --git a/tox.ini b/tox.ini index afbd558..7e45488 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,8 @@ [tox] -envlist = py27,py35,lint,docs +envlist = + 
py{27,34,35}-sphinx{13,14,15} + py27-lint + py27-docs [testenv] setenv = @@ -7,11 +10,14 @@ setenv = deps = -r{toxinidir}/requirements.txt pytest mock + sphinx14: Sphinx<1.5 + sphinx15: Sphinx<1.6 commands = py.test {posargs} [testenv:docs] deps = + Sphinx==1.5 sphinx_rtd_theme {[testenv]deps} changedir = {toxinidir}/docs