
import sys
import os
import textwrap
import ast
import tokenize as tk
from collections import defaultdict
from pydocstyle import parser

from .base import PythonMapperBase, SphinxMapperBase
from ..utils import slugify

if sys.version_info < (3,):
    from itertools import izip_longest as zip_longest
else:
    from itertools import zip_longest


class PythonSphinxMapper(SphinxMapperBase):

    """Auto API domain handler for Python

    Parses directly from Python files.

    :param app: Sphinx application passed in as part of the extension
    """

    def load(self, patterns, dirs, ignore=None):
        """Load objects from the filesystem into the ``paths`` dictionary

        Also include an attribute on the object, ``relative_path``, which is
        the shortened, relative path of the package/module.
        """
        for dir_ in dirs:
            for path in self.find_files(patterns=patterns, dirs=[dir_], ignore=ignore):
                data = self.read_file(path=path)
                if data:
                    data.relative_path = os.path.relpath(path, dir_)
                    self.paths[path] = data

    def read_file(self, path, **kwargs):
        """Read file input into memory, returning deserialized objects

        :param path: Path of file to read
        """
        try:
            with open(path) as source_file:
                parsed_data = ParserExtra()(source_file, path)
            return parsed_data
        except (IOError, TypeError, ImportError):
            self.app.warn('Error reading file: {0}'.format(path))
            return None

    def create_class(self, data, options=None, **kwargs):
        """Create a class from the passed-in data

        :param data: parsed data from pydocstyle output
        """
obj_map = dict((cls.type, cls) for cls
in [PythonClass, PythonFunction, PythonModule,
PythonMethod, PythonPackage])
try:
cls = obj_map[data.kind]
except KeyError:
self.app.warn("Unknown type: %s" % data.kind)
else:
obj = cls(data, jinja_env=self.jinja_env,
options=self.app.config.autoapi_options, **kwargs)
for child_data in data.children:
for child_obj in self.create_class(child_data, options=options,
**kwargs):
obj.children.append(child_obj)
yield obj
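    # Illustrative sketch (assumed layout, not part of this module): for parsed
    # data describing a module that defines one class with one method,
    # ``create_class`` yields a ``PythonModule`` whose ``children`` contain a
    # ``PythonClass``, which in turn contains a ``PythonMethod``, mirroring the
    # nesting reported by pydocstyle.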


class PythonPythonMapper(PythonMapperBase):

    language = 'python'
    is_callable = False

    def __init__(self, obj, **kwargs):
        super(PythonPythonMapper, self).__init__(obj, **kwargs)

        self.name = self._get_full_name(obj)
        self.id = slugify(self.name)

        # Optional
        self.children = []
        self.args = []
        if self.is_callable:
            self.args = self._get_arguments(obj)
        self.docstring = obj.docstring
        if getattr(obj, 'parent', None):
            self.inheritance = [obj.parent.name]
        else:
            self.inheritance = []

        # For later
        self.item_map = defaultdict(list)
    @property
    def is_undoc_member(self):
        return not bool(self.docstring)

    @property
    def is_private_member(self):
        return not self.obj.is_public

    @property
    def is_special_member(self):
        return (
            (isinstance(self.obj, parser.Method) and self.obj.is_magic) or
            (self.obj.name.startswith('__') and self.obj.name.endswith('__'))
        )

    @property
    def display(self):
        if self.is_undoc_member and 'undoc-members' not in self.options:
            return False
        if self.is_private_member and 'private-members' not in self.options:
            return False
        if self.is_special_member and 'special-members' not in self.options:
            return False
        return True
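    # For example, a member with no docstring is only displayed when
    # 'undoc-members' is in ``self.options``; a name such as ``_helper``
    # (not public according to pydocstyle) additionally needs
    # 'private-members', and dunder names need 'special-members'.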
    @staticmethod
    def _get_full_name(obj):
        """Recursively build the full name of the object from pydocstyle

        Uses an additional attribute added to the object, ``relative_path``.
        This is the shortened file path of the object, used when the object
        is a package or module.

        :param obj: pydocstyle object, as returned from Parser()
        :returns: Dotted name of object
        :rtype: str
        """
def _inner(obj, parts=None):
if parts is None:
parts = []
obj_kind = obj.kind
obj_name = obj.name
if obj_kind == 'module':
obj_name = getattr(obj, 'relative_path', None) or obj.name
obj_name = obj_name.replace('/', '.')
ext = '.py'
if obj_name.endswith(ext):
obj_name = obj_name[:-len(ext)]
elif obj_kind == 'package':
obj_name = getattr(obj, 'relative_path', None) or obj.name
exts = ['/__init__.py', '.py']
for ext in exts:
if obj_name.endswith(ext):
obj_name = obj_name[:-len(ext)]
obj_name = obj_name.split('/').pop()
parts.insert(0, obj_name)
try:
return _inner(obj.parent, parts)
except AttributeError:
pass
return parts
return '.'.join(_inner(obj))
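    # Illustrative sketch (hypothetical package layout): a method ``save`` on
    # class ``Model``, inside a module whose ``relative_path`` is
    # ``mypackage/models.py``, resolves to:
    #
    #     'mypackage.models.Model.save'
    #
    # A package parsed from ``mypackage/__init__.py`` resolves to 'mypackage'.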
@staticmethod
def _get_arguments(obj):
"""Get arguments from a pydocstyle object
:param obj: pydocstyle object, as returned from Parser()
:returns: list of argument or argument and value pairs
:rtype: list
"""
arguments = []
source = textwrap.dedent(obj.source)
        # Catch a broad Exception here because AST parsing can throw any
        # number of errors, including SyntaxError
        try:
            parsed = ast.parse(source)
        except Exception as e:  # noqa
            print("Error parsing AST: %s" % str(e))
            return []
parsed_args = parsed.body[0].args
arg_names = [arg.id if sys.version_info < (3,) else arg.arg
for arg in parsed_args.args]
# Get defaults for display based on AST node type
arg_defaults = []
pydocstyle_map = {
ast.Name: 'id',
ast.Num: 'n',
ast.Str: lambda obj: '"{0}"'.format(obj.s),
# Call function name can be an `Attribute` or `Name` node, make sure
# we're using the correct attribute for the id
ast.Call: lambda obj: (obj.func.id if isinstance(obj.func, ast.Name)
else obj.func.attr),
# TODO these require traversal into the AST nodes. Add this for more
# complete argument parsing, or handle with a custom AST traversal.
ast.List: lambda _: 'list',
ast.Tuple: lambda _: 'tuple',
ast.Set: lambda _: 'set',
ast.Dict: lambda _: 'dict',
}
if sys.version_info >= (3,):
pydocstyle_map.update({
ast.NameConstant: 'value',
})
for value in parsed_args.defaults:
default = None
try:
default = pydocstyle_map[type(value)](value)
except TypeError:
default = getattr(value, pydocstyle_map[type(value)])
except KeyError:
pass
if default is None:
default = 'None'
arg_defaults.append(default)
        # Pair arguments with defaults from the end of the list, because the
        # AST returns defaults as a short list that applies only to the
        # trailing arguments
for (name, default) in zip_longest(reversed(arg_names),
reversed(arg_defaults)):
arg = name
if default is not None:
arg = '{0}={1}'.format(name, default)
arguments.insert(0, arg)
# Add *args and **kwargs
if parsed_args.vararg:
arguments.append('*{0}'.format(
parsed_args.vararg
if sys.version_info < (3, 3)
else parsed_args.vararg.arg
))
if parsed_args.kwarg:
arguments.append('**{0}'.format(
parsed_args.kwarg
if sys.version_info < (3, 3)
else parsed_args.kwarg.arg
))
return arguments
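    # Illustrative sketch of the method above: given an ``obj`` whose
    # ``source`` is the (hypothetical) definition
    # ``def greet(name, greeting="hi", *args, **kwargs): pass``, the result is:
    #
    #     ['name', 'greeting="hi"', '*args', '**kwargs']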


class PythonFunction(PythonPythonMapper):
    type = 'function'
    is_callable = True


class PythonMethod(PythonPythonMapper):
    type = 'method'
    is_callable = True


class PythonModule(PythonPythonMapper):
    type = 'module'
    top_level_object = True


class PythonPackage(PythonPythonMapper):
    type = 'package'
    top_level_object = True


class PythonClass(PythonPythonMapper):
    type = 'class'


# Parser
class ParserExtra(parser.Parser):
"""Extend Parser object to provide customized return"""
def parse_object_identifier(self):
"""Parse object identifier"""
assert self.current.kind == tk.NAME
identifier = ''
while True:
is_identifier = (
self.current.kind == tk.NAME or
(
self.current.kind == tk.OP and
self.current.value == '.'
)
)
if is_identifier:
identifier += self.current.value
self.stream.move()
else:
break
return identifier
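    # For example, with the token stream positioned at the start of a dotted
    # name such as ``some_module.__all__``, this consumes the NAME and ``.``
    # tokens and returns the string 'some_module.__all__'.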
def parse_string(self):
"""Clean up STRING nodes"""
val = self.current.value
self.consume(tk.STRING)
return val.lstrip('\'"').rstrip('\'"')
def parse_number(self):
"""Parse a NUMBER node to either a ``float`` or ``int``"""
val = self.current.value
self.consume(tk.NUMBER)
normalized_val = float(val)
try:
normalized_val = int(val)
except ValueError:
pass
return normalized_val
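    # For example, the STRING token for ``'spam'`` comes back as ``spam`` with
    # its quotes stripped, while the NUMBER tokens ``3`` and ``3.5`` come back
    # as the int 3 and the float 3.5 respectively.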
    def parse_iterable(self):
        """Recursively parse an iterable object

        This will return a local representation of the parsed data, except for
        NAME nodes. This does not currently attempt to perform lookup on the
        object names defined in an iterable.

        This is mostly a naive implementation and won't handle complex
        structures. It is currently only meant to parse simple iterables, such
        as ``__all__`` and the list of parent classes in a class definition.
        """
        content = None
        is_list = True
        while self.current is not None:
            if self.current.kind == tk.STRING:
                content.append(self.parse_string())
            elif self.current.kind == tk.NUMBER:
                content.append(self.parse_number())
            elif self.current.kind == tk.NAME:
                # Handle generators
                if self.current.value == 'for' and not content:
                    is_list = False
                # TODO this is dropped for now, but can be handled with an
                # object lookup in the future, if we decide to track assignment.
                # content.append(self.parse_object_identifier())
                self.stream.move()
            elif self.current.kind == tk.OP and self.current.value in '[(':
                if content is None:
                    content = []
                    self.stream.move()
                else:
                    content.append(self.parse_iterable())
                    continue
            elif self.current.kind == tk.OP and self.current.value in '])':
                self.stream.move()
                if is_list:
                    return content
                # Discard generators because we can't do anything with them
                return []
            else:
                self.stream.move()
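    # Illustrative behaviour of the loop above: a literal such as
    # ``['a', 'b', ('c', 'd')]`` parses to ['a', 'b', ['c', 'd']] (tuples come
    # back as lists), while a generator expression such as
    # ``(x for x in names)`` is detected via the ``for`` keyword and returns [].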
def parse_docstring(self):
"""Clean up object docstring"""
docstring = super(ParserExtra, self).parse_docstring()
if not docstring:
docstring = ''
docstring = textwrap.dedent(docstring)
docstring = docstring.replace("'''", '').replace('"""', '')
return docstring
    def parse_all(self):
        """Parse __all__ assignment

        This differs from the default __all__ assignment processing by:

        * Accepting multiple __all__ assignments
        * Not throwing exceptions on edge cases
        * Parsing NAME nodes (but throwing them out for now)
        """
assert self.current.value == '__all__'
self.consume(tk.NAME)
if self.current.kind != tk.OP or self.current.value not in ['=', '+=']:
return
assign_op = self.current.value
self.consume(tk.OP)
if self.all is None:
self.all = []
all_content = []
        # Support [], [] + [], and [] + foo.__all__ by iterating over list
        # assignments
while True:
if self.current.kind == tk.OP and self.current.value in '([':
content = self.parse_iterable()
all_content.extend(content)
elif self.current.kind == tk.NAME:
name = self.parse_object_identifier()
                # TODO Skip these for now. In the future, this name should be
                # converted to an object that will be resolved at a later
                # stage in the mapping process, after parsing.
                # all_content.append(name)
if self.current.kind == tk.OP and self.current.value == '+':
self.stream.move()
else:
break
if assign_op == '=':
self.all = all_content
elif assign_op == '+=':
self.all += all_content
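    # Illustrative sketch (hypothetical module source):
    #
    #     __all__ = ['foo'] + ['bar']   # self.all becomes ['foo', 'bar']
    #     __all__ += ['baz']            # self.all becomes ['foo', 'bar', 'baz']
    #
    # A NAME on the right-hand side, e.g. ``['qux'] + other.__all__``, is
    # parsed but currently discarded.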