import URL from 'url';
import cheerio from 'cheerio';
import TurndownService from 'turndown';

import Resource from 'resource';
import { validateUrl } from 'utils';
import addCustomExtractor from 'extractors/add-extractor';
import getExtractor from 'extractors/get-extractor';
import RootExtractor, { selectExtendedTypes } from 'extractors/root-extractor';
import collectAllPages from 'extractors/collect-all-pages';

/**
 * Public entry point of the parser: fetches (or accepts) a document, picks an
 * extractor for it and returns the extracted fields.
 */
const Parser = {
  /**
   * Extract content from the document at `url`.
   *
   * @param {string} url - Address of the document. In the browser build it may
   *   be omitted, in which case the current page is used.
   * @param {Object} [options]
   * @param {string} [options.html] - Pre-fetched markup; forwarded to
   *   `Resource.create` together with the url.
   * @param {boolean} [options.fetchAllPages=true] - Follow `next_page_url`
   *   via `collectAllPages` when the first extraction reports one.
   * @param {boolean} [options.fallback=true] - Forwarded to
   *   `RootExtractor.extract`.
   * @param {string} [options.contentType='html'] - `'markdown'` and `'text'`
   *   post-process `content`; any other value leaves it untouched.
   * @param {Object} [options.headers={}] - Forwarded to `Resource.create`.
   * @param {Object} [options.extend] - Extra selectors resolved through
   *   `selectExtendedTypes` and merged over the result.
   * @param {Object} [options.customExtractor] - Extractor registered through
   *   `addCustomExtractor` before extractor lookup.
   * @returns {Promise<Object>} The extraction result; `{ error: true, message }`
   *   when the url is missing or fails `validateUrl`; or the value produced by
   *   `Resource.create` when it is flagged `failed`.
   */
  async parse(url, { html, ...opts } = {}) {
    const {
      fetchAllPages = true,
      fallback = true,
      contentType = 'html',
      headers = {},
      extend,
      customExtractor,
    } = opts;

    // if no url was passed and this is the browser version,
    // set url to window.location.href and load the html
    // from the current page
    if (!url && cheerio.browser) {
      url = window.location.href; // eslint-disable-line no-undef
      html = html || cheerio.html();
    }

    // URL.parse throws a TypeError on non-string input, so a missing url is
    // reported through the same error object as any other invalid url
    // instead of being handed to the parser.
    const parsedUrl = url == null ? null : URL.parse(url);

    if (!parsedUrl || !validateUrl(parsedUrl)) {
      return {
        error: true,
        message:
          'The url parameter passed does not look like a valid URL. Please check your URL and try again.',
      };
    }

    const $ = await Resource.create(url, html, parsedUrl, headers);

    // If we found an error creating the resource, return that error
    if ($.failed) {
      return $;
    }

    // Add custom extractor via cli.
    if (customExtractor) {
      addCustomExtractor(customExtractor);
    }

    const Extractor = getExtractor(url, parsedUrl, $);

    // if html still has not been set (i.e., url passed to Parser.parse),
    // set html from the response of Resource.create
    if (!html) {
      html = $.html();
    }

    // Cached value of every meta name in our document.
    // Used when extracting title/author/date_published/dek
    const metaCache = $('meta')
      .map((_, node) => $(node).attr('name'))
      .toArray();

    let extendedTypes = {};
    if (extend) {
      extendedTypes = selectExtendedTypes(extend, { $, url, html });
    }

    let result = RootExtractor.extract(Extractor, {
      url,
      html,
      $,
      metaCache,
      parsedUrl,
      fallback,
      contentType,
    });

    const { title, next_page_url } = result;

    // Fetch more pages if next_page_url found
    if (fetchAllPages && next_page_url) {
      result = await collectAllPages({
        Extractor,
        next_page_url,
        html,
        $,
        metaCache,
        result,
        title,
        url,
      });
    } else {
      result = {
        ...result,
        total_pages: 1,
        rendered_pages: 1,
      };
    }

    if (contentType === 'markdown') {
      // Turndown throws a TypeError for input that is neither a string nor a
      // DOM node, so conversion is skipped when no content is present and the
      // field is left as-is.
      // NOTE(review): presumably content is null when extraction finds
      // nothing; confirm against RootExtractor.
      if (result.content != null) {
        const turndownService = new TurndownService();
        result.content = turndownService.turndown(result.content);
      }
    } else if (contentType === 'text') {
      result.content = $.text($(result.content));
    }

    // Extended fields are spread last, so they win over same-named keys.
    return { ...result, ...extendedTypes };
  },

  // True when running against the browser build of cheerio.
  browser: !!cheerio.browser,

  /**
   * A convenience method for getting a resource to work with,
   * e.g., for custom extractor generator.
   *
   * @param {string} url - Address handed to `Resource.create`.
   * @returns {*} Whatever `Resource.create(url)` returns.
   */
  fetchResource(url) {
    return Resource.create(url);
  },

  /**
   * Register a custom extractor.
   *
   * @param {Object} extractor - Extractor definition handed to
   *   `addCustomExtractor`.
   * @returns {*} Whatever `addCustomExtractor` returns.
   */
  addExtractor(extractor) {
    return addCustomExtractor(extractor);
  },
};

export default Parser;