From 440c4e9c50fdfc9d5627fced7d134edd2c3f0e81 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Tue, 29 Dec 2020 18:43:42 -0500 Subject: [PATCH] Remove lxml dependency The lxml dependency in the project was fairly unnecessary, and made the initial build time for the project considerably slower. This replaces all instances of lxml with either the default html parser (for bs4 constructors) or the built-in xml.etree package (for search suggestion parsing). --- app/filter.py | 4 ++-- app/request.py | 11 ++++++----- app/utils/filter_utils.py | 2 +- requirements.txt | 1 - 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/app/filter.py b/app/filter.py index af08610..ccd6af3 100644 --- a/app/filter.py +++ b/app/filter.py @@ -111,8 +111,8 @@ class Filter: return # Wrap section in details element to allow collapse/expand - details = BeautifulSoup(features='lxml').new_tag('details') - summary = BeautifulSoup(features='lxml').new_tag('summary') + details = BeautifulSoup(features='html.parser').new_tag('details') + summary = BeautifulSoup(features='html.parser').new_tag('summary') summary.string = question_divs[0].find('h2').text question_divs[0].find('h2').decompose() details.append(summary) diff --git a/app/request.py b/app/request.py index b64f6c9..00e2ce0 100644 --- a/app/request.py +++ b/app/request.py @@ -1,5 +1,5 @@ from app.models.config import Config -from lxml import etree +import xml.etree.ElementTree as ET import random import requests from requests import Response, ConnectionError @@ -185,11 +185,12 @@ class Request: response = self.send(base_url=AUTOCOMPLETE_URL, query=urlparse.urlencode(ac_query)).text - if response: - dom = etree.fromstring(response) - return dom.xpath('//suggestion/@data') + if not response: + return [] - return [] + root = ET.fromstring(response) + return [_.attrib['data'] for _ in + root.findall('.//suggestion/[@data]')] def send(self, base_url=SEARCH_URL, query='', attempt=0) -> Response: """Sends an outbound request to a URL. 
Optionally sends the request diff --git a/app/utils/filter_utils.py b/app/utils/filter_utils.py index 877c38c..d1a2604 100644 --- a/app/utils/filter_utils.py +++ b/app/utils/filter_utils.py @@ -75,7 +75,7 @@ def filter_link_args(query_link): def gen_nojs(sibling): - nojs_link = BeautifulSoup(features='lxml').new_tag('a') + nojs_link = BeautifulSoup(features='html.parser').new_tag('a') nojs_link['href'] = '/window?location=' + sibling['href'] nojs_link['style'] = 'display:block;width:100%;' nojs_link.string = 'NoJS Link: ' + nojs_link['href'] diff --git a/requirements.txt b/requirements.txt index 399d342..c015b64 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,6 @@ Flask-Session==0.3.2 idna==2.9 itsdangerous==1.1.0 Jinja2==2.10.3 -lxml==4.5.1 MarkupSafe==1.1.1 more-itertools==8.3.0 packaging==20.4