Guard get_title() and shorten_title() against documents without a <title>.

doc.find('.//title') returns None when the element is absent, so the old
code raised AttributeError on `.text`. The lookup result is compared with
`is None` rather than tested for truthiness: an element with no child
elements is falsy, so `not title` would reject every ordinary <title> and
always return the fallback value.

NOTE(review): the post-image blob hash on the index line is carried over from
the original patch and does not reflect the `is None` change.

diff --git a/readability/htmls.py b/readability/htmls.py
index 97aa55b..cb4ada7 100644
--- a/readability/htmls.py
+++ b/readability/htmls.py
@@ -43,11 +43,11 @@ def norm_title(title):
     return normalize_entities(normalize_spaces(title))
 
 def get_title(doc):
-    title = doc.find('.//title').text
-    if not title:
+    title = doc.find('.//title')
+    if title is None or not title.text:
         return '[no-title]'
-    
-    return norm_title(title)
+
+    return norm_title(title.text)
 
 def add_match(collection, text, orig):
     text = norm_title(text)
@@ -56,11 +56,11 @@ def add_match(collection, text, orig):
         collection.add(text)
 
 def shorten_title(doc):
-    title = doc.find('.//title').text
-    if not title:
+    title = doc.find('.//title')
+    if title is None or not title.text:
         return ''
-    
-    title = orig = norm_title(title)
+
+    title = orig = norm_title(title.text)
 
     candidates = set()
 
@@ -77,7 +77,7 @@ def shorten_title(doc):
                 add_match(candidates, e.text, orig)
             if e.text_content():
                 add_match(candidates, e.text_content(), orig)
-    
+
     if candidates:
         title = sorted(candidates, key=len)[-1]
     else: