From 33dc8fa02383ee7cbdfe1e538fdcdaa98e6cb841 Mon Sep 17 00:00:00 2001 From: Evan Tseng Date: Wed, 16 Nov 2016 15:31:09 +0800 Subject: [PATCH] Bug 1255978 - Remove legends candidate, r=Gijs --- Readability.js | 27 +- .../bug-1255978/expected-metadata.json | 6 + test/test-pages/bug-1255978/expected.html | 72 + test/test-pages/bug-1255978/source.html | 40907 ++++++++++++++++ 4 files changed, 41011 insertions(+), 1 deletion(-) create mode 100644 test/test-pages/bug-1255978/expected-metadata.json create mode 100644 test/test-pages/bug-1255978/expected.html create mode 100644 test/test-pages/bug-1255978/source.html diff --git a/Readability.js b/Readability.js index 145add1..fab968d 100644 --- a/Readability.js +++ b/Readability.js @@ -112,7 +112,7 @@ Readability.prototype = { // All of the regular expressions in use within readability. // Defined up here so we don't instantiate them repeatedly in loops. REGEXPS: { - unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|menu|modal|related|remark|rss|share|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i, + unlikelyCandidates: /banner|combx|comment|community|disqus|extra|foot|header|legends|menu|modal|related|remark|rss|shoutbox|sidebar|skyscraper|sponsor|ad-break|agegate|pagination|pager|popup/i, okMaybeItsACandidate: /and|article|body|column|main|shadow/i, positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i, negative: /hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i, @@ -464,6 +464,12 @@ Readability.prototype = { this._clean(articleContent, "h1"); this._clean(articleContent, "footer"); + // Clean out elements that have "share" in their id/class combinations from the final top candidates, + // which means we don't remove the top candidates themselves even if they have "share".
+ this._forEachNode(articleContent.children, function(topCandidate) { + this._cleanMatchedNodes(topCandidate, /share/); + }); + // If there is only one h2, they are probably using it as a header // and not a subheader, so remove it since we already have a header. if (articleContent.getElementsByTagName('h2').length === 1) @@ -1700,6 +1706,25 @@ Readability.prototype = { }); }, + /** + * Clean out elements whose id/class combinations match specific string. + * + * @param Element + * @param RegExp match id/class combination. + * @return void + **/ + _cleanMatchedNodes: function(e, regex) { + var endOfSearchMarkerNode = this._getNextNode(e, true); + var next = this._getNextNode(e); + while (next && next != endOfSearchMarkerNode) { + if (regex.test(next.className + " " + next.id)) { + next = this._removeAndGetNext(next); + } else { + next = this._getNextNode(next); + } + } + }, + /** * Clean out spurious headers from an Element. Checks things like classnames and link density. * diff --git a/test/test-pages/bug-1255978/expected-metadata.json b/test/test-pages/bug-1255978/expected-metadata.json new file mode 100644 index 0000000..022cd45 --- /dev/null +++ b/test/test-pages/bug-1255978/expected-metadata.json @@ -0,0 +1,6 @@ +{ + "title": "Seven secrets that hotel owners don't want you to know", + "byline": "Hazel Sheffield", + "excerpt": "Most people go to hotels for the pleasure of sleeping in a giant bed with clean white sheets and waking up to fresh towels in the morning. But those towels and sheets might not be as clean as they look, according to the hotel bosses that responded to an online thread about the things hotel owners don’t want you to know.", + "readerable": true +} diff --git a/test/test-pages/bug-1255978/expected.html b/test/test-pages/bug-1255978/expected.html new file mode 100644 index 0000000..35cee1b --- /dev/null +++ b/test/test-pages/bug-1255978/expected.html @@ -0,0 +1,72 @@ +
+
+

Most people go to hotels for the pleasure of sleeping in a giant bed with clean white sheets and waking up to fresh towels in the morning.

+

But those towels and sheets might not be as clean as they look, according to the hotel bosses that responded to an online thread about the things hotel owners don’t want you to know.

+

Zeev Sharon and Michael Forrest Jones both run hotel start-ups in the US. Forrest Jones runs the start-up Beechmont Hotels Corporation, a hotel operating company that consults with hotel owners on how they can improve their business. Sharon is the CEO of Hotelied, a start-up that allows people to sign up for discounts at luxury hotels.

+

But even luxury hotels aren’t always cleaned as often as they should be.

+

Here are some of the secrets that the receptionist will never tell you when you check in, according to answers posted on Quora.

+

+
+

+ Even posh hotels might not wash a blanket in between stays + +

+
+

1. Take any blankets or duvets off the bed

+

Forrest Jones said that anything that comes into contact with any of the previous guest’s skin should be taken out and washed every time the room is made, but that even the fanciest hotels don’t always do so. "Hotels are getting away from comforters. Blankets are here to stay, however. But some hotels are still hesitant about washing them every day if they think they can get out of it," he said.

+
+

Video shows bed bug infestation at New York hotel

+
+

+
+

+ Forrest Jones advised stuffing the peep hole with a strip of rolled up notepaper when not in use. + +

+
+

2. Check the peep hole has not been tampered with

+

This is not common, but can happen, Forrest Jones said. He advised stuffing the peep hole with a strip of rolled up notepaper when not in use. When someone knocks on the door, the paper can be removed to check who is there. If no one is visible, he recommends calling the front desk immediately. “I look forward to the day when I can tell you to choose only hotels where every employee who has access to guestroom keys is subjected to a complete public records background check, prior to hire, and every year or two thereafter. But for now, I can't,” he said.

+

+

3. Don’t use a wooden luggage rack

+

Bedbugs love wood. Even though a wooden luggage rack might look nicer and more expensive than a metal one, it’s a breeding ground for bugs. Forrest Jones says guests should put the items they plan to take from bags on other pieces of furniture and leave the bag on the floor.

+

+
+

+ The old rule of thumb is that for every 00 invested in a room, the hotel should charge in average daily rate + +

+
+

4. Hotel rooms are priced according to how expensive they were to build

+

Zeev Sharon said that the old rule of thumb is that for every $1000 invested in a room, the hotel should charge $1 in average daily rate. So a room that cost $300,000 to build, should sell on average for $300/night.

+

5. Beware the wall-mounted hairdryer

+

It contains the most germs of anything in the room. Other studies have said the TV remote and bedside lamp switches are the most unhygienic. “Perhaps because it's something that's easy for the housekeepers to forget to check or to squirt down with disinfectant,” Forrest Jones said.

+

+ +

6. Mini bars almost always lose money

+

Despite the snacks in the minibar seeming like the most overpriced food you have ever seen, hotel owners are still struggling to make a profit from those snacks. "Minibars almost always lose money, even when they charge $10 for a Diet Coke,” Sharon said.

+

+
+

+ Towels should always be cleaned between stays + +

+
+

7. Always made sure the hand towels are clean when you arrive

+

Forrest Jones made a discovery when he was helping out with the housekeepers. “You know where you almost always find a hand towel in any recently-vacated hotel room that was occupied by a guy? On the floor, next to the bed, about halfway down, maybe a little toward the foot of the bed. Same spot in the floor, next to almost every bed occupied by a man, in every room. I'll leave the rest to your imagination,” he said.

+ + + +
+
diff --git a/test/test-pages/bug-1255978/source.html b/test/test-pages/bug-1255978/source.html new file mode 100644 index 0000000..08840dc --- /dev/null +++ b/test/test-pages/bug-1255978/source.html @@ -0,0 +1,40907 @@ + + + + + + + + + The seven secrets that hotel owners don't want you to know | The Independent + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
    + +
  1. + + +
  2. + +
  3. + +
  4. +
+ + +
+
+
+
+
+
+
+
+ +
+
+ + +
+ +
+ + + +
+ + + +
+ + +

The seven secrets that hotel owners don't want you to know

+ + +
+

Hotel bosses spill the beans on cost cutting and horrible hygiene

+
+ +
+
+ + + + + + + + + + +
+
+
+ + + + + + + + +
+
+
+
+
+
+ + + + + + + + +
+
+
+
+
+
+ + + + + + + + +
+
+
+
+
+
+ + + + + + + + +
+
+
+
+
+ + + + + + + +
+
+ + + + + + + + +
+
+
+ +
+
+
+
+
+
+ + + + + + + +
+
+
+

Most people go to hotels for the pleasure of sleeping in a giant bed with clean white sheets and waking up to fresh towels in the morning.

+ +

But those towels and sheets might not be as clean as they look, according to the hotel bosses that responded to an online thread about the things hotel owners don’t want you to know.

+ +

Zeev Sharon and Michael Forrest Jones both run hotel start-ups in the US. Forrest Jones runs the start-up Beechmont Hotels Corporation, a hotel operating company that consults with hotel owners on how they can improve their business. Sharon is the CEO of Hotelied, a start-up that allows people to sign up for discounts at luxury hotels.

+ +

But even luxury hotels aren’t always cleaned as often as they should be.

+ +

Here are some of the secrets that the receptionist will never tell you when you check in, according to answers posted on Quora.

+ +

+ +
+
+ +
bandb2.jpg
+ +
+
+ Even posh hotels might not wash a blanket in between stays + +
+
+ +

1. Take any blankets or duvets off the bed

+ +

Forrest Jones said that anything that comes into contact with any of the previous guest’s skin should be taken out and washed every time the room is made, but that even the fanciest hotels don’t always do so. "Hotels are getting away from comforters. Blankets are here to stay, however. But some hotels are still hesitant about washing them every day if they think they can get out of it," he said.

+ +
+
+ +
+ +
+
+
+
+
+
+
+
+
Play Video
+
+
Play
+
+
+
+
+
+
+
+
Mute
+
+
Current Time 0:00
+
+
+
/
+
+
+
Duration Time 0:00
+
+
+
+
Loaded: 0% +
+
+
Progress: 0% +
+
+
+
+
Stream TypeLIVE
+
+
+
Remaining Time -0:00
+
+
+
Share
+ + + + + + +
Fullscreen
+
+ +
+
+
+
+ + + + + +
+ +
+ + + + + +
+ +
+ + + + + +
+ +
+ +
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+
+
+ + +
+
+ + +
+ +
+ +
+ +
+
Video shows bed bug infestation at New York hotel
+
+ +

+ +
+
+ +
hotel-door-getty.jpg
+ +
+
+ Forrest Jones advised stuffing the peep hole with a strip of rolled up notepaper when not in use. + +
+
+ +

2. Check the peep hole has not been tampered with

+ +

This is not common, but can happen, Forrest Jones said. He advised stuffing the peep hole with a strip of rolled up notepaper when not in use. When someone knocks on the door, the paper can be removed to check who is there. If no one is visible, he recommends calling the front desk immediately. “I look forward to the day when I can tell you to choose only hotels where every employee who has access to guestroom keys is subjected to a complete public records background check, prior to hire, and every year or two thereafter. But for now, I can't,” he said.

+ +

+ +
+
+ +
luggage-3.jpg
+ +
+
+ Put luggage on the floor + +
+
+ +

3. Don’t use a wooden luggage rack

+ +

Bedbugs love wood. Even though a wooden luggage rack might look nicer and more expensive than a metal one, it’s a breeding ground for bugs. Forrest Jones says guests should put the items they plan to take from bags on other pieces of furniture and leave the bag on the floor.

+ +

+ +
+
+ +
Lifestyle-hotels.jpg
+ +
+
+ The old rule of thumb is that for every 00 invested in a room, the hotel should charge in average daily rate + +
+
+ +

4. Hotel rooms are priced according to how expensive they were to build

+ +

Zeev Sharon said that the old rule of thumb is that for every $1000 invested in a room, the hotel should charge $1 in average daily rate. So a room that cost $300,000 to build, should sell on average for $300/night.

+ + + +

5. Beware the wall-mounted hairdryer

+ +

It contains the most germs of anything in the room. Other studies have said the TV remote and bedside lamp switches are the most unhygienic. “Perhaps because it's something that's easy for the housekeepers to forget to check or to squirt down with disinfectant,” Forrest Jones said.

+ +

+ + + +

6. Mini bars almost always lose money

+ +

Despite the snacks in the minibar seeming like the most overpriced food you have ever seen, hotel owners are still struggling to make a profit from those snacks. "Minibars almost always lose money, even when they charge $10 for a Diet Coke,” Sharon said.

+ +

+ +
+
+ +
agenda7.jpg
+ +
+
+ Towels should always be cleaned between stays + +
+
+ +

7. Always made sure the hand towels are clean when you arrive

+ +

Forrest Jones made a discovery when he was helping out with the housekeepers. “You know where you almost always find a hand towel in any recently-vacated hotel room that was occupied by a guy? On the floor, next to the bed, about halfway down, maybe a little toward the foot of the bed. Same spot in the floor, next to almost every bed occupied by a man, in every room. I'll leave the rest to your imagination,” he said.

+ + +
+
+ + + + + + + + + + +
+
+
+ + + + + + + + +
+
+
+
+
+
+ + + + + + + + +
+
+
+
+
+
+ + + + + + + + +
+
+
+
+
+
+ + + + + + + + +
+
+
+
+
+ + + + + + + +
+
+ + + + + + + + +
+
+
+ +
+
+
+
+
+ +
+ +
+
+ +
+
+ +
+
+ +
+
+ + + +
+
+
+
+
+
+
+
+ +
+ +
+ +
+

Comments

+
+
+
+
+ +
+ +
+
+
+ + + +
+
+
+
+ +
+
    +
    +

    +
    +
    + +
    +
    +
    +
    +
    0 Comments
    +
    + +
    +
    +
    + +
    +
    + +
    + +
    + +
    + +
    + +
    +
    + +
    +
    + + + +
    +
    + +
    +
    + +
    +
    + +
    + + + + +
    + + + + + + + + + +
    + + + + + + + + + + + +
    + + + + + + + + + + + + + + + + + + + + + +
    +
    +
    +
    +
    +
    +
    +
    +
    + + + +
    +
    +
    +
    +
    + +