From c420d4d843feb1bf66d0b284d87f07b20035ea5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20J=2E=20Rodr=C3=ADguez-Posada?= Date: Fri, 27 Jun 2014 16:59:12 +0200 Subject: [PATCH] adding spider for wikkii, updating the list (10 diff wikis, 2 new, 8 dead), adding info for list --- listsofwikis/wikkii-spider.py | 37 +++++++++++++++++++++++++++++++++++ listsofwikis/wikkii.com | 10 ++-------- listsofwikis/wikkii.com.info | 5 ++++- 3 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 listsofwikis/wikkii-spider.py diff --git a/listsofwikis/wikkii-spider.py b/listsofwikis/wikkii-spider.py new file mode 100644 index 0000000..8793429 --- /dev/null +++ b/listsofwikis/wikkii-spider.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python2 +# -*- coding: utf-8 -*- + +# Copyright (C) 2014 WikiTeam developers +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import json +import random +import re +import requests +import time + +def main(): + headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0', + } + + url = 'http://wikkii.com/wiki/Special:Farmer/list' + r = requests.get(url, headers=headers) + raw = r.text + m = re.findall(ur'