mirror of https://github.com/WikiTeam/wikiteam
commit
269841c909
@ -1,9 +1,4 @@
|
||||
http://de.battlestarwiki.org/w/api.php
|
||||
http://en.battlestarwiki.org/w/api.php
|
||||
http://es.battlestarwiki.org/w/api.php
|
||||
http://fr.battlestarwiki.org/w/api.php
|
||||
http://media.battlestarwiki.org/w/api.php
|
||||
http://ms.battlestarwiki.org/w/api.php
|
||||
http://simple.battlestarwiki.org/w/api.php
|
||||
http://tr.battlestarwiki.org/w/api.php
|
||||
http://zh.battlestarwiki.org/w/api.php
|
||||
https://de.battlestarwiki.org/w/api.php
|
||||
https://en.battlestarwiki.org/w/api.php
|
||||
https://fr.battlestarwiki.ddns.net/api.php
|
||||
https://media.battlestarwiki.org/w/api.php
|
||||
|
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2022 Simon Liu
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
import time
|
||||
import requests
|
||||
from urllib import parse
|
||||
from tqdm import tqdm
|
||||
|
||||
def main():
    """Collect Fandom wiki API URLs and write them to the file ``fandom.com``.

    The level-2 sitemap page on community.fandom.com is fetched and the
    ``href`` of every ``<a class="title">`` anchor is taken as a link to a
    deeper sitemap page.  Each of those pages is fetched in turn and the
    same anchor pattern is used to extract wiki links.  Links are rewritten
    from ``http://`` to ``https://``, de-duplicated, sorted and written one
    per line after joining each with ``api.php``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0',
    }
    base = 'https://community.fandom.com'
    # The same anchor markup is matched on both sitemap levels, so the
    # pattern is compiled once and reused.
    link_pattern = re.compile(r'<a class=\"title\" href=\"([^>]+?)\">')

    # grab lvl3 links
    req = requests.get(base + '/wiki/Sitemap?level=2', headers=headers)
    map_lvl3 = link_pattern.findall(req.text)

    # grab wiki links
    wikis = set()
    for lvl3 in tqdm(map_lvl3):
        time.sleep(0.3)  # throttle between sitemap pages
        page_url = base + lvl3
        # Send the same headers as the first request; the original omitted
        # them here, so these requests went out with the library default UA.
        req = requests.get(page_url, headers=headers)
        if req.status_code != 200:
            # One retry after a longer pause; the second response is used
            # whatever its status is.
            time.sleep(5)
            req = requests.get(page_url, headers=headers)
        wikis.update(
            wiki.replace('http://', 'https://')
            for wiki in link_pattern.findall(req.text)
        )

    with open('fandom.com', 'w') as f:
        for wiki in sorted(wikis):
            f.write(parse.urljoin(wiki, 'api.php') + '\n')


if __name__ == '__main__':
    main()
|
File diff suppressed because it is too large
Load Diff
@ -1,35 +1,53 @@
|
||||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2014-2017 WikiTeam developers
|
||||
# Copyright (C) 2022 Simon Liu
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
import time
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin
|
||||
|
||||
def nextpage(soup):
    """Return True if *soup* contains a usable "Next page" link.

    The link is looked up as the parent of the ``<span>`` whose text is
    ``Next page``; that parent must expose an ``href`` item.

    :param soup: parsed page object offering ``find(name, text=...)``.
    :returns: ``True`` when the lookup succeeds, ``False`` otherwise.
    """
    try:
        soup.find('span', text='Next page').parent['href']
    except (AttributeError, KeyError, TypeError):
        # AttributeError: no such span (find returned None).
        # KeyError: the parent has no href.
        # TypeError: the parent is not subscriptable (e.g. None).
        # Anything else (KeyboardInterrupt, real bugs) is left to propagate
        # instead of being swallowed by a bare except.
        return False
    return True
|
||||
|
||||
def main():
    """Collect Miraheze wiki API URLs and write them to ``miraheze.org``.

    Starting from ``Special:WikiDiscover`` on meta.miraheze.org, every
    result page is fetched by following the "Next page" link for as long
    as ``nextpage`` reports one.  The ``href`` of each link in a
    ``TablePager_col_wiki_dbname`` table cell is collected, the set of
    links is de-duplicated and sorted, and each is joined with
    ``w/api.php`` and written on its own line.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0',
    }
    site = 'https://meta.miraheze.org'
    cell_pattern = re.compile(
        r'<td class=\"TablePager_col_wiki_dbname\"><a href=\"([^>]+?)\">')

    # The headers dict was previously built but never sent; pass it on
    # every request so the User-Agent actually applies.
    req = requests.get(site + '/wiki/Special:WikiDiscover', headers=headers)
    soup = BeautifulSoup(req.content, features='lxml')
    wikis = set(cell_pattern.findall(req.text))

    while nextpage(soup):
        time.sleep(0.3)  # throttle between result pages
        next_href = soup.find('span', text='Next page').parent['href']
        req = requests.get(urljoin(site, next_href), headers=headers)
        soup = BeautifulSoup(req.content, features='lxml')
        wikis.update(cell_pattern.findall(req.text))

    with open('miraheze.org', 'w') as f:
        for wiki in sorted(wikis):
            f.write(urljoin(wiki, 'w/api.php') + '\n')


if __name__ == '__main__':
    main()
|
||||
|
@ -1,183 +1,183 @@
|
||||
http://24.neoseeker.com/w/api.php
|
||||
http://aceattorney.neoseeker.com/w/api.php
|
||||
http://advancewars.neoseeker.com/w/api.php
|
||||
http://adventuretime.neoseeker.com/w/api.php
|
||||
http://animalcrossing.neoseeker.com/w/api.php
|
||||
http://attackontitan.neoseeker.com/w/api.php
|
||||
http://avatar.neoseeker.com/w/api.php
|
||||
http://banished.neoseeker.com/w/api.php
|
||||
http://banjokazooie.neoseeker.com/w/api.php
|
||||
http://batman.neoseeker.com/w/api.php
|
||||
http://battlefield.neoseeker.com/w/api.php
|
||||
http://bioshock.neoseeker.com/w/api.php
|
||||
http://bleach.neoseeker.com/w/api.php
|
||||
http://boktai.neoseeker.com/w/api.php
|
||||
http://bond.neoseeker.com/w/api.php
|
||||
http://borderlands.neoseeker.com/w/api.php
|
||||
http://boundbyflame.neoseeker.com/w/api.php
|
||||
http://bravely.neoseeker.com/w/api.php
|
||||
http://breathoffire.neoseeker.com/w/api.php
|
||||
http://brink.neoseeker.com/w/api.php
|
||||
http://callofduty.neoseeker.com/w/api.php
|
||||
http://castlecrashers.neoseeker.com/w/api.php
|
||||
http://castlevania.neoseeker.com/w/api.php
|
||||
http://childoflight.neoseeker.com/w/api.php
|
||||
http://chrono.neoseeker.com/w/api.php
|
||||
http://cnc.neoseeker.com/w/api.php
|
||||
http://cowboybebop.neoseeker.com/w/api.php
|
||||
http://crash.neoseeker.com/w/api.php
|
||||
http://crossedge.neoseeker.com/w/api.php
|
||||
http://cubeworld.neoseeker.com/w/api.php
|
||||
http://danganronpa.neoseeker.com/w/api.php
|
||||
http://darksouls.neoseeker.com/w/api.php
|
||||
http://deadoralive.neoseeker.com/w/api.php
|
||||
http://deathnote.neoseeker.com/w/api.php
|
||||
http://demonssouls.neoseeker.com/w/api.php
|
||||
http://destiny.neoseeker.com/w/api.php
|
||||
http://devilmaycry.neoseeker.com/w/api.php
|
||||
http://digimon.neoseeker.com/w/api.php
|
||||
http://disgaea.neoseeker.com/w/api.php
|
||||
http://doctorwho.neoseeker.com/w/api.php
|
||||
http://donkeykong.neoseeker.com/w/api.php
|
||||
http://doom.neoseeker.com/w/api.php
|
||||
http://dothack.neoseeker.com/w/api.php
|
||||
http://doujin.neoseeker.com/w/api.php
|
||||
http://dragonage.neoseeker.com/w/api.php
|
||||
http://dragonball.neoseeker.com/w/api.php
|
||||
http://dragonquest.neoseeker.com/w/api.php
|
||||
http://dragonsdogma.neoseeker.com/w/api.php
|
||||
http://dynastywarriors.neoseeker.com/w/api.php
|
||||
http://elderscrolls.neoseeker.com/w/api.php
|
||||
http://endlessocean.neoseeker.com/w/api.php
|
||||
http://evangelion.neoseeker.com/w/api.php
|
||||
http://fable.neoseeker.com/w/api.php
|
||||
http://fairytail.neoseeker.com/w/api.php
|
||||
http://fallout.neoseeker.com/w/api.php
|
||||
http://familyguy.neoseeker.com/w/api.php
|
||||
http://fatalfury.neoseeker.com/w/api.php
|
||||
http://fifa.neoseeker.com/w/api.php
|
||||
http://finalfantasy.neoseeker.com/w/api.php
|
||||
http://fireemblem.neoseeker.com/w/api.php
|
||||
http://footballmanager.neoseeker.com/w/api.php
|
||||
http://fullmetalalchemist.neoseeker.com/w/api.php
|
||||
http://futurama.neoseeker.com/w/api.php
|
||||
http://fzero.neoseeker.com/w/api.php
|
||||
http://gearsofwar.neoseeker.com/w/api.php
|
||||
http://glee.neoseeker.com/w/api.php
|
||||
http://godofwar.neoseeker.com/w/api.php
|
||||
http://goldensun.neoseeker.com/w/api.php
|
||||
http://granturismo.neoseeker.com/w/api.php
|
||||
http://growlanser.neoseeker.com/w/api.php
|
||||
http://gta.neoseeker.com/w/api.php
|
||||
http://gta5.neoseeker.com/w/api.php
|
||||
http://guitarhero.neoseeker.com/w/api.php
|
||||
http://gundam.neoseeker.com/w/api.php
|
||||
http://halflife.neoseeker.com/w/api.php
|
||||
http://halo.neoseeker.com/w/api.php
|
||||
http://harrypotter.neoseeker.com/w/api.php
|
||||
http://haruhi.neoseeker.com/w/api.php
|
||||
http://harvestmoon.neoseeker.com/w/api.php
|
||||
http://hearthstone.neoseeker.com/w/api.php
|
||||
http://heavyrain.neoseeker.com/w/api.php
|
||||
http://heroesofruin.neoseeker.com/w/api.php
|
||||
http://hitman.neoseeker.com/w/api.php
|
||||
http://house.neoseeker.com/w/api.php
|
||||
http://hungergames.neoseeker.com/w/api.php
|
||||
http://infamous.neoseeker.com/w/api.php
|
||||
http://inheritance.neoseeker.com/w/api.php
|
||||
http://inuyasha.neoseeker.com/w/api.php
|
||||
http://jakdaxter.neoseeker.com/w/api.php
|
||||
http://kairosoft.neoseeker.com/w/api.php
|
||||
http://kidicarus.neoseeker.com/w/api.php
|
||||
http://kingdomhearts.neoseeker.com/w/api.php
|
||||
http://kirby.neoseeker.com/w/api.php
|
||||
http://koa.neoseeker.com/w/api.php
|
||||
http://layton.neoseeker.com/w/api.php
|
||||
http://leagueoflegends.neoseeker.com/w/api.php
|
||||
http://legendofdragoon.neoseeker.com/w/api.php
|
||||
http://littlebigplanet.neoseeker.com/w/api.php
|
||||
http://lotr.neoseeker.com/w/api.php
|
||||
http://magicalstarsign.neoseeker.com/w/api.php
|
||||
http://maplestory.neoseeker.com/w/api.php
|
||||
http://mario.neoseeker.com/w/api.php
|
||||
http://masseffect.neoseeker.com/w/api.php
|
||||
http://megaman.neoseeker.com/w/api.php
|
||||
http://megamitensei.neoseeker.com/w/api.php
|
||||
http://metalgear.neoseeker.com/w/api.php
|
||||
http://metroid.neoseeker.com/w/api.php
|
||||
http://minecraft.neoseeker.com/w/api.php
|
||||
http://monsterhunter.neoseeker.com/w/api.php
|
||||
http://mortalkombat.neoseeker.com/w/api.php
|
||||
http://mother.neoseeker.com/w/api.php
|
||||
http://mtg.neoseeker.com/w/api.php
|
||||
http://mylittlepony.neoseeker.com/w/api.php
|
||||
http://naruto.neoseeker.com/w/api.php
|
||||
http://ncis.neoseeker.com/w/api.php
|
||||
http://needforspeed.neoseeker.com/w/api.php
|
||||
http://neopets.neoseeker.com/w/api.php
|
||||
http://ninjagaiden.neoseeker.com/w/api.php
|
||||
http://ninokuni.neoseeker.com/w/api.php
|
||||
http://okami.neoseeker.com/w/api.php
|
||||
http://onepiece.neoseeker.com/w/api.php
|
||||
http://persona.neoseeker.com/w/api.php
|
||||
http://pes.neoseeker.com/w/api.php
|
||||
http://pikmin.neoseeker.com/w/api.php
|
||||
http://pokemon.neoseeker.com/w/api.php
|
||||
http://princeofpersia.neoseeker.com/w/api.php
|
||||
http://ratchetclank.neoseeker.com/w/api.php
|
||||
http://reborn.neoseeker.com/w/api.php
|
||||
http://residentevil.neoseeker.com/w/api.php
|
||||
http://resonance.neoseeker.com/w/api.php
|
||||
http://rockband.neoseeker.com/w/api.php
|
||||
http://rpgmaker.neoseeker.com/w/api.php
|
||||
http://runefactory.neoseeker.com/w/api.php
|
||||
http://runescape.neoseeker.com/w/api.php
|
||||
http://sandbox.neoseeker.com/w/api.php
|
||||
http://scottpilgrim.neoseeker.com/w/api.php
|
||||
http://shadowofthecolossus.neoseeker.com/w/api.php
|
||||
http://shadowrunreturns.neoseeker.com/w/api.php
|
||||
http://shenmue.neoseeker.com/w/api.php
|
||||
http://simpsons.neoseeker.com/w/api.php
|
||||
http://skate.neoseeker.com/w/api.php
|
||||
http://skylanders.neoseeker.com/w/api.php
|
||||
http://skyrim.neoseeker.com/w/api.php
|
||||
http://slycooper.neoseeker.com/w/api.php
|
||||
http://smackdown.neoseeker.com/w/api.php
|
||||
http://smashbros.neoseeker.com/w/api.php
|
||||
http://sonic.neoseeker.com/w/api.php
|
||||
http://soulcalibur.neoseeker.com/w/api.php
|
||||
http://souleater.neoseeker.com/w/api.php
|
||||
http://spiderman.neoseeker.com/w/api.php
|
||||
http://spongebob.neoseeker.com/w/api.php
|
||||
http://spyro.neoseeker.com/w/api.php
|
||||
http://starcraft.neoseeker.com/w/api.php
|
||||
http://starfox.neoseeker.com/w/api.php
|
||||
http://stargate.neoseeker.com/w/api.php
|
||||
http://starocean.neoseeker.com/w/api.php
|
||||
http://starwars.neoseeker.com/w/api.php
|
||||
http://streetfighter.neoseeker.com/w/api.php
|
||||
http://tales.neoseeker.com/w/api.php
|
||||
http://tekken.neoseeker.com/w/api.php
|
||||
http://terraria.neoseeker.com/w/api.php
|
||||
http://thedarkness.neoseeker.com/w/api.php
|
||||
http://thesims.neoseeker.com/w/api.php
|
||||
http://thewarriors.neoseeker.com/w/api.php
|
||||
http://theworldendswithyou.neoseeker.com/w/api.php
|
||||
http://thief.neoseeker.com/w/api.php
|
||||
http://timesplitters.neoseeker.com/w/api.php
|
||||
http://tonyhawk.neoseeker.com/w/api.php
|
||||
http://twilight.neoseeker.com/w/api.php
|
||||
http://twistedmetal.neoseeker.com/w/api.php
|
||||
http://uncharted.neoseeker.com/w/api.php
|
||||
http://valkyriachronicles.neoseeker.com/w/api.php
|
||||
http://vivapinata.neoseeker.com/w/api.php
|
||||
http://wakfu.neoseeker.com/w/api.php
|
||||
http://warcraft.neoseeker.com/w/api.php
|
||||
http://warhammer.neoseeker.com/w/api.php
|
||||
http://watchdogs.neoseeker.com/w/api.php
|
||||
http://whiteknightchronicles.neoseeker.com/w/api.php
|
||||
http://wikiguides.neoseeker.com/w/api.php
|
||||
http://wow.neoseeker.com/w/api.php
|
||||
http://xenoblade.neoseeker.com/w/api.php
|
||||
http://yugioh.neoseeker.com/w/api.php
|
||||
http://zelda.neoseeker.com/w/api.php
|
||||
https://24.neoseeker.com/w/api.php
|
||||
https://aceattorney.neoseeker.com/w/api.php
|
||||
https://advancewars.neoseeker.com/w/api.php
|
||||
https://adventuretime.neoseeker.com/w/api.php
|
||||
https://animalcrossing.neoseeker.com/w/api.php
|
||||
https://attackontitan.neoseeker.com/w/api.php
|
||||
https://avatar.neoseeker.com/w/api.php
|
||||
https://banished.neoseeker.com/w/api.php
|
||||
https://banjokazooie.neoseeker.com/w/api.php
|
||||
https://batman.neoseeker.com/w/api.php
|
||||
https://battlefield.neoseeker.com/w/api.php
|
||||
https://bioshock.neoseeker.com/w/api.php
|
||||
https://bleach.neoseeker.com/w/api.php
|
||||
https://boktai.neoseeker.com/w/api.php
|
||||
https://bond.neoseeker.com/w/api.php
|
||||
https://borderlands.neoseeker.com/w/api.php
|
||||
https://boundbyflame.neoseeker.com/w/api.php
|
||||
https://bravely.neoseeker.com/w/api.php
|
||||
https://breathoffire.neoseeker.com/w/api.php
|
||||
https://brink.neoseeker.com/w/api.php
|
||||
https://callofduty.neoseeker.com/w/api.php
|
||||
https://castlecrashers.neoseeker.com/w/api.php
|
||||
https://castlevania.neoseeker.com/w/api.php
|
||||
https://childoflight.neoseeker.com/w/api.php
|
||||
https://chrono.neoseeker.com/w/api.php
|
||||
https://cnc.neoseeker.com/w/api.php
|
||||
https://cowboybebop.neoseeker.com/w/api.php
|
||||
https://crash.neoseeker.com/w/api.php
|
||||
https://crossedge.neoseeker.com/w/api.php
|
||||
https://cubeworld.neoseeker.com/w/api.php
|
||||
https://danganronpa.neoseeker.com/w/api.php
|
||||
https://darksouls.neoseeker.com/w/api.php
|
||||
https://deadoralive.neoseeker.com/w/api.php
|
||||
https://deathnote.neoseeker.com/w/api.php
|
||||
https://demonssouls.neoseeker.com/w/api.php
|
||||
https://destiny.neoseeker.com/w/api.php
|
||||
https://devilmaycry.neoseeker.com/w/api.php
|
||||
https://digimon.neoseeker.com/w/api.php
|
||||
https://disgaea.neoseeker.com/w/api.php
|
||||
https://doctorwho.neoseeker.com/w/api.php
|
||||
https://donkeykong.neoseeker.com/w/api.php
|
||||
https://doom.neoseeker.com/w/api.php
|
||||
https://dothack.neoseeker.com/w/api.php
|
||||
https://doujin.neoseeker.com/w/api.php
|
||||
https://dragonage.neoseeker.com/w/api.php
|
||||
https://dragonball.neoseeker.com/w/api.php
|
||||
https://dragonquest.neoseeker.com/w/api.php
|
||||
https://dragonsdogma.neoseeker.com/w/api.php
|
||||
https://dynastywarriors.neoseeker.com/w/api.php
|
||||
https://elderscrolls.neoseeker.com/w/api.php
|
||||
https://endlessocean.neoseeker.com/w/api.php
|
||||
https://evangelion.neoseeker.com/w/api.php
|
||||
https://fable.neoseeker.com/w/api.php
|
||||
https://fairytail.neoseeker.com/w/api.php
|
||||
https://fallout.neoseeker.com/w/api.php
|
||||
https://familyguy.neoseeker.com/w/api.php
|
||||
https://fatalfury.neoseeker.com/w/api.php
|
||||
https://fifa.neoseeker.com/w/api.php
|
||||
https://finalfantasy.neoseeker.com/w/api.php
|
||||
https://fireemblem.neoseeker.com/w/api.php
|
||||
https://footballmanager.neoseeker.com/w/api.php
|
||||
https://fullmetalalchemist.neoseeker.com/w/api.php
|
||||
https://futurama.neoseeker.com/w/api.php
|
||||
https://fzero.neoseeker.com/w/api.php
|
||||
https://gearsofwar.neoseeker.com/w/api.php
|
||||
https://glee.neoseeker.com/w/api.php
|
||||
https://godofwar.neoseeker.com/w/api.php
|
||||
https://goldensun.neoseeker.com/w/api.php
|
||||
https://granturismo.neoseeker.com/w/api.php
|
||||
https://growlanser.neoseeker.com/w/api.php
|
||||
https://gta.neoseeker.com/w/api.php
|
||||
https://gta5.neoseeker.com/w/api.php
|
||||
https://guitarhero.neoseeker.com/w/api.php
|
||||
https://gundam.neoseeker.com/w/api.php
|
||||
https://halflife.neoseeker.com/w/api.php
|
||||
https://halo.neoseeker.com/w/api.php
|
||||
https://harrypotter.neoseeker.com/w/api.php
|
||||
https://haruhi.neoseeker.com/w/api.php
|
||||
https://harvestmoon.neoseeker.com/w/api.php
|
||||
https://hearthstone.neoseeker.com/w/api.php
|
||||
https://heavyrain.neoseeker.com/w/api.php
|
||||
https://heroesofruin.neoseeker.com/w/api.php
|
||||
https://hitman.neoseeker.com/w/api.php
|
||||
https://house.neoseeker.com/w/api.php
|
||||
https://hungergames.neoseeker.com/w/api.php
|
||||
https://infamous.neoseeker.com/w/api.php
|
||||
https://inheritance.neoseeker.com/w/api.php
|
||||
https://inuyasha.neoseeker.com/w/api.php
|
||||
https://jakdaxter.neoseeker.com/w/api.php
|
||||
https://kairosoft.neoseeker.com/w/api.php
|
||||
https://kidicarus.neoseeker.com/w/api.php
|
||||
https://kingdomhearts.neoseeker.com/w/api.php
|
||||
https://kirby.neoseeker.com/w/api.php
|
||||
https://koa.neoseeker.com/w/api.php
|
||||
https://layton.neoseeker.com/w/api.php
|
||||
https://leagueoflegends.neoseeker.com/w/api.php
|
||||
https://legendofdragoon.neoseeker.com/w/api.php
|
||||
https://littlebigplanet.neoseeker.com/w/api.php
|
||||
https://lotr.neoseeker.com/w/api.php
|
||||
https://magicalstarsign.neoseeker.com/w/api.php
|
||||
https://maplestory.neoseeker.com/w/api.php
|
||||
https://mario.neoseeker.com/w/api.php
|
||||
https://masseffect.neoseeker.com/w/api.php
|
||||
https://megaman.neoseeker.com/w/api.php
|
||||
https://megamitensei.neoseeker.com/w/api.php
|
||||
https://metalgear.neoseeker.com/w/api.php
|
||||
https://metroid.neoseeker.com/w/api.php
|
||||
https://minecraft.neoseeker.com/w/api.php
|
||||
https://monsterhunter.neoseeker.com/w/api.php
|
||||
https://mortalkombat.neoseeker.com/w/api.php
|
||||
https://mother.neoseeker.com/w/api.php
|
||||
https://mtg.neoseeker.com/w/api.php
|
||||
https://mylittlepony.neoseeker.com/w/api.php
|
||||
https://naruto.neoseeker.com/w/api.php
|
||||
https://ncis.neoseeker.com/w/api.php
|
||||
https://needforspeed.neoseeker.com/w/api.php
|
||||
https://neopets.neoseeker.com/w/api.php
|
||||
https://ninjagaiden.neoseeker.com/w/api.php
|
||||
https://ninokuni.neoseeker.com/w/api.php
|
||||
https://okami.neoseeker.com/w/api.php
|
||||
https://onepiece.neoseeker.com/w/api.php
|
||||
https://persona.neoseeker.com/w/api.php
|
||||
https://pes.neoseeker.com/w/api.php
|
||||
https://pikmin.neoseeker.com/w/api.php
|
||||
https://pokemon.neoseeker.com/w/api.php
|
||||
https://princeofpersia.neoseeker.com/w/api.php
|
||||
https://ratchetclank.neoseeker.com/w/api.php
|
||||
https://reborn.neoseeker.com/w/api.php
|
||||
https://residentevil.neoseeker.com/w/api.php
|
||||
https://resonance.neoseeker.com/w/api.php
|
||||
https://rockband.neoseeker.com/w/api.php
|
||||
https://rpgmaker.neoseeker.com/w/api.php
|
||||
https://runefactory.neoseeker.com/w/api.php
|
||||
https://runescape.neoseeker.com/w/api.php
|
||||
https://sandbox.neoseeker.com/w/api.php
|
||||
https://scottpilgrim.neoseeker.com/w/api.php
|
||||
https://shadowofthecolossus.neoseeker.com/w/api.php
|
||||
https://shadowrunreturns.neoseeker.com/w/api.php
|
||||
https://shenmue.neoseeker.com/w/api.php
|
||||
https://simpsons.neoseeker.com/w/api.php
|
||||
https://skate.neoseeker.com/w/api.php
|
||||
https://skylanders.neoseeker.com/w/api.php
|
||||
https://skyrim.neoseeker.com/w/api.php
|
||||
https://slycooper.neoseeker.com/w/api.php
|
||||
https://smackdown.neoseeker.com/w/api.php
|
||||
https://smashbros.neoseeker.com/w/api.php
|
||||
https://sonic.neoseeker.com/w/api.php
|
||||
https://soulcalibur.neoseeker.com/w/api.php
|
||||
https://souleater.neoseeker.com/w/api.php
|
||||
https://spiderman.neoseeker.com/w/api.php
|
||||
https://spongebob.neoseeker.com/w/api.php
|
||||
https://spyro.neoseeker.com/w/api.php
|
||||
https://starcraft.neoseeker.com/w/api.php
|
||||
https://starfox.neoseeker.com/w/api.php
|
||||
https://stargate.neoseeker.com/w/api.php
|
||||
https://starocean.neoseeker.com/w/api.php
|
||||
https://starwars.neoseeker.com/w/api.php
|
||||
https://streetfighter.neoseeker.com/w/api.php
|
||||
https://tales.neoseeker.com/w/api.php
|
||||
https://tekken.neoseeker.com/w/api.php
|
||||
https://terraria.neoseeker.com/w/api.php
|
||||
https://thedarkness.neoseeker.com/w/api.php
|
||||
https://thesims.neoseeker.com/w/api.php
|
||||
https://thewarriors.neoseeker.com/w/api.php
|
||||
https://theworldendswithyou.neoseeker.com/w/api.php
|
||||
https://thief.neoseeker.com/w/api.php
|
||||
https://timesplitters.neoseeker.com/w/api.php
|
||||
https://tonyhawk.neoseeker.com/w/api.php
|
||||
https://twilight.neoseeker.com/w/api.php
|
||||
https://twistedmetal.neoseeker.com/w/api.php
|
||||
https://uncharted.neoseeker.com/w/api.php
|
||||
https://valkyriachronicles.neoseeker.com/w/api.php
|
||||
https://vivapinata.neoseeker.com/w/api.php
|
||||
https://wakfu.neoseeker.com/w/api.php
|
||||
https://warcraft.neoseeker.com/w/api.php
|
||||
https://warhammer.neoseeker.com/w/api.php
|
||||
https://watchdogs.neoseeker.com/w/api.php
|
||||
https://whiteknightchronicles.neoseeker.com/w/api.php
|
||||
https://wikiguides.neoseeker.com/w/api.php
|
||||
https://wow.neoseeker.com/w/api.php
|
||||
https://xenoblade.neoseeker.com/w/api.php
|
||||
https://yugioh.neoseeker.com/w/api.php
|
||||
https://zelda.neoseeker.com/w/api.php
|
@ -1,8 +1,8 @@
|
||||
Wikifarm: http://neowiki.neoseeker.com/wiki/Main_Page
|
||||
Last update: 2017-06-30
|
||||
Last update: 2022-04-12
|
||||
|
||||
Details:
|
||||
|
||||
There is a dynamic list http://neowiki.neoseeker.com/wiki/Special:WikiList
|
||||
|
||||
Run script: python neoseeker-spider.py > newlist
|
||||
Run script: python3 neoseeker-spider.py
|
||||
|
@ -1,55 +1,74 @@
|
||||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2014 WikiTeam developers
|
||||
# Copyright (C) 2022 Simon Liu
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import requests
|
||||
import time
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
|
||||
def main():
    """Collect ShoutWiki API URLs and write them to ``shoutwiki.com``.

    Two passes are made against the www.shoutwiki.com API:

    1. The members of ``Category:Flat_list_of_all_wikis`` are listed with
       a ``categorymembers`` generator, following ``gcmcontinue`` until the
       response carries no ``continue`` entry.  The page ids are kept.
    2. The page content is fetched in batches of 50 ids.  The content is
       split on ``'\\n|'`` and the first piece containing ``subdomain``
       provides the subdomain (the text after ``subdomain =``).

    The resulting API URLs are de-duplicated, sorted and written one per
    line.
    """
    url = 'http://www.shoutwiki.com/w/api.php'
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0'}
    ids, wikis = [], set()

    # grab wiki pages
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'info',
        'generator': 'categorymembers',
        'inprop': 'url',
        'gcmtitle': 'Category:Flat_list_of_all_wikis',
        'gcmlimit': 'max'
    }
    while True:
        # Named "data" rather than "json" so the module name is not shadowed.
        data = requests.get(url, params=params, headers=headers).json()
        # 'pages' is iterated for its keys, which are used as page ids.
        ids.extend(data['query']['pages'])
        gcont = data['continue']['gcmcontinue'] if 'continue' in data else ''
        if not gcont:
            break
        params['gcmcontinue'] = gcont

    # grab wiki API
    params = {
        'action': 'query',
        'format': 'json',
        'prop': 'revisions',
        'formatversion': '2',
        'rvprop': 'content',
        'rvslots': '*'
    }
    for n in tqdm(range(0, len(ids), 50)):
        params['pageids'] = '|'.join(ids[n:n + 50])
        data = requests.get(url, params=params, headers=headers).json()

        for page in data['query']['pages']:
            content = page['revisions'][0]['slots']['main']['content']
            for val in content.split('\n|'):
                if 'subdomain' in val:
                    subdomain = val.split('subdomain =')[-1].strip()
                    wikis.add('http://%s.shoutwiki.com/w/api.php' % subdomain)
                    break  # only the first matching piece is used

        time.sleep(0.3)  # throttle between batches

    # One URL per line, each newline-terminated, matching the list files
    # written by the other spiders.
    with open('shoutwiki.com', 'w') as f:
        for wiki in sorted(wikis):
            f.write(wiki + '\n')


if __name__ == '__main__':
    main()
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,38 +1,43 @@
|
||||
#!/usr/bin/env python2
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2014 WikiTeam developers
|
||||
# Copyright (C) 2014-2022 WikiTeam developers
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
import requests
|
||||
from urllib import parse
|
||||
|
||||
def main():
    """Collect wiki-site API URLs and write them to ``wiki-site.com``.

    Each listing page on www.wiki.co.il is fetched, the ``href`` of every
    link that directly follows a ``<td>`` is extracted, and the combined
    set of links is de-duplicated, sorted, joined with ``api.php`` and
    written one per line.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:24.0) Gecko/20100101 Firefox/24.0',
    }

    # Each listing page appears once; a repeated entry would only trigger a
    # redundant download because results are de-duplicated anyway.
    urls = [
        'http://www.wiki.co.il/active-wiki-all.html',
        'http://www.wiki.co.il/active-wiki-en.html',
    ]
    link_pattern = re.compile(r'<td><a href="([^>]+?)"')

    wikis = set()
    for url in urls:
        req = requests.get(url, headers=headers)
        wikis.update(link_pattern.findall(req.text))

    with open('wiki-site.com', 'w') as f:
        for wiki in sorted(wikis):
            f.write(parse.urljoin(wiki, 'api.php') + '\n')


if __name__ == '__main__':
    main()
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue