diff --git a/listsofwikis/wikispaces/users.txt b/listsofwikis/wikispaces/users.txt new file mode 100644 index 0000000..7f46df9 --- /dev/null +++ b/listsofwikis/wikispaces/users.txt @@ -0,0 +1,121 @@ +AliciaWaters,1 +Arenoosh,1 +BambuNatural,1 +BlackSheepInn,0 +BlancaRobleda,1 +BleachTeach,1 +Chase.Pereira,1 +Dan.Paleczny,1 +Dancombs98,1 +Deborah.McLaren,1 +DeborahMcLaren,0 +Diegoc93,1 +EVbusinessteacher,1 +Eurapart,0 +JessieRS,1 +JessikaTate,1 +JohnNobilski,1 +Joserios11,1 +Jrios885,1 +JuliaSanabria,1 +Justin.Dabill,1 +KevinGough11,1 +KevinMPA,1 +Lduncan107,1 +LilTlaloc,2 +MBKlein,1 +MFierros,1 +MIGUEOAX,1 +Marlenehrenberg,2 +MayraVazquez1,1 +Melissa63,2 +Moy1976,1 +MrPalmer67,1 +RondaGreen,1 +Ruukel,0 +SamanthaElizabeth,2 +ScottOsterholt1,1 +TylerZybach-DeBoer,1 +WINTAwiki,1 +Xixim,1 +abehl,1 +albabcn,1 +alex.villca,1 +andydrumm,0 +annafoster21,1 +annagmoore,1 +annaspenceley,1 +aseremomax,1 +ashleyrownd123,1 +astronomyteacher,5 +avillicana687,1 +ayuukchacha,1 +ayuukoax,1 +becari,1 +becaricampusqroaxaca,0 +biancagchan,1 +bicicletaspedromartinez,1 +bugambilias,1 +businesscoordinator,1 +bwaters23,2 +camatchitral,0 +carriehurtado,1 +celinabalasoto,1 +chacorunner,1 +charlotten22,1 +charolains,1 +chrismilnes,1 +chtopete,3 +consultoriaindigenaoaxaca,1 +cristinamartinez8,0 +despacharte,1 +dvgovteacher,1 +dvgovteacher1,1 +ecabanilla,1 +edgarbartolo,1 +edgarraygoza95,1 +englishcoordinator,1 +envia,1 +fcummings294,1 +florencio,1 +geoffb1,1 +georginatrout,1 +gerhardbuttner,2 +gregshirley,1 +hermantyler,1 +insitu1,1 +institutoamigosdelsol,1 +jamigo55,1 +jgonzalez631,1 +joannazemla,1 +joshdkirby,1 +justinrieger22,1 +katabel,1 +krestow,2 +lasmariposas,1 +liliacoronel,1 +lindaramirez3,1 +louisebranch,1 +ltimrott,1 +lulaa,1 +mariamcclain,1 +matthewmucha24,1 +nutti,1 +oaxdave,1 +oddyeti,1 +ojoqtv,1 +patwilson2,1 +planeta,0 +raylorscheider,1 +raymondkuntz,1 +respontour,0 +salliegrayson,1 +sandraluz2,1 +sergiolazomendoza,1 +sherrilivingston,1 +susanbeanaycock,1 +thistourismweek,1 
+timeunlimited,1 +turismooaxaca,1 +victoria.alahuzos,1 +willcorning,1 \ No newline at end of file diff --git a/listsofwikis/wikispaces/wikis.txt b/listsofwikis/wikispaces/wikis.txt new file mode 100644 index 0000000..c006943 --- /dev/null +++ b/listsofwikis/wikispaces/wikis.txt @@ -0,0 +1,11 @@ +astronomylinks,35 +drkrestow,1 +dvaceacademy,16 +dvapphysics,1 +dvsra,1 +enviabusiness,4 +enviaenglish,8 +gccastronomy,1 +martiangovernment,2 +oaxaca,31 +planeta,35 \ No newline at end of file diff --git a/listsofwikis/wikispaces/wikispaces-spider.py b/listsofwikis/wikispaces/wikispaces-spider.py new file mode 100644 index 0000000..812aa03 --- /dev/null +++ b/listsofwikis/wikispaces/wikispaces-spider.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Copyright (C) 2016 wikiTeam +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
import csv
import io
import re
import time

try:  # Python 2
    from urllib import quote
    import urllib2 as urlrequest
except ImportError:  # Python 3
    from urllib.parse import quote
    import urllib.request as urlrequest

# Data files, resolved relative to the current working directory.
USERS_FILE = 'users.txt'
WIKIS_FILE = 'wikis.txt'

# Count value marking a user/wiki that has been discovered but not scanned yet.
UNSCANNED = u'?'

# Seconds before a stalled HTTP request is abandoned; without a timeout a
# single hung connection would block the whole crawl forever.
HTTP_TIMEOUT = 60

_PY2 = str is bytes
_TEXT = type(u'')

# NOTE(review): the original link pattern and the section terminator used in
# getWikis() were lost in the reviewed copy of this file (the literals showed
# up empty, most likely because HTML-like text was stripped). The pattern
# below and the u'</div>' terminator in _parseWikiNames() are a
# reconstruction -- confirm against the real user-page markup.
_WIKI_LINK_RE = re.compile(r'href="https?://([^"/.]+)\.wikispaces\.com')


def _loadCounts(path):
    """Read a ``name,count`` file into a dict of text -> text.

    Blank lines are skipped. A line without a comma is taken as a bare name
    that has not been scanned yet (count ``?``).
    """
    counts = {}
    with io.open(path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # Split on the LAST comma so the count is always the final field.
            name, sep, count = line.rpartition(u',')
            if not sep:
                name, count = line, UNSCANNED
            counts[name] = count.strip()
    return counts


def _saveCounts(path, counts):
    """Write *counts* as sorted ``name,count`` lines, UTF-8, no final newline."""
    lines = sorted(u'%s,%s' % (name, count) for name, count in counts.items())
    with io.open(path, 'w', encoding='utf-8') as f:
        f.write(u'\n'.join(lines))


def loadUsers(path=USERS_FILE):
    """Return ``{username: number of wikis}`` read from *path*.

    Counts are kept as text; ``?`` means the user has not been scanned.
    """
    return _loadCounts(path)


def loadWikis(path=WIKIS_FILE):
    """Return ``{wikiname: number of users}`` read from *path*.

    Counts are kept as text; ``?`` means the wiki has not been scanned.
    """
    return _loadCounts(path)


def saveUsers(users, path=USERS_FILE):
    """Write the *users* mapping to *path*, one sorted ``name,count`` per line."""
    _saveCounts(path, users)


def saveWikis(wikis, path=WIKIS_FILE):
    """Write the *wikis* mapping to *path*, one sorted ``name,count`` per line."""
    _saveCounts(path, wikis)


def _fetch(url):
    """Download *url* with a browser-like User-Agent and return the raw bytes."""
    request = urlrequest.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    response = urlrequest.urlopen(request, timeout=HTTP_TIMEOUT)
    try:
        return response.read()
    finally:
        response.close()


def _userUrl(user):
    """Return the profile URL for *user*, with the name percent-encoded."""
    if isinstance(user, _TEXT):
        user = user.encode('utf-8')
    return 'https://www.wikispaces.com/user/view/%s' % quote(user, safe='')


def _parseMemberNames(raw):
    """Return the first column of every data row in the CSV bytes *raw*.

    The first row is treated as a header and dropped; empty rows are ignored
    so one blank line cannot discard the whole member list.
    """
    if _PY2:
        # The Python 2 csv module only works on byte strings.
        stream = io.BytesIO(raw)
    else:
        stream = io.StringIO(raw.decode('utf-8'), newline='')
    reader = csv.reader(stream, delimiter=',', quotechar='"')
    next(reader, None)  # header row
    names = []
    for row in reader:
        if not row:
            continue
        name = row[0]
        if _PY2:
            name = name.decode('utf-8')
        names.append(name)
    return names


def _parseWikiNames(html):
    """Return wiki names linked in the part of *html* after ``Wikis: ``.

    Returns an empty list when the page has no ``Wikis: `` marker.
    """
    if not isinstance(html, _TEXT):
        html = html.decode('utf-8', 'replace')
    _, marker, rest = html.partition(u'Wikis: ')
    if not marker:
        return []
    # NOTE(review): u'</div>' is an assumed terminator, see _WIKI_LINK_RE.
    section = rest.partition(u'</div>')[0]
    # 'www' is the site's own host, never a wiki; skip it in case the section
    # terminator above is wrong and site navigation links leak in.
    return [name for name in _WIKI_LINK_RE.findall(section) if name != u'www']


def getUsers(wiki):
    """Return ``{username: u'?'}`` for the members listed by *wiki*.

    The member list is requested as CSV from the wiki's ``/wiki/members``
    page. On any error a message is printed and ``{}`` is returned, so the
    caller cannot tell a failed request from an empty member list.
    """
    wikiurl = 'https://%s.wikispaces.com/wiki/members?utable=WikiTableMemberList&ut_csv=1' % (wiki)
    try:
        return dict.fromkeys(_parseMemberNames(_fetch(wikiurl)), UNSCANNED)
    except Exception:  # not a bare except: Ctrl-C must still stop the crawl
        print('Error reading %s' % wikiurl)
        return {}


def getWikis(user):
    """Return ``{wikiname: u'?'}`` for the wikis shown on *user*'s profile page.

    On any error a message is printed and ``{}`` is returned, so the caller
    cannot tell a failed request from a user without wikis.
    """
    wikiurl = _userUrl(user)
    try:
        return dict.fromkeys(_parseWikiNames(_fetch(wikiurl)), UNSCANNED)
    except Exception:  # not a bare except: Ctrl-C must still stop the crawl
        print('Error reading %s' % wikiurl)
        return {}


def main():
    """Scan every unscanned wiki for users, then every unscanned user for wikis.

    Progress is written back to the data files after each scan, so an
    interrupted run loses at most the item in flight. Items discovered in
    the second phase keep the ``?`` count and are picked up by the next run.
    """
    users = loadUsers()
    wikis = loadWikis()

    usersc = len(users)
    wikisc = len(wikis)
    print('Loading files')
    print('Loaded %s users' % usersc)
    print('Loaded %s wikis' % wikisc)

    # find more users
    print('Scanning wikis for more users')
    for wiki, numusers in list(wikis.items()):
        if numusers != UNSCANNED:  # we have scanned this wiki before, skipping
            continue
        print('Scanning https://%s.wikispaces.com for users' % (wiki))
        users2 = getUsers(wiki)
        # Stored as text so in-memory values match what a reload would give.
        wikis[wiki] = u'%d' % len(users2)
        c = 0
        for x2 in users2:
            if x2 not in users:
                users[x2] = UNSCANNED
                c += 1
        print('Found %s new users' % (c))
        if c > 0:
            saveUsers(users)
        saveWikis(wikis)
        time.sleep(1)  # be polite to the server

    # find more wikis
    print('Scanning users for more wikis')
    for user, numwikis in list(users.items()):
        if numwikis != UNSCANNED:  # we have scanned this user before, skipping
            continue
        print('Scanning %s for wikis' % _userUrl(user))
        wikis2 = getWikis(user)
        users[user] = u'%d' % len(wikis2)
        c = 0
        for x2 in wikis2:
            if x2 not in wikis:
                wikis[x2] = UNSCANNED
                c += 1
        print('Found %s new wikis' % (c))
        if c > 0:
            saveWikis(wikis)
        saveUsers(users)
        time.sleep(1)  # be polite to the server

    print('\nSummary:')
    print('Found %s new users' % (len(users) - usersc))
    print('Found %s new wikis' % (len(wikis) - wikisc))


if __name__ == '__main__':
    main()