mirror of https://github.com/WikiTeam/wikiteam
wikiapiary bots
parent
5db991bfbb
commit
6dabef5980
@ -0,0 +1,84 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright (C) 2016 WikiTeam
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
import urllib
import urllib.request

import pywikibot
from pywikibot import pagegenerators
|
||||
|
||||
def main():
    """Scan WikiApiary [[Category:Website]] pages and add Internet Archive dump details.

    For every non-redirect page in the category that does not yet carry the
    'Internet Archive' template parameters, search archive.org for a dump of
    the wiki (keyed by its API URL) and, when an item is found, insert the
    identifier, item URL, added date and file size into the page's template
    and save the page with a bot summary.
    """
    site = pywikibot.Site('wikiapiary', 'wikiapiary')
    catname = 'Category:Website'
    cat = pywikibot.Category(site, catname)
    gen = pagegenerators.CategorizedPageGenerator(cat)
    pre = pagegenerators.PreloadingGenerator(gen)

    for page in pre:
        if page.isRedirectPage():
            continue

        wtitle = page.title()
        wtext = page.text

        if re.search('Internet Archive', wtext):
            # Page already has the IA parameters.
            # BUGFIX: the original fell through ('pass') and re-processed the
            # page, which would insert duplicate parameters on save.
            continue

        print('\n', '#' * 50, '\n', wtitle, '\n', '#' * 50)
        # MediaWiki page URLs use underscores in place of spaces.
        # BUGFIX: the original replacement string (',¡_') was mojibake and
        # produced broken links.
        print('https://wikiapiary.com/wiki/%s' % (re.sub(' ', '_', wtitle)))
        print('Missing IA parameter')

        if re.search(r'(?i)API URL=http', wtext):
            apiurl = re.findall(r'(?i)API URL=(http[^\n]+?)\n', wtext)[0]
            print('API:', apiurl)
        else:
            print('No API found in WikiApiary, skipping')
            continue

        urliasearch = 'https://archive.org/search.php?query=originalurl:"%s"' % (apiurl)
        # BUGFIX: requires `import urllib.request` (added at top of file);
        # a bare `import urllib` does not expose the submodule on Python 3.
        f = urllib.request.urlopen(urliasearch)
        raw = f.read().decode('utf-8')
        if re.search(r'(?i)Your search did not match any items', raw):
            print('No dumps found at Internet Archive')
            continue

        itemidentifier = re.findall(r'<a href="/details/([^ ]+?)" title=', raw)[0]
        itemurl = 'https://archive.org/details/%s' % (itemidentifier)
        print('Item found:', itemurl)

        # The item's *_files.xml lists every file with mtime/size metadata.
        metaurl = 'https://archive.org/download/%s/%s_files.xml' % (itemidentifier, itemidentifier)
        g = urllib.request.urlopen(metaurl)
        raw2 = g.read().decode('utf-8')
        # Collect (YYYYMMDD, size-in-bytes) pairs for the original dump files.
        itemfiles = re.findall(r'(?im)<file name="[^ ]+-(\d{8})-[^ ]+" source="original">\s*<mtime>\d+</mtime>\s*<size>(\d+)</size>', raw2)
        itemfiles = [[int(x), int(y)] for x, y in itemfiles]
        itemfiles.sort(reverse=True)  # newest dump first
        print(itemfiles)
        if not itemfiles:
            # BUGFIX: the original raised IndexError when the item had no
            # files matching the expected dump-file naming pattern.
            print('No original dump files found in item, skipping')
            continue
        itemdate = str(itemfiles[0][0])[0:4] + '/' + str(itemfiles[0][0])[4:6] + '/' + str(itemfiles[0][0])[6:8]
        itemsize = itemfiles[0][1]

        iaparams = """|Internet Archive identifier=%s
|Internet Archive URL=%s
|Internet Archive added date=%s 00:00:00
|Internet Archive file size=%s""" % (itemidentifier, itemurl, itemdate, itemsize)
        newtext = page.text
        # Insert the parameters before the first closing '}}' only.
        # BUGFIX: without count=1, re.sub rewrote EVERY '}}' in the page,
        # corrupting pages containing more than one template.
        newtext = re.sub(r'(?im)\}\}\n', '%s\n}}\n' % (iaparams), newtext, count=1)
        pywikibot.showDiff(page.text, newtext)
        page.text = newtext
        page.save('BOT - Adding dump details: %s, %s, %s bytes' % (itemidentifier, itemdate, itemsize))
|
||||
|
||||
# Script entry point: run the bot only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
||||
|
@ -0,0 +1,61 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""Family module for WikiApiary wiki."""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '$Id: 8c9856dd7c0af8d400d0d95b00bf406002729008 $'
|
||||
|
||||
from pywikibot import family
|
||||
|
||||
# The MediaWiki family
|
||||
# user-config.py: usernames['wikiapiary']['wikiapiary'] = 'User name'
|
||||
class Family(family.WikimediaFamily):

    """Family module for WikiApiary wiki."""

    def __init__(self):
        """Constructor."""
        super(Family, self).__init__()
        self.name = 'wikiapiary'

        # Single-site family: the only "language" code maps to the host.
        self.langs = {
            'wikiapiary': 'wikiapiary.com',
        }

        # Wikimedia wikis all use "bodyContent" as the id of the <div>
        # element that contains the actual page content; change this for
        # wikis that use something else (e.g., mozilla family)
        self.content_id = "bodyContent"

    def scriptpath(self, code):
        """The prefix used to locate scripts on this wiki.

        This is the value displayed when you enter {{SCRIPTPATH}} on a
        wiki page (often displayed at [[Help:Variables]] if the wiki has
        copied the master help page correctly).

        The default value is the one used on Wikimedia Foundation wikis,
        but needs to be overridden in the family file for any wiki that
        uses a different value.

        """
        return '/w'

    # Which version of MediaWiki is used? REQUIRED
    def version(self, code):
        """Return the MediaWiki version run by the wiki."""
        # Replace with the actual version being run on your wiki
        return '1.25.3'

    def code2encoding(self, code):
        """Return the encoding for a specific language wiki."""
        # Most wikis nowadays use UTF-8, but change this if yours uses
        # a different encoding
        return 'utf-8'

    def path(self, code):
        """Return the path to index.php."""
        return '/w/index.php'

    def apipath(self, code):
        """Return the path to api.php."""
        return '/w/api.php'

    def protocol(self, code):
        """Return the protocol used to reach the wiki.

        BUGFIX: pywikibot expects the lowercase scheme name ('http' or
        'https') here — it is used verbatim to build URLs and in scheme
        comparisons — so the original uppercase 'HTTPS' was incorrect.
        """
        return 'https'
|
Loading…
Reference in New Issue