added a user-agent and another search string

git-svn-id: https://wikiteam.googlecode.com/svn/trunk@924 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
pull/117/head
scottdb56 10 years ago
parent 009682a037
commit 51ee9e9847

@ -1,13 +1,13 @@
#!/usr/bin/perl
# Name: checkalive.pl v1.1
# Name: checkalive.pl v1.2
# Description: This script will go through a list of URLs and determine
# whether they are online and whether they are MediaWiki wikis. It should work
# with: "/index.php/Main_Page", "index.php", "api.php" and even pages
# such as: "/wiki/Pagina_principale".
#
# Created: 12/14/2013
# Most recently updated: 01/25/2014 (It's a work-in-progress...)
# Most recently updated: 01/26/2014 (It's a work-in-progress...)
# Copyright (c) 2013-2014 by Scott D. Boyd - scottdb56@gmail.com
# ====================================================================
# This program is free software: you can redistribute it and/or modify
@ -30,16 +30,18 @@ use LWP::Simple;
use LWP::UserAgent;
use Crypt::SSLeay;
# ---- Settings -------------------------------------------------------
my $slp=2; # You can change this number for seconds to sleep between requests (currently 2 seconds)
# File names. $urllist used to be declared twice in a row; only the
# second declaration was ever in effect, so just that one is kept.
my $urllist="my-URL-list.txt";
my $alivelist="alive-wikis.txt";
my $deadlist="dead-wikis.txt";

# ---- Search strings -------------------------------------------------
# Each string is matched (case-insensitively) against a fetched page in
# ParsePage; a hit on any of them marks the URL as a MediaWiki wiki.
my $pwrdby1="Powered by MediaWiki";
my $pwrdby2="poweredby_mediawiki";
# Start of the generator meta tag. Single-quoted so the embedded double
# quotes need no escaping. The statement previously lacked its
# terminating semicolon, which made the script fail to compile.
my $genmw='meta name="generator" content="MediaWiki';
my $mwapi="MediaWiki API documentation page";

# ---- Counters and state ---------------------------------------------
my $lw=0; my $dw=0; # $lw is incremented for each wiki found alive; $dw is presumably the dead-wiki count
# NOTE(review): lexical $a/$b hide the package variables that sort()
# comparison blocks rely on -- do not use sort { $a ... $b } in this
# file while these are in scope. Names kept because code elsewhere in
# the script refers to them.
my $a=1; my $b=0; my $c=0;
my $flag=0; # set to 1 by ParsePage once a page has been identified as MediaWiki

# ---- HTTP client ----------------------------------------------------
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0"); # use this user-agent to get into wikis that block spiders & robots
$ua->timeout(30);          # give up on a request after 30 seconds
$ua->show_progress(1);     # have LWP report request progress while fetching
@ -97,6 +99,10 @@ sub ParsePage {
print "It's alive and powered by Mediawiki\n"; # or: "poweredby_mediawiki"
print ALIVEFILE "$url\n"; # then it's a MediaWiki wiki
$flag=1;$lw++;
} elsif ($doc=~/$genmw/i) { # if the content generator is MediaWiki
print "It's alive and powered by Mediawiki\n"; # then it's a MediaWiki wiki
print ALIVEFILE "$url\n";
$flag=1;$lw++;
} elsif ($doc=~/$mwapi/i) { # if the api.php contains: "MediaWiki API documentation page"
print "It's alive and powered by Mediawiki\n"; # then it's a MediaWiki wiki
print ALIVEFILE "$url\n";

Loading…
Cancel
Save