commit 689a144824199277abf25dc4c0db609d307bab3e
Author: Kornel Benko <[email protected]>
Date: Sun Jan 3 15:25:09 2016 +0100
Cmake url tests: Use more sophiticated check for urls.
diff --git a/development/checkurls/CMakeLists.txt
b/development/checkurls/CMakeLists.txt
index 20af8f9..debedc6 100644
--- a/development/checkurls/CMakeLists.txt
+++ b/development/checkurls/CMakeLists.txt
@@ -28,6 +28,7 @@ add_test(NAME "check_accessible_urls"
"filesToScan=${LYXFILES_FILE}"
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"ignoredURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
+ "knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedAccessibleURLS.log")
# Test inaccessible, but revert the error marker (failed <=> passed)
@@ -39,6 +40,7 @@ add_test(NAME "check_inaccessible_urls"
"filesToScan=${LYXFILES_FILE}"
"selectedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/inaccessibleURLS"
+ "knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedInaccessibleURLS.log")
#
@@ -50,6 +52,7 @@ add_test(NAME "check_invalid_urls"
COMMAND ${PERL_EXECUTABLE} "${SEARCH_URL_SCRIPT}"
"extraURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
"revertedURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownInvalidURLS"
+ "knownToRegisterURLS=${CMAKE_CURRENT_SOURCE_DIR}/knownToRegisterURLS"
"summaryFile=${TOP_BINARY_DIR}/Testing/Temporary/LastFailedKnownInvalidURLS.log")
set(URL_TEST_NAMES "check_accessible_urls" "check_inaccessible_urls"
"check_invalid_urls")
diff --git a/development/checkurls/CheckURL.pm
b/development/checkurls/CheckURL.pm
index b7d0e03..d0a8fad 100755
--- a/development/checkurls/CheckURL.pm
+++ b/development/checkurls/CheckURL.pm
@@ -30,26 +30,11 @@ sub check_url($);
sub check_http_url($$$$)
{
- use Net::HTTP;
- use Net::HTTPS;
+ require LWP::UserAgent;
my ($protocol, $host, $path, $file) = @_;
- my $s;
- if ($protocol eq "http") {
- $s = Net::HTTP->new(Host => $host, Timeout => 120);
- }
- elsif ($protocol eq "https") {
- $s = Net::HTTPS->new(Host => $host, Timeout => 120);
- }
- else {
- print " Unhandled http protocol \"$protocol\"";
- return 3;
- }
- if (! $s) {
- print " " . $@;
- return 3;
- }
+ my $ua = LWP::UserAgent->new;
my $getp = "/";
if ($path ne "") {
$getp .= $path;
@@ -62,27 +47,32 @@ sub check_http_url($$$$)
$getp .= "/$file";
}
}
- #print " Trying to use GET => \"$getp\"";
- $s->write_request(GET => $getp, 'User-Agent' => "Mozilla/6.0");
- my($code, $mess, %h) = $s->read_response_headers;
-
- # Try to read something
my $buf;
- my $n = $s->read_entity_body($buf, 1024);
- if (! defined($n)) {
- print " Read from \"$protocol://$host$getp\" ";
+ $ua->agent("Firefox/43.0");
+ my $response = $ua->get("$protocol://$host$getp");
+ if ($response->is_success) {
+ $buf = $response->decoded_content;
+ }
+ else {
+ print " " . $response->status_line . ": ";
return 3;
}
- if ($buf =~ /\<title\>([^\<]*404[^\<]*)\<\/title\>/i) {
+ my @title = ();
+ my $res = 0;
+ while ($buf =~ s/\<title\>([^\<]*)\<\/title\>//i) {
my $title = $1;
- $title =~ s/\n/ /g;
- print "title = \"$title\"\n";
- if ($title =~ /Error 404|404 Not Found/) {
- print " Page reports 'Error 404' from \"$protocol://$host$getp\" ";
- return 3;
+ $title =~ s/[\r\n]/ /g;
+ $title =~ s/ +/ /g;
+ $title =~ s/^ //;
+ $title =~ s/ $//;
+ push(@title, $title);
+ print "title = \"$title\": ";
+ if ($title =~ /Error 404|Not Found/) {
+ print " Page reports 'Not Found' from \"$protocol://$host$getp\": ";
+ $res = 3;
}
}
- return 0;
+ return $res;
}
# Returns ($err, $isdir)
diff --git a/development/checkurls/knownToRegisterURLS
b/development/checkurls/knownToRegisterURLS
new file mode 100644
index 0000000..5a3fb81
--- /dev/null
+++ b/development/checkurls/knownToRegisterURLS
@@ -0,0 +1,7 @@
+# Urls probably exist, but to check
+# we need to register and login first
+http://www.issn.org/en/node/344
+http://www.springer.de/author/tex/help-journals.html
+http://www.wkap.nl/jrnllist.htm/JRNLHOME
+http://www.wkap.nl/kaphtml.htm/STYLEFILES
+
diff --git a/development/checkurls/search_url.pl
b/development/checkurls/search_url.pl
index 8b90044..c656dfe 100755
--- a/development/checkurls/search_url.pl
+++ b/development/checkurls/search_url.pl
@@ -59,6 +59,7 @@ my %ignoredURLS = ();
my %revertedURLS = ();
my %extraURLS = ();
my %selectedURLS = ();
+my %knownToRegisterURLS = ();
my $summaryFile = undef;
my $checkSelectedOnly = 0;
@@ -82,11 +83,14 @@ for my $arg (@ARGV) {
readUrls($val, %revertedURLS);
}
elsif ($type eq "extraURLS") {
- readUrls($val, %extraURLS);
+ readUrls($val, %extraURLS);
}
elsif ($type eq "selectedURLS") {
$checkSelectedOnly = 1;
- readUrls($val, %selectedURLS);
+ readUrls($val, %selectedURLS);
+ }
+ elsif ($type eq "knownToRegisterURLS") {
+ readUrls($val, %knownToRegisterURLS);
}
elsif ($type eq "summaryFile") {
if (open(SFO, '>', "$val")) {
@@ -104,13 +108,14 @@ my $errorcount = 0;
my $URLScount = 0;
for my $u (@urls) {
- if (defined($selectedURLS{$u})) {
- ${selectedURLS}{$u}->{count} += 1;
- }
if (defined($ignoredURLS{$u})) {
$ignoredURLS{$u}->{count} += 1;
next;
}
+ next if (defined($knownToRegisterURLS{$u}));
+ if (defined($selectedURLS{$u})) {
+ ${selectedURLS}{$u}->{count} += 1;
+ }
next if ($checkSelectedOnly && ! defined($selectedURLS{$u}));
$URLScount++;
print "Checking '$u': ";