#!/usr/bin/perl -- require 5; use strict; =item overview AXS Script Set, Logging Module Copyright 1997-2002 by Fluid Dynamics Please adhere to the copyright notice and conditions of use as described at the URL below. For latest version and help files, visit: http://www.xav.com/scripts/axs/ The AXS proprietary log is pipe-delimited and newline separated. Each record contains null leading and trailing fields. The fields are: 0 NULL 1 resolved-ip-address 2 ip-address 3 from-url 4 to-url 5 browser-string 6 time-seconds 0..59 7 time-minutes 0..59 8 time-hour 0..23 9 day-of-month 1..31 10 month-of-year 0..11 11 year-1900 (i.e., 100=>2000, 103=>2003) 12 day-of-week 0..6, sunday=0, saturday=6 13 day-of-year 0..364 0==jan1, 364/65=dec31 14 export||'' bit field; contains literal word "export" if and only if this was a redirect 15 NULL/newline =cut my $VERSION = '2.3.0.0031'; # Enter the location of your log file relative to this script. This is path # and file name, not a web address. Leave as-is for a default install. my $LogFile = 'log.txt'; # Logging can be disabled after the log exceeds a certain size. To use this # feature, enter a non-zero number for the maximum byte size for your log # file. Leave it at zero to always log, without size restriction. my $MaxLogSize = 0; # This script will not log visits from users with hostnames or IP addresses # listed below. Use all lowercase names. Empty the array to log everyone: my @IgnoreHosts = (); # Example: # # @IgnoreHosts = ('.foobar.org', 'host.example.co.uk', '250.245.240.'); # This maps hostnames to a consistent format; for example, if your site can # be addressed as http://xav.com/ and http://www.xav.com/ then this set of # mappings can convert all URL's to a consistent format. # # Format is: # Original-String, Final-String, # # The To and From web addresses will have a find-and-replace operation done # on them with each name-value pair in the %Maps hash. The operation will be # done as a case insensitive substring match. my %Maps = ( '/index.html' => 'http://www.xn2performance.com/', 'http://ftp.n2performance.com/' => 'http://www.xn2performance.com/', ); # Once the script is working to your satisfaction, set the $AllowDebug # variable to zero: my $AllowDebug = 1; # When this is set to 1, ax.pl will perform DNS lookups on unresolved # visitors (i.e., "140.140.58.1" becomes "anaconda.brooks.af.mil"). DNS # resolution is a sometimes slow and time-consuming process, and you can # improve speed by setting this to 0. my $resolve_dns_names = 1; # __________________________________________________________________ # # The following shouldn't need to be changed: my $domain = 'http://' . &query_env('SERVER_NAME','localhost'); # If your webserver doesn't support SERVER_NAME, then set this variable # as the top-level URL to your server without a trailing slash, e.g.: # # my $domain = 'http://www.xav.com'; # my $header = "Content-type: text/html\015\012\015\012"; # This should be deleted if the content-type header is being echoed out # to your SSI output, otherwise leave as is. # This variable allows you to correct for a different time zone if # your ISP is somewhere else. This is an integer of +/- a certain number # of hours. i.e., ISP is in Pennsylvania and owner is in Seattle: # $TimeOffsetInHours = -3; # ISP in Australia, owner in London: # $TimeOffsetInHours = +12; my $TimeOffsetInHours = 0; # If you use image redirects and the image appears broken, you may enter the # path to a real 1x1 pixel transparent GIF image in the $TransURL variable. # This real image will be used by ax.pl instead of a synthetic one if this # variable is set (meaning you will have to remove the # comment in front of # it as well): # #### my $TransURL = 'http://www.xav.com/scripts/axs/trans.gif'; my $TransURL = ''; # If every visitor is being logged twice, try setting the following variable # to 1: my $NoLogHead = 0; # ___________________________________________________________________________ my $IIS = (&query_env('SERVER_SOFTWARE') =~ m!iis!i) ? 1 : 0; my %FORM = (); &WebFormL(\%FORM); my $Export = 0; if (($0 =~ m!^(.*)(\\|/)!) and ($0 !~ m!safeperl\d*!i)) { last Err unless (chdir($1)); } # $mode is one of: # # ssi => server-side include call; no output # redir => redirect visitor to the URL given in nexturl # img => return a 1x1 pixel transparent gif # debug => returns debug print my $mode = $FORM{'mode'} || ''; # $ref is the full URL of the referring file. If not given, will query HTTP_REFERER my $ref = $FORM{'ref'} || $ENV{'HTTP_REFERER'} || ''; # $to is the full URL of the file being visited. If not given, will be pulled from various environment variables my $to = $FORM{'to'} || ''; if ($mode eq 'img') { $to = &query_env('HTTP_REFERER'); } my $nexturl = $FORM{'nexturl'} || ''; my $qs = &query_env('QUERY_STRING'); if (($mode ne 'img') and ($mode ne 'redir')) { # rev-compat code for auto-detecting mode. also used by modern mode=ssi for auto-detecting $to # SSI call: if ($ENV{'DOCUMENT_URI'}) { $mode = 'ssi' unless ($mode); $to = $domain . $ENV{'DOCUMENT_URI'} unless (/cgi_bin/axs/to/index.html); } # Alternate SSI call (via REQUEST_URI not DOCUMENT_URI) elsif ($ENV{'REQUEST_URI'} and ($qs eq '')) { $mode = 'ssi' unless ($mode); $to = $domain . $ENV{'REQUEST_URI'} unless (/cgi_bin/axs/to/index.html); } # Alt SSI call on Windows/IIS elsif (($IIS) and ($ENV{'PATH_INFO'} ne $ENV{'SCRIPT_NAME'})) { $mode = 'ssi' unless ($mode); $to = $domain . $ENV{'SCRIPT_NAME'} unless (/cgi_bin/axs/to/index.html); } # trans image logging: elsif ($qs =~ m!^(\w+)\.gif(\&ref=)?(.*)$!i) { $mode = 'img' unless ($mode); $ref = $3 if ($3); $to = &query_env('HTTP_REFERER'); } # redirect elsif (($qs) and ($qs ne 'debugme')) { $mode = 'redir' unless ($mode); $nexturl = $qs unless ($nexturl); $Export = 1; } elsif (lc($qs) eq 'debugme') { $mode = 'debug'; } } if ($mode eq 'redir') { $to = $nexturl; } # provide output the user first, independent of logging action: if ($mode eq 'ssi') { print "$header\n \n"; } elsif ($mode eq 'img') { &Print_Image; } elsif ($mode eq 'redir') { print "HTTP/1.0 302 Moved\015\012" if ($IIS); print "Location: $nexturl\015\012\015\012"; } elsif ($mode eq 'debug') { &SpawnDebugger; } else { # we should never get here, this is just a valid HTTP response # in case of mis-configuration or whatever: print "HTTP/1.0 200 OK\015\012" if ($IIS); print $header; print "

$0 - working okay - no logging command received - use ?debugme query string for more info.

"; } # decide whether or not to log this visit: my $err = ''; Err: { last Err if ($mode eq 'debug'); last Err if (&query_env('HTTP_COOKIE') =~ m!axs_no_log=1!); last Err if (($NoLogHead) and (&query_env('REQUEST_METHOD') eq 'HEAD')); my ($vhost, $vaddr) = &resolve_host($resolve_dns_names); my $ighost = ''; foreach $ighost (@IgnoreHosts) { $ighost = quotemeta($ighost); next unless ($ighost); last Err if ($vhost =~ m!$ighost!); last Err if ($vaddr =~ m!$ighost!); } # Note: you can filter on other things as well. If you want to ignore people # arriving from a certain site, like Yahoo, you can write the following (note # that HTTP_REFERER is used instead of REMOTE_HOST): # # @ignore = ('yahoo.com', 'av.yahoo.com'); # foreach (@ignore) { # exit if ($ENV{'HTTP_REFERER'} =~ m!$_!); # } # don't fill up the file system: my $LogSize = -s $LogFile || 0; last Err if (($MaxLogSize) and ($MaxLogSize < $LogSize)); # cleanse the data: my ($clean_url, $host, $port, $path, $is_valid) = &parse_url(/cgi_bin/axs/ref/index.html); if ($is_valid) { $ref = $clean_url; } ($clean_url, $host, $port, $path, $is_valid) = &parse_url(/cgi_bin/axs/to/index.html); if ($is_valid) { $to = $clean_url; } # Apply the mappings: foreach (keys %Maps) { $to =~ s!$_!$Maps{$_}!ig; $ref =~ s!$_!$Maps{$_}!ig; } &log_visit($vhost,$vaddr,$ref,$to); last Err; } sub Print_Image { print "HTTP/1.0 200 OK\015\012" if ($IIS); print "Pragma: no-cache\015\012"; print "Expires: Saturday, February 15, 1997 10:10:10 GMT\015\012"; if ($TransURL) { print "Location: $TransURL\015\012\015\012"; } else { print "Content-Type: image/gif\015\012\015\012"; binmode(STDOUT); foreach (71,73,70,56,57,97,1,0,1,0,128,255,0,192,192,192,0,0,0,33,249,4,1,0,0,0,0,44,0,0,0,0,1,0,1,0,0,1,1,50,0,59) { print pack('C',$_); } } } # ___________________________________________________________________________ # This runs a filesystem test against $LogFile and dumps a ton of (hopefully) # useful information to the screen: sub SpawnDebugger { print "HTTP/1.0 200 OK\015\012" if ($IIS); print "Content-Type: text/html\015\012\015\012"; unless ($AllowDebug) { print '

Error: no output available because $AllowDebug = 0 in this script.

'; return 0; } my $filesys_test = ''; my $filesys_ok = 0; TEST: { if (-e $LogFile) { my ($LogSize,$LastModT) = (stat($LogFile))[7,9]; $LastModT = scalar localtime($LastModT); $filesys_test .= "

The log file, $LogFile, exists with size $LogSize bytes. It was last modified on $LastModT. "; if (open(FILE,">>$LogFile")) { binmode(FILE); close(FILE); $filesys_test .= "The log file is writable.

The filesystem test passed!

"; $filesys_ok = 1; } else { $filesys_test .= <<"EOM"; However, the log file is not writable. The filesystem returned "$!" when this script tried to write to it. You need to change the file permissions to make it script-writable.

The filesystem test failed.

EOM last TEST; } } elsif (open(FILE,">>$LogFile")) { binmode(FILE); close(FILE); $filesys_test .= <<"EOM";

The log file, $LogFile, did not exist when this script started. However, this script attempted to create it for you, and the server responded that this was successful. So everything should be fine now. Reload this web page, and hopefully you will see a message that the file system test has passed. If it does not pass, and instead you get an error or you get this message again, then you will have to manually create the log file and set it's permissions.

The filesystem test needs to be run again. (reload this page)

EOM last TEST; } else { $filesys_test .= <<"EOM";

The log file, $LogFile, doesn't exist. You need to create one and give it writable permissions. Alternately, the log file may exist but the \$LogFile variable might not point to the correct location, in which case you will need to change your variable.

The filesystem test failed.

EOM last TEST; } } my $homelink = ''; my @ext = ('pl', 'cgi'); if ($0 =~ m!\.cgi$!) { @ext = ('cgi','pl'); } foreach (@ext) { my $file = 'ax-admin.' . $_; if (-e $file) { $homelink = qq!

Click here to return to $file.

\n!; last; } } print <<"EOM"; AXS Remote Debugger $homelink

Review the AXS help file if you need more help.

Filesystem Test:

$filesys_test
EOM my $axpath = 'http://' . ( $ENV{'HTTP_HOST'} || $ENV{'SERVER_NAME'} || '' ) . $ENV{'SCRIPT_NAME'}; print <<"EOM" if ($filesys_ok);

Usage Instructions:

  1. Add the "AXS tracking code" to any HTML pages that you want to have tracked.

    <script type="text/javascript">
    <!--
    \tdocument.write('<img src="$ENV{'SCRIPT_NAME'}?trans.gif&ref=');
    \tdocument.write(document.referrer);
    \tdocument.write('" height="1" width="1" alt="" />');
    // -->
    </script><noscript>
    \t<img src="$ENV{'SCRIPT_NAME'}?trans.gif" height="1" width="1" alt="" />
    </noscript>

    If your web server supports server-side includes (SSI), then you can try this alternate syntax. You might have to name the pages with a .shtml or .stm extension in order for this to work. If this syntax doesn't work, just use the Javascript code above:

    <!--#exec cgi="$ENV{'SCRIPT_NAME'}" -->
  2. Code your off-site links like this (links to pages/files that don't already contain the AXS tracking code):

    <a href="$ENV{'SCRIPT_NAME'}?http://yahoo.com/">http://yahoo.com/</a>

    Here is an example link.

If any of your HTML pages reside on a different website than AXS, then you should use:

$axpath

instead of:

$ENV{'SCRIPT_NAME'}

in the examples above.

EOM print <<"EOM";

Standard Debugging Information:

This is AXS Logging Module version $VERSION in debug mode.
The file name of this script is $0.
This script is executing under Perl version $].
The critical file system variable is \$LogFile = "$LogFile";. EOM if ($MaxLogSize) { print "MaxLogSize has been initialized to $MaxLogSize bytes."; } else { print 'MaxLogSize is not set.'; } print <<"EOM";

Webmaster Logging Override

You can disable the logging of your own visits by having the "axs_no_log=1" cookie, or by having your IP address or hostname present in the \@IgnoreHosts array.

See this help file for more information about not tracking your own visits.

Cookie Override

EOM my $cookie = &he($ENV{'HTTP_COOKIE'} || ''); print "

Your browser sent the following cookie header:

HTTP_COOKIE: $cookie
\n"; if ($cookie =~ m!axs_no_log=1!) { print "

Your visits will NOT be logged because the 'axs_no_log=1' cookie was detected.

\n"; } else { print "

Your visits will be logged, because the 'axs_no_log=1' cookie was NOT detected.

\n"; } print "

IP or Hostname Override

\n"; if (@IgnoreHosts) { my ($vhost, $vaddr) = &resolve_host($resolve_dns_names); print "

The \@IgnoreHosts array contains:
\n"; my $b_ignored = 0; foreach (@IgnoreHosts) { print "   '$_'"; if ($_) { my $qm = quotemeta($_); if ($vhost =~ m!$qm!) { print " logging disabled for you because $vhost matches\n"; $b_ignored = 1; } elsif ($vaddr =~ m!$qm!) { print " logging disabled for you because $vaddr matches\n"; $b_ignored = 1; } } print "
\n"; } print "

"; if ($b_ignored) { print "

Your client address ($vhost/$vaddr) will cause your visits to not be logged.

\n"; } else { print "

Your client address ($vhost/$vaddr) does not match any of these entries. Logging will not be disabled based on \@IgnoreHosts values.

\n"; } } else { print "

The \@IgnoreHosts array is empty. No logging overrides will occur due to IP address or hostname.

\n"; } print '

Environment Variables:

';
foreach (sort keys %ENV) {
	my ($name, $value) = &he( $_, $ENV{$_} );
	print "$name: $value\n";
	}
print <<"FOOT";


AXS Script Set Version $VERSION is copyright 1997-2002 by Fluid Dynamics.
Visit the AXS Page for help files and most recent version.
FOOT } # End SpawnDebugger. # Trim - thanks to William Boudreau for & fix sub Trim { local $_ = $_[0] ? $_[0] : ''; s!^[\r\n\s]+!!o; s![\r\n\s]+$!!o; return $_; } sub clean_path { local $_ = $_[0] || ''; # trim whitespace: $_ = &Trim($_); # strip pound signs and all that follows (links internal to a page) s!\#.*$!!; # map "/./" to "/" s!/+\./+!/!g; # map trailing "/." to "/" s!/+\.$!/!g; # map "/folder/../" => "/" while (s!([^/]+)/+\.\./+!/!) {} # map /../foo => /foo while (s!^/+\.\./+!/!) {} s!^/+\.\.$!/!; # collapse back-to-back slashes: s!/+!/!g; return $_; } sub parse_url { local $_ = $_[0] || ''; my ($clean_url, $host, $port, $path, $is_valid) = ('', '', 80, '/', 0); # add trailing slash if none present $_ .= '/' if (m!^http://([^/]+)$!i); if (m!^http://([\w|\.|\-]+)\:?(\d*)/(.*)$!i) { ($host, $port, $path, $is_valid) = (lc($1), $2, &clean_path("/$3"), 1); $port = 80 unless $port; if ($port == 80) { $clean_url = "http://$host$path"; } else { $clean_url = "http://$host:$port$path"; } } return ($clean_url, $host, $port, $path, $is_valid); } =item WebFormL Usage: &WebFormL( \%FORM ); Returns a by-reference hash of all name-value pairs submitted to the CGI script. updated: 8/21/2001 Dependencies: &url_decode &query_env =cut sub WebFormL { my ($p_hash) = @_; my @Pairs = (); if (&query_env('QUERY_STRING')) { @Pairs = split(m!\&!, &query_env('QUERY_STRING')); } else { @Pairs = @ARGV; } local $_; foreach (@Pairs) { next unless (m!^(.*?)=(.*)$!s); my ($name, $value) = (&url_decode($1), &url_decode($2)); if ($$p_hash{$name}) { $$p_hash{$name} .= ",$value"; } else { $$p_hash{$name} = $value; } } } sub url_decode { local $_ = defined($_[0]) ? $_[0] : ''; tr!+! !; s!\%([a-fA-F0-9][a-fA-F0-9])!pack('C', hex($1))!eg; return $_; } =item query_env Usage: my $remote_host = &query_env('REMOTE_HOST'); Abstraction layer for the %ENV hash. Why abstract? Here's why: 1. adds safety for -T taint checks 2. always returns '' if undef; prevent -w warnings =cut sub query_env { my ($name,$default) = @_; if (($ENV{$name}) and ($ENV{$name} =~ m!^(.*)$!s)) { return $1; } elsif (defined($default)) { return $default; } else { return ''; } } =item resolve_host Usage: my ($host,$addr) = &resolve_host($resolve_dns_names); Returns either the FQDN and IP address of the visitor, based on the variables $ENV{'REMOTE_HOST'}, $ENV{'REMOTE_ADDR'}, and $resolve_dns_names. =cut sub resolve_host { my ($resolve_dns_names) = @_; # This code converts un-resolved hostnames to their text versions, then makes # the names lowercase, and then aborts logging if this hostname is forbidden: my ($host, $addr) = (&query_env('REMOTE_HOST'), &query_env('REMOTE_ADDR')); if (($host eq '') or ($host =~ m!^\d+\.\d+\.\d+\.\d+$!)) { if (($resolve_dns_names) and ($addr =~ m!^(\d+)\.(\d+)\.(\d+)\.(\d+)$!)) { $host = (gethostbyaddr(pack('C4',$1,$2,$3,$4),2))[0]; } } $host = lc($host) || $addr; return ($host,$addr); } sub log_visit { my ($host,$addr,$ref,$to) = @_; my $logline = '|'; foreach ($host,$addr,$ref,$to,&query_env('HTTP_USER_AGENT')) { # strip delimiters: s!\||\015|\012!!sg; $logline .= $_.'|'; } foreach ((localtime(time + (3600*$TimeOffsetInHours)))[0..7]) { $logline .= $_.'|'; } $logline .= 'export|' if ($Export); $logline .= "\n"; # Make sure the record is strictly valid before writing to the log: exit unless ($logline =~ m!^\|([^\|]+)\|([^\|]+)\|([^\|]*)\|([^\|]*)\|([^\|]*)\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|\d+\|(export\|)?$!); if (open(LOG,">>$LogFile")) { binmode(LOG); print LOG $logline; close(LOG); } } sub he { my @out = @_; local $_; foreach (@out) { $_ = '' if (not defined($_)); s!\&!\&!g; s!\>!\>!g; s!\ 0)) { return @out; } else { return $out[0]; } } 1;