Cover V12, I08
Article
Figure 1
Figure 2
Listing 1
Listing 2
Listing 3
Listing 4
Listing 5
Listing 6
Listing 7
Listing 8
Listing 9
Listing 10

aug2003.tar

Listing 6 URLSnarf parsing script

#!/usr/bin/perl

use DBI;
use POSIX;
use Getopt::Std;
use Sys::Syslog;

use strict;
use warnings;

# ------   Configurable constants -------------------
my $pw         = 'password';
my $database   = 'fwlog';
my $host       = 'dbserver';
my $dbusername = 'username';
my $firewall   = "pix1";
my $sniffer    = "/usr/local/sbin/urlsnarf -n";
#-----------------------------------------------------


# Get commandline options.  None of the options takes an argument, so "h"
# must not be followed by ":" (otherwise a bare -h leaves $option{h} undefined
# and the help screen is never shown).
# NOTE(review): -v is accepted but nothing in this script reads it, and the
# usage text advertises a DEVICENAME argument that is never read from @ARGV.
my %option = ();
my $options_ok = getopts("dv?h", \%option);

if (!$options_ok) {
    # Unknown switch: show the help text and stop with a failure status.
    printusage();
    exit 2;
}

if ($option{h} or $option{'?'}) {
    # Help was requested: print it and stop instead of starting the sniffer.
    printusage();
    exit 0;
}

if ($option{d}) {
    # Go into daemon mode: fork once and let the parent exit.
    my $pid = fork;
    die "Couldn't fork: $!" unless defined($pid);
    exit if $pid;

    # Disassociate from our controlling terminal.
    POSIX::setsid() or die "Can't start a new session: $!";
}

# Run the sniffer program and read its output through a lexical pipe handle.
open(my $sniffer_fh, '-|', $sniffer)
    or fatalerror("Couldn't exec sniffer program $sniffer:$!\n");

# Open the database.
my $dbh = DBI->connect("DBI:Pg:dbname=$database;host=$host", $dbusername, $pw);
fatalerror("Unable to connect: $DBI::errstr\n") unless (defined $dbh);

my $sth = $dbh->prepare(
    "INSERT INTO traffic (firewall,datetime,sourceip,domain,url) VALUES (?,?,?,?,?)"
);
fatalerror("Unable to prepare INSERT:$DBI::errstr\n") unless (defined $sth);

# Dsniff returns http request information in the Extended Common Log Format
# used by many web servers.
# ECLF format (http://www.w3.org/TR/WD-logfile.html) consists of space
# delimited fields as follows:
# remotehost  rfc931-ident  authuser  [date:time timezone]
# "method requested-url protocol"  status  bytes  "referer"  "user_agent"
my $eclf_re = qr/^(\S+)         # remotehost source IP address
            .*?                 # rfc931 ident username and
                                # http auth username (folded into one)
            \[([^:]+):(\d+:\d+:\d+)    # date, time
            \s+([^\]]+)\]              # timezone
            \s+"(\S+)\s+(.*?)\s+(\S+)" # method, url, protocol
            \s+(\S+)            # http response code (status)
            \s+(\S+)            # bytes transferred
            \s+"(.*?)"          # referrer
            \s+"(.*?)"          # user agent (browser)
            $/x;

# Read each record.
LINE:
while (my $line = <$sniffer_fh>) {
    # Captures 1,2,3 and 6 of the pattern: client, date, time and url.
    my ($client, $date, $time, $url) = ($line =~ $eclf_re)[0, 1, 2, 5];

    # A line that is not in ECLF form yields no captures at all; skip it
    # rather than inserting a row made entirely of NULLs.
    next LINE unless defined $client;

    # Split the url into host and path.  When it does not have the
    # http://host/path shape both stay undef and are stored as NULL.
    my ($hostname, $path) = $url =~ m#http://(.*?)(/.*)$#;
    my $datetime = "$date $time";

    my $result = $sth->execute($firewall, $datetime, $client, $hostname, $path);
    next LINE if defined $result;

    my $shown_host  = defined $hostname    ? $hostname    : '';
    my $shown_error = defined $DBI::errstr ? $DBI::errstr : '';
    fatalerror(
        "Unable to perform INSERT:$shown_error\n"
      . "urlsnarf = $line\n"
      . "datetime = $datetime\n"
      . "sourceip = $client\n"
      . "domain=$shown_host\n"
      . "url = $url\n"
    );
}

# The sniffer has closed its output: release the database connection and
# report an abnormal sniffer exit instead of finishing silently.
$dbh->disconnect;
unless (close($sniffer_fh)) {
    # For a piped handle, close fails with $! == 0 when the child merely
    # exited non-zero; its wait status is then in $?.
    fatalerror(
        $! ? "Error closing pipe from sniffer program $sniffer:$!\n"
           : "Sniffer program $sniffer exited with status $?\n"
    );
}

# Print the command-line help text to the currently selected output handle
# (STDOUT unless the caller has select()ed another one).
# The text interpolates the script name ($0) and the configured sniffer
# command ($sniffer).  This sub only prints; it does not exit.
sub printusage {
    print <<"END";

$0: Runs the sniffer program ($sniffer).  Logs http traffic information to a database.
Usage: $0 [OPTION]... [DEVICENAME]

Options:
        -d          Become a daemon (run in the background)
        -h or -?    Command line help (this screen)


END
    return;
}

# Report a fatal error and terminate the script.
#
# Parameters:
#   $message - text describing the failure; callers pass it with a trailing
#              newline.
#
# The message is written to syslog (facility "daemon", priority "warning",
# ident $0) and then thrown with die, so it also reaches STDERR or an
# enclosing eval.  This sub never returns.
sub fatalerror {
    my $message = shift;
    $message = "fatal error (no message given)\n" unless defined $message;

    # By default Sys::Syslog dies when it cannot reach a syslog service.
    # Trap that so a logging problem cannot replace the error being reported.
    my $logged = eval {
        openlog($0, "nowait", "daemon");
        syslog("warning", "%s\n", $message);
        closelog();
        1;
    };
    warn "Could not write to syslog: $@" unless $logged;

    # Die with the actual message; a bare die would only say "Died".
    die $message;
}