-
-
Save jasin/de749ac15292dc62c42f8e0a5fedd942 to your computer and use it in GitHub Desktop.
| #!/usr/bin/perl -w | |
| ################# FreeBSD Log scraper ####################### | |
| ############################################################# | |
| # Basic usage: | |
| # Perl scrape_log.pl -keywords=[KEYWORD,(S)] -log=[LOGNAME] | |
| # | |
| # -keywords= List of keywords to search for in logs | |
| # -log= Name of the log file(s) | |
| # -dir= Location of remote log file(s) | |
| # | |
| # Author: Jasin Colegrove | |
| ############################################################# | |
| use strict; | |
| use Net::SCP; | |
| use IO::Uncompress::Bunzip2 qw(bunzip2 $Bunzip2Error); | |
| # set up some global variables for this script. | |
| my ($logname, $remoteDir, @files, @keys); | |
| # functions | |
# scrape_logs([$log])
#
# Fetches one week of rotated, bzip2-compressed logs from the remote host over
# scp and records the name of every file that arrived in the global @files.
#
#   $log - base name of the log (optional; defaults to the global $logname
#          that parse_args() filled in).
#
# The files requested are "$log.0.bz2" .. "$log.6.bz2" inside $remoteDir.
# A failed transfer is reported on STDOUT and skipped; it does not stop the
# remaining downloads.  Returns nothing.
sub scrape_logs {
    my ($log) = @_;
    $log = $logname unless defined $log;

    my $hostname = "coleburt.com";
    my $username = "jasin";

    # Setup our secure copy connection
    my $scp = Net::SCP->new( { "host" => $hostname, "user" => $username } );

    # Do work! Pull a week's worth of logs
    # TODO: dynamic number of log files to fetch
    for my $i (0 .. 6) {
        my $file = "$log.$i.bz2";

        # -dir is optional on the command line.  Without it, ask for the bare
        # file name instead of building "/$file" from an undefined directory
        # (scp resolves a relative remote path against the login directory).
        my $remote = (defined $remoteDir && length $remoteDir)
                   ? "$remoteDir/$file"
                   : $file;

        print "Receiving $file...";
        if ($scp->get($remote)) {
            push @files, $file;
            print "Done\n";
        }
        else {
            print "$scp->{errstr}\n";
        }
    }
    return;
}
# parse_logs()
#
# Reads every bzip2-compressed log named in the global @files and prints to
# STDOUT each line that matches at least one of the patterns in the global
# @keys.  Keywords are used as regular expressions, exactly as typed on the
# command line.
#
# A file that cannot be opened is reported on STDERR and skipped.  A line that
# matches several keywords is printed once.  Returns nothing.
sub parse_logs {
    # Compile each keyword once instead of once per line.  Empty keywords are
    # dropped: an empty pattern would make Perl reuse the last successful
    # regex instead of matching "nothing in particular".
    my @patterns = map { qr/$_/ } grep { length } @keys;

    FILE:
    for my $file (@files) {
        my $z = IO::Uncompress::Bunzip2->new($file);
        if (!$z) {
            # The constructor returns undef on failure; calling methods on it
            # would abort the whole run, so report and move on.
            warn "$file: $Bunzip2Error\n";
            next FILE;
        }

        # getline() returns undef at end of file.
        while (defined(my $line = $z->getline())) {
            for my $pattern (@patterns) {
                if ($line =~ $pattern) {
                    print $line;
                    last;    # one hit is enough; do not print the line again
                }
            }
        }
        $z->close();
    }
    return;
}
# email_output()
#
# Placeholder: takes no arguments and currently does nothing.
# TODO: Let someone know that the logs are prepared and ready to be read
sub email_output {
    return;
}
# Script entry point: read the command line, download the logs, scan them.
parse_args(@ARGV);        # sets $logname, $remoteDir and @keys, or exits
scrape_logs($logname);    # records each fetched file name in @files
parse_logs();             # prints the lines that match a keyword
# parse_args(@argv)
#
# Interprets the command-line switches and stores the results in the script's
# globals:
#
#   -keywords=A,B,...   comma-separated list, split into @keys   (required)
#   -log=NAME           base name of the log, stored in $logname (required)
#   -dir=PATH           remote directory, stored in $remoteDir
#
# Each switch is accepted both as "-flag=value" and as "-flag value"; the
# second form is what arrives when a shell (e.g. cmd.exe) splits an unquoted
# argument at the "=" sign.  Every recognised switch is echoed to STDOUT.
#
# Exits with status 1 via pgm_exit() on an unknown argument, or when the
# keywords or the log name end up empty.
sub parse_args {
    my @args = @_;    # work on a copy; leave the caller's list untouched
    my $keywords;

    while (@args) {
        my $arg = shift @args;

        my $target;
        if    ($arg =~ /^-keywords/) { $target = \$keywords; }
        elsif ($arg =~ /^-log/)      { $target = \$logname; }
        elsif ($arg =~ /^-dir/)      { $target = \$remoteDir; }
        else {
            pgm_exit(1, "Unknown arg: $arg\n");
        }

        # Split at the first "=" only, so the value itself may contain "=".
        my ($flag, $value) = split /=/, $arg, 2;

        # "-flag value": the value arrived as the next, separate argument.
        if (!defined $value && @args && $args[0] !~ /^-/) {
            $value = shift @args;
        }

        ${$target} = $value;
        print "$flag=", (defined $value ? $value : ''), "\n";
    }

    @keys = defined $keywords ? split(/,/, $keywords) : ();

    # Spell the check out instead of relying on operator precedence.
    my $has_keywords = defined $keywords && length $keywords;
    my $has_logname  = defined $logname  && length $logname;
    if (!($has_keywords && $has_logname)) {
        pgm_exit(1, "Keywords and/or log switches must not be empty\n");
    }
    return;
}
# pgm_exit($exitcode, $msg)
#
# Prints $msg to STDOUT (skipped when $msg is undefined) and terminates the
# script with exit status $exitcode.  Never returns.
#
# Deliberately declared without a prototype: the sub is defined after the code
# that calls it, so a prototype could never be enforced and would only produce
# a "called too early to check prototype" warning under -w.
sub pgm_exit {
    my ($exitcode, $msg) = @_;
    print $msg if defined $msg;
    exit($exitcode);
}
@jasin, I played with some revisions to scrape_logs.pl, but no idea how to post those to gist???
The main things were to move the main entry point down to just before parse_args(), revert parse_args to explicit user declaration, but I absolutely do not see the need for the addition of use Switch;... use your own regex, and stop trying to make the UI of the app just a few lines... this should be the most important parsing... take control...
And realize that in a cross-port to WIN32, the = sign is lost, without adding double quotes... accept both forms...
But as I say, do not know how to post those suggested changes here...
And realize that in a cross-port to WIN32, the = sign is lost, without adding double quotes... accept both forms...
What do you mean exactly? In CMD.exe -keywords=BANNED,SPAM needs to be "-keywords=BANNED,SPAM"
confused...
revert parse_args to explicit user declaration
use your own regex
again not sure exactly what you mean here.
stop trying to make the UI of the app just a few lines... this should be the most important parsing... take control...
point taken. I went with the switch route because I thought this was a general consensus on the way to parse command line arguments across other languages as well. I see this case usage a lot when reading source code.
I asked GitHub contact support how one can have multiple editors for a gist, and I am waiting on a reply. But for the time being, I see that gists are in fact git repos, so if you fork my gist, you get my history plus any edits you make on top, which gives a full history. Fork the gist and make your changes; I can view them on your gist page.
@geoffmcl I took the liberty of posting this here. It allows one to easily see the diffs over time.