#!/usr/bin/perl # # visitors.cgi / VisitorLog - trial version # # This program is the copyrighted work of Encodable Industries. # Redistribution is prohibited, and copies are permitted only for # backup purposes. You are free to modify the program for your # own use, but you may not distribute any modified copies of it. # # Use of this program requires a one-time license fee. You can # obtain a license here: # # http://encodable.com/visitorlog/#download # # This software comes with no warranty. The author and many other # people have found it to be useful, and it is our hope that you # find it useful as well, but it comes with no guarantees. Under # no circumstances shall Encodable Industries be held liable in # any situation arising from your use of this program. We are # generally happy to provide support to all our users, but we can # make no guarantee of support. # # For more information about this program, as well as for help # and support, please visit the following pages: # # Homepage: http://encodable.com/visitorlog/ # Contact: http://encodable.com/contact/ my $version = "1.50e05trial"; $ENV{PATH} = '/bin:/usr/bin'; delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'}; ($ENV{DOCUMENT_ROOT}) = ($ENV{DOCUMENT_ROOT} =~ /(.*)/); # untaint. ############################################################################ ### Hard-coded variables section: ############################################################################ # Set the number of visitors (from the recent visitors list) to display on # each page where the VisitorLog script is called. If you just want to log # the visitors but not display a list of recent visitors on every page, # then set this to zero. # my $lines_of_output_to_display = 0; # It takes a fair amount of server power to do a database-lookup and a # database-write on every single page access. If your server isn't very # powerful, and/or if your site gets very many (~10,000 or more) visitors # per day, then you may find that VisitorLog bogs the server down. To # prevent that, adjust the two values here. Making the values smaller # will cause VisitorLog to quit early (without performing the logging) # if the server is getting too overloaded. # my $max_load = 40; my $max_instances = 20; ############################################################################ ### End of hard-coded variables section. ############################################################################ my %PREF = (); ############################################################################ ### ### User preferences ("PREFs") section: adjust these variables ### to suit your own server/setup/tastes. If you'd rather store ### and edit these in a separate file (to make upgrades easier, ### etc.) then create a file called visitors_prefs.txt in the ### same directory as the visitors.cgi script. The format for ### the lines is the same as here in the script itself; you could ### just copy or move this entire section into your separate PREFs ### file if you'd like. Comments (that is, lines starting with a ### pound-sign) and blank lines are allowed in the separate PREFs ### file, too. ### ############################################################################ ############################################################################ # Some servers seem to not set $ENV{DOCUMENT_ROOT} properly (in one case, # for users who serve pages from their home directories), so we'll make # our own version. Most of the time it should be set to the docroot, # but if necessary you can adjust this here. The default value is: # # $PREF{DOCROOT} = $ENV{DOCUMENT_ROOT}; # # IIS users will often need to set it like this: # # $PREF{DOCROOT} = 'c:\inetpub\wwwroot'; # $PREF{DOCROOT} = $ENV{DOCUMENT_ROOT}; ######################################################################## # Specify the name of the database where we'll store our visitorlog # tables. database_name can optionally have a :hostname after the db # name. # $PREF{database_name} = 'dbname:dbhost.mysite.com'; $PREF{visitor_table_name} = 'vlogtrial'; ######################################################################## # The 2 files specified here must contain one line each, and the line # must contain just one thing: your database username (in tmpfl2), and # your database password (in tmpfl1). The files are probably in your # DOCROOT, but if you'd rather specify them with absolute or relative # paths (relative to this script, that is), then disable the _in_docroot # PREF. # # Note that the database user you specify must have full MySQL # privileges: Select, Insert, Update, Delete, Create, Drop, Index, Alter. # $PREF{tmpfl1} = '/cgi-bin/p.cgi'; $PREF{tmpfl2} = '/cgi-bin/u.cgi'; $PREF{tmpfls_are_in_docroot} = 'yes'; ######################################################################## # Here you must specify the path to the world-writable (chmod a+rwx or # 0777) VisitorLog data directory. On most servers this will be in the # DOCROOT, in which case we'll prepend your DOCROOT to whatever you set # vlog_data_dir to. If for some reason you want to specify it with # an absolute path, or a path that's relative to the current directory # instead of the DOCROOT, then set _is_in_docroot to no. # $PREF{vlog_data_dir} = '/cgi-bin/vldata'; $PREF{vlog_data_dir_is_in_docroot} = 'yes'; ############################################################################ # Specify the short URLs to the VisitorLog on your server. Note that these # are not full filesystem paths; they are within your server's DOCROOT. # If you have the nice short /visitors/ URL set up on your server # (see VisitorLog homepage for instructions), then set these to: # # $PREF{vlog_url_short} = '/visitors/'; # $PREF{stats_url_short} = '/visitors/'; # # Otherwise set them to: # # $PREF{vlog_url_short} = $ENV{SCRIPT_NAME}; # $PREF{stats_url_short} = $ENV{SCRIPT_NAME}; # $PREF{vlog_url_short} = $ENV{SCRIPT_NAME}; $PREF{stats_url_short} = $ENV{SCRIPT_NAME}; ############################################################################ # You can set a time offset for the datestamps in the visitor log, in case # your server is in a different time-zone than you are. The offset is in # hours and can be negative. # $PREF{time_offset} = 0; ############################################################################ # This determines what the recent visitors list displays: visitors currently # online, visitors recently (but not necessarily currently) online, or both, # where once the online-visitor-count reaches a certain threshold, it switches # from displaying recent visitors to displaying those currently online. # So the possible values are online, recent, and both. # $PREF{show_online_or_recent} = 'recent'; $PREF{online_or_recent_both_threshold} = 5; ############################################################################ # Specify an approximate daily visitors value to limit the number of # records we'll pull from the db each time we're called. This is used # for DISPLAY only, it does NOT affect logging at all. It is used to # determine how far back into the visitor log we'll go when looking for # visitors who are still online, for the online-visitor-count. Since # we consider 1 day to be the maximum length of a visit (after which we # re-log the visitor anew), we only need to check 1 day's worth of db # records to get the online-visitor-count. This directly affects # performance, so if this script is taxing your server too hard then # you may want to set this to a smaller value. # $PREF{approx_daily_visitors} = 300; ############################################################################ # The stats page will display statistics for visitors from the past N days # by default. You can set that value here. # $PREF{default_stats_period} = 60; ############################################################################ # Some visitors -- mainly spiders for the various search engines -- have # practices that frustrate our logging efforts. In particular they often # come from multiple IPs at the same time, and don't accept cookies, so at # first glance we'd assume they are multiple unique visitors. But since we # can recognize (based on hostname or user-agent) that they are spiders, we # know that we don't want them logged 20 times in 5 minutes as 20 different # visitors; instead we'd like to just log all those 20 IPs as a single visit. # By specifying the search engines' user-agents and/or hostnames here, we can # achieve that. # # Also, some spiders/bots are freaking liars: they stuff their user-agent # with random garbage that changes on every single hit. As with the nice # spiders who just use multiple IPs, we'd like to only log these guys once # per session/day/etc, and the only way to do that is to key off their host- # name, since that doesn't change. # $PREF{spider_uas} = 'googlebot|msnbot|ia_archiver|yahoo.*slurp|teoma|appie.*walhello|gigabot|psbot|snapbot|sensis'; $PREF{spider_hosts} = 'googlebot.com$|msnbot.msn.com$|looksmart.com$|inktomisearch.com$|teoma.com$|internetserviceteam.com$|priorityrec.com$|^38.118.25.61$|picsearch.com$'; $PREF{ua_liars} = 'bhelper.com$|traffic4all.com$|public.alexa.com$'; ############################################################################ # ignore_visitor: this allows you keep certain visitors out of the log. Note # that it doesn't just hide them in the visitor-list output: it actually skips # logging them altogether. # # Variables: ip, hostname, browser, os; separator is " &&& ". Each variable # does partial matching, so "comcast" for the hostname will match ALL comcast # users: *.hsd1.pa.comcast.net, *.potshe01.pa.comcast.net, etc. So it's # best to be as specific as possible. # # Note: if HTTP_USER_AGENT is blank, then we set it to 'Unknown' before # testing. And the 'browser' and 'os' variables here are actually both # matched against the user-agent, so browser=Unknown &&& os=Unknown is # true for blank UAs. # # You can specify multiple ignore_visitor lines here. # #$PREF{ignore_visitor} = 'browser=Unknown &&& os=Unknown'; ############################################################################ # Title displayed at the top of some pages. # $PREF{title} = '

VisitorLog

'; ############################################################################ # If called with ?getname, we'll display a box for the visitor to enter # their name. Here you can specify the greeting displayed above the box. # $PREF{brief_greeting} = 'Hello'; ############################################################################ # When calling the script with ?showoutput=N (i.e. when displaying the short # list of recent visitors, instead of just silently logging), we will also # show the list of online visitors in the browser's status bar. You can # disable this here. Note that this does not apply when using ?mode=full. # $PREF{disable_status_bar_output} = 'yes'; ############################################################################ # When calling the script with ?mode=hitsgraph, we'll display a graph of # your visitor-counts from the past week. Here you can specify the colors # for this graph. # $PREF{hitsgraph_empty_color} = '#eee'; $PREF{hitsgraph_filled_color} = '#860600'; $PREF{hitsgraph_border_color} = '#000'; ######################################################################## # The default css file specified here will be used, unless your site # uses cookie-based themes and the user has the cookie, in which case, # a css file named .css will be used instead. # $PREF{path_to_css_files} = '/'; $PREF{default_css_file_name} = 'default.css'; $PREF{theme_cookie_name} = ''; ############################################################################ # If you're embedding VisitorLog into an existing layout, then you probably # want to disable the output of the full HTML tags. In that case we'll just # wrap our output in a div instead. # $PREF{print_full_html_tags} = 'yes'; $PREF{outer_container} = '
'; $PREF{outer_container_end} = '
'; ############################################################################ ### ### End of user preferences section. You probably don't want to mess with ### anything below here unless you really know what you're doing. ### ############################################################################ if($ENV{QUERY_STRING} eq 'js') { print_js_for_header(); exit; } # do this regardless of load/instances. my $load = `uptime`; if($load =~ /load average:\s*(\d+\.\d+),\s*(\d+\.\d+),\s*(\d+\.\d+)/) { my ($longterm_loadavg) = ($3 =~ /(\d+)\.\d+/); if($longterm_loadavg > $max_load) { warn "vlog: longterm_loadavg=$longterm_loadavg; quitting early since it's >$max_load, to be nice to the server.\n"; print "Content-type: text/plain\n\n"; exit; } } my ($scriptname) = ($ENV{SCRIPT_NAME} =~ m!([^/\\]+)$!); my $instances_of_self = `ps auxw |grep $scriptname |grep -v grep |wc -l`; $instances_of_self =~ s/\D//g; if($instances_of_self > $max_instances) { warn "vlog: $instances_of_self instances of $scriptname found in the process list; quitting early since it's >$max_instances, to be nice to the server.\n"; print "Content-type: text/plain\n\n"; exit; } use Time::HiRes qw(gettimeofday); my $scriptstart = gettimeofday(); if($ENV{QUERY_STRING} eq 'version') { print "Content-type: text/plain\n\n"; print "$version\n"; exit; } my ($cwd) = ($ENV{SCRIPT_FILENAME} =~ m!^(.+)/.*?$!); chdir $cwd; $|++; use CGI::Carp 'fatalsToBrowser'; use POSIX; use CGI 'standard'; use CGI::Cookie; use Time::Local; use File::Copy; use DBI; load_prefs(); my $dbh = $PREF{dbh} = get_db_connection(); my $qs = $ENV{QUERY_STRING}; create_vlog_table_if_DNE(); my (%cookies, $site_id, $cookies_enabled, $first_three_octets_of_logged_ip, $serial_id, $logged_etime, $logged_prettytime, $logged_site_id, $logged_cookies_enabled, $user_cookies, $logged_ip, $logged_host, $logged_site_username, $logged_res, $logged_dur, $logged_uri, $logged_ref, $logged_ua, $logged_datestring8, @output, $theme_cookie_value, $online_user_start_tag, $online_user_end_tag, $give_client_a_new_id_cookie); #my $logdir = 'logfiles'; #my $log = get_logfile_name(); #my $visitorcounts_log = "$logdir/stats/visitor_counts.log"; #my $visitorcounts_log = "$stats_cache_dir/visitor_counts.log"; my $site_username = 'Guest'; my $etime = time() + $PREF{time_offset}; my $global_current_etime = time() + $PREF{time_offset}; my $five_min_ago = $global_current_etime - (300); my $one_hour_ago = $global_current_etime - (3600); my $one_and_a_half_hours_ago = $global_current_etime - (3600*1.5); my $two_hours_ago = $global_current_etime - (3600*2); my $one_day_ago = $global_current_etime - (3600*24); my $online_watchdog_timeout = 180; #seconds. my $past_hits = 0; my $in_delete_commit_mode = $qs =~ /(?:^|&)action=delete_commit(?:&|$)/ ? 1 : 0; my $max_displayname_length = 24; my (%monthnum,%monthname) = (); populate_month_conversion_hashes(); if($qs =~ /(?:^|&)mode=hitsgraph(?:&|$)/) { show_hits_graph(); } else { # This must come before printing the HTTP headers. Note: sometimes returns early. get_name() if $qs =~ /getname/; # Determine the user's site_id from the cookie. get_cookies_and_determine_site_id(); # Set some date/time variables. my ($prettytime, $today, $longdatetime, $datestring8, $datestring12, $thetime) = set_date_and_time($global_current_etime); # Note: early returns. if($in_delete_commit_mode) { confirm_delete(); exit; } if($qs =~ /(?:^|&)action=searchhelp(?:&|$)/) { print_search_help(); exit; } my $dologging_start = gettimeofday(); do_logging(); #print STDERR "pid=$$: do_logging: " . (gettimeofday() - $dologging_start) . "\n"; # Not yet implemented. #log_chat_invite_and_redirect_to_chat_page(); # Show some output, if we were called that way. if($qs =~ /(?:showoutput=\d+|mode=full)/) { my $showoutput_start = gettimeofday(); show_output(); #print STDERR "pid=$$: show_output: " . (gettimeofday() - $showoutput_start) . "\n"; } # For benchmarking. # #my $scriptelapsed = gettimeofday() - $scriptstart; #$scriptelapsed =~ s/(\d+\.\d\d).*/$1/; #my $did = (); #if(!$admin_is_logged_in) { $did .= 'do_logging(), '; } #if($qs =~ /(?:showoutput=\d+|mode=full)/) { $did .= 'show_output(), '; } #$did = "visitors.cgi: limit=$PREF{approx_daily_visitors}: $scriptelapsed seconds for $did (URI=$ENV{REQUEST_URI}).\n"; #print STDERR "$did"; # In case we got here without calling any functions that send output # (which is fine), send the HTTP headers so we don't die with an error. print_server_headers(); } ################################################################################ ################################################################################ ##### Subroutines follow. ###################################################### ################################################################################ ################################################################################ sub print_server_headers { unless($PREF{output_started}) { print "Cache-Control: no-store, no-cache\n"; print "Content-type: text/html\n\n"; $PREF{output_started} = 1; } } sub get_cookies_and_determine_site_id() { %cookies = fetch CGI::Cookie; if(exists $cookies{$PREF{site_id_cookie}}) { $site_id = $cookies{$PREF{site_id_cookie}}->value; $give_client_a_new_id_cookie = 0; $cookies_enabled = 'yes'; } else { if($ENV{QUERY_STRING} =~ /siteid=(\d+)(?:&|$)/) { $site_id = $1; } else { $site_id = get_new_site_id(); } $give_client_a_new_id_cookie = 1; $cookies_enabled = 'no'; } if(exists $cookies{$PREF{site_username_cookie}}) { $site_username = $cookies{$PREF{site_username_cookie}}->value; sanitize_username($site_username) if username_is_illegal($site_username); $site_username = 'Guest' unless $site_username =~ /\w/; $cookies_enabled = 'yes'; } if(exists $cookies{$PREF{theme_cookie_name}}) { $theme_cookie_value = $cookies{$PREF{theme_cookie_name}}->value; $theme_cookie_value =~ s/\s/%20/g; $cookies_enabled = 'yes'; } else { $theme_cookie_value = 'unset'; } } sub print_js_for_header { print "Content-type: text/javascript\n\n"; print qq` var showoutput = $lines_of_output_to_display; var theRequest = false; function goajax(page) { theRequest = false; if(window.XMLHttpRequest) { theRequest = new XMLHttpRequest(); if(theRequest.overrideMimeType) { theRequest.overrideMimeType('text/xml'); } } else if(window.ActiveXObject) { try { theRequest = new ActiveXObject("Msxml2.XMLHTTP"); } catch (e) { try { theRequest = new ActiveXObject("Microsoft.XMLHTTP"); } catch (e) {} } } if(!theRequest) { // client's browser doesn't support AJAX. return false; } // IE will cache this (making it useless) unless you 1) use POST instead of GET, // and 2) send a variable instead of null in the send(). theRequest.onreadystatechange = updateVlog; theRequest.open('POST', page, true); theRequest.send('ie=junk'); } function updateVlog() { if(theRequest) { if(theRequest.readyState == 4) { if(theRequest.status == 200) { if(document.getElementById("$PREF{vlog_div_name}")) { document.getElementById("$PREF{vlog_div_name}").innerHTML = theRequest.responseText; } } scheduleVlogUpdate(); } } } function scheduleVlogUpdate() { window.setTimeout("goajax('" + get_target() + "')", 170000); } function get_vid() { var vid = ""; if(document.getElementById("vlogidnum")) { vid = document.getElementById("vlogidnum").innerHTML; } return vid; } function get_target() { return "$ENV{SCRIPT_NAME}?res=" + screen.width + "x" + screen.height + "&showoutput=" + showoutput + "&siteid=" + get_vid(); } function isNum(testval,decimalsOK) { if(typeof(testval) == 'undefined') return false; testval = testval.toString(); if (!testval.length) return false; var numbers = decimalsOK ? '.0123456789' : '0123456789'; for (i=0; iprepare("DELETE FROM $PREF{visitor_table_name} WHERE id='$site_id' AND ua='$ENV{HTTP_USER_AGENT}' AND etime>'$two_hours_ago';"); $sth->execute(); } sub show_output { my $lines_to_display; if($qs =~ /showoutput=(\d+)/) { $lines_to_display = $1; } elsif($qs =~ /(?:^|&)mode=full(?:&|$)/) { $lines_to_display = 20; } elsif($PREF{show_online_or_recent} =~ /recent/i) { $lines_to_display = $PREF{num_recent_visitors_to_display}; } elsif($PREF{show_online_or_recent} =~ /online/i) { $lines_to_display = $PREF{num_online_visitors_to_display}; } else { $lines_to_display = 8; } my $user_is_still_online; my $num_visitors_online = 0; my $lines_displayed = 0; my $total_hits = get_total_hits(); my (@online_users, $ua_display); my $css_file_name = get_css_filename(); my $hidedups = 1 if $qs =~ /hidedups/; my (%already_displayed, $chat_link, %SEARCH) = (); my $in_delete_mode = $qs =~ /(?:^|&)action=delete(?:&|$)/ ? 1 : 0; my (@output_full, @output_recent, @output_online) = (); push @output_online, "Currently online: \n"; print_server_headers(); if($qs =~ /action=search/) { my @vars = split(/&/, $qs); for(@vars) { next unless /^s_|^showoutput/; my ($var,$value) = split /=/; $value =~ s/%23/#/g; $value =~ s/%7c/|/gi; $value =~ s/\+/ /g; $SEARCH{$var} = $value; $lines_to_display = $value if $var eq 'showoutput' && $value =~ /^\d+$/; } } my $records_to_check = $PREF{approx_daily_visitors} > $lines_to_display ? $PREF{approx_daily_visitors} : $lines_to_display; my $sth = $PREF{dbh}->prepare("SELECT * FROM $PREF{visitor_table_name} ORDER BY serial_id DESC LIMIT $records_to_check;"); $sth->execute(); $sth->bind_columns( \$serial_id, \$logged_etime, \$logged_prettytime, \$logged_site_id, \$logged_cookies_enabled, \$user_cookies, \$logged_ip, \$logged_host, \$logged_site_username, \$logged_user_res, \$logged_dur, \$logged_uri, \$logged_ref, \$logged_ua, \$logged_datestring8 ); while($sth->fetchrow_arrayref) { $user_is_still_online = 0; $chat_link = ''; if(($logged_etime + $logged_dur) > ($global_current_etime - $online_watchdog_timeout)) { $user_is_still_online = 1; $num_visitors_online++; } # We don't want to go through the whole log here... just the recent ones. # # First of all, even if we may only be displaying the 5 most recent visitors, we always # want to go to $end_of_long_window so we can at least get the *count* of users currently # on the site. if($logged_etime < $end_of_long_window) { if($lines_displayed >= $lines_to_display) { last; } } my $display_name; if($logged_site_username ne 'Guest') { $display_name = $logged_site_username; $display_name =~ s/^(.{22}).*/$1.../ if length($display_name) > 22; if($user_is_still_online) { push @online_users, $display_name; } } elsif($logged_host =~ /[a-zA-Z]/) { if(length($logged_host) < 18) { $display_name = $logged_host; } else { my @octets = split(/\./, $logged_host); @octets = reverse @octets; for(@octets) { if($display_name !~ /\w/) { $display_name = $_; next; } if( (length($_) + length($display_name)) < 18 ) { $display_name = $_ . '.' . $display_name; } else { $display_name = '*' . '.' . $display_name; last; } } } if($user_is_still_online) { push @online_users, $display_name; } } else { $display_name = $logged_ip; if($user_is_still_online) { push @online_users, $logged_ip; } } if( $logged_etime < ($global_current_etime - (3600*24)) ) { next; } if($lines_displayed < $lines_to_display) { my $hours_online = sprintf("%d", $logged_dur / 3600); my $seconds_leftover = $logged_dur % 3600; if($hours_online != 0) { my $minutes_online = sprintf("%02d", $seconds_leftover / 60); $logged_dur = "${hours_online}h${minutes_online}m"; } else { my $minutes_online = sprintf("%d", $seconds_leftover / 60); $logged_dur = "${minutes_online}m"; } for($logged_ua) { s//>/g; s/"/"/g; } my $logged_user_res_display = $logged_user_res eq 'Other' ? '' : $logged_user_res; $ua_display = get_ua($logged_ua); if( ($ua_display =~ /unknown/i) && ($ENV{'QUERY_STRING'} =~ /mode=full/) ) { if($logged_ua eq 'Unknown') { $ua_display = 'Spoofed'; } else { ($ua_display) = ($logged_ua =~ /^(.{1,12})/); $ua_display .= '...'; } $ua_display = qq`$ua_display`; } if($user_is_still_online) { $online_user_start_tag = ''; $online_user_end_tag = ''; $chat_link = qq`[c]` if $PREF{'enable_chat_system'} =~ /yes/i; } else { $online_user_start_tag = ''; $online_user_end_tag = ''; } my $logged_ref_display = get_search_terms_from_ref($logged_ref); $logged_ref_display =~ s/"/"/g; $logged_ref_display =~ s/^(.{60}).*/$1\.\.\./ if length($logged_ref_display) > 60; $logged_ref = '' if $logged_ref eq 'DirectHit'; s/&/&/g for ($logged_ref, $logged_ref_display); $logged_ref_display =~ s//>/g; my $logged_ref_link = $logged_ref ? qq`$logged_ref_display` : $logged_ref_display; next unless $logged_datestring8 == $datestring8; my $logged_uri_display = $logged_uri; if(length($logged_uri_display) > 30) { if($logged_uri_display =~ /\?(.+)/) { $logged_uri_display = '...' . $1; } # If it's still too long, truncate it. $logged_uri_display =~ s/^(.{27}).*/$1\.\.\./ if length($logged_uri_display) > 30; } if($qs =~ /mode=full/) { my ($day_of_visit,$time_of_visit) = ($logged_prettytime =~ /(\w\w\w\d\d),\d\d\d\d,(\d\d:\d\d(a|p)m)$/); # Can pass ip=xx.yyy.z or host=whatever (whole or partial IP/host) to only show hits matching that IP/host: my $line_matches_search_criteria = 1; if($qs =~ /action=search/ && $qs =~ /(s_(?:ip|host|name|date|uri|res|browser|os|ref))=([^&]+?)(?:&|$)/) { my @pairs = split(/&/, $qs); foreach my $pair (@pairs) { my ($field,$value) = split(/=/, $pair, 2); $field =~ s/^s_//; next unless $value && $field =~ /^(ip|host|name|date|uri|res|browser|os|ref)$/; $value =~ s/%7c/|/gi; my @values = split(/\|/, $value); my $this_field_matches = 0; foreach my $searchterm (@values) { my $match_negatively = 0; if($searchterm =~ /^-/) { $match_negatively = 1; $searchterm =~ s/^-+//; } if( (($field eq 'ip') && ($logged_ip =~ /$searchterm/)) || (($field eq 'host') && ($logged_host =~ /$searchterm/i)) || (($field eq 'name') && ($display_name =~ /$searchterm/i)) || (($field eq 'date') && ($day_of_visit =~ /$searchterm/i)) || (($field eq 'uri') && ($logged_uri =~ /$searchterm/i)) || (($field eq 'res') && ($logged_user_res =~ /$searchterm/i)) || (($field eq 'browser') && ($ua_display =~ /$searchterm/i)) || (($field eq 'os') && ($ua_display =~ /$searchterm/i)) || ( ($field eq 'ref') && (($logged_ref =~ /$searchterm/i) || ($logged_ref_display =~ /$searchterm/i)) ) ) { if($match_negatively) { # user typed -foo but the field actually did contain foo, so # this is NOT a match. nothing to do since $this_field_matches # starts out as null. if @values contains more terms, continue # checking them. } else { $this_field_matches = 1; last; } } else { if($match_negatively) { # this field DIDN'T contain the search term, which is what the # user requested via -foo, so it IS a match. $this_field_matches = 1; last; } } } #print STDERR "this_field_matches=$this_field_matches\n"; $line_matches_search_criteria = 0 unless $this_field_matches; } } elsif($qs =~ /(?:^|&)action=showonline(?:&|$)/) { $line_matches_search_criteria = $user_is_still_online; } if($line_matches_search_criteria) { my $row = (($lines_displayed % 2) == 0) ? 'even' : 'odd'; push @output_full, qq``; if($in_delete_mode) { my ($safe_username, $safe_res, $safe_ua) = ($logged_site_username, $logged_user_res, $logged_ua); s/"/.A.CONVERTED.QUOTE./g for ($safe_username, $safe_res, $safe_ua); my $checkbox = qq``; push (@output_full, "$checkbox"); } elsif($PREF{'enable_chat_system'} =~ /yes/i) { push (@output_full, "$chat_link"); } push @output_full, "$online_user_start_tag$display_name$online_user_end_tag$logged_dur" . "$time_of_visit$day_of_visit$logged_uri_display" . "$logged_ip$ua_display" . " \n"; $lines_displayed++; } } else { if( $hidedups && ($already_displayed{"$logged_ip-$display_name"} || $already_displayed{"spider-$display_name"}) ) { next; } else { # hide any QS from the public: $logged_uri =~ s/\?.*$//; my $link_title = "$logged_dur $ua_display $logged_uri $logged_ref_display"; $link_title =~ s/"/"/g; my $displaying_this_visitor = 0; my $recent_visitor_string = qq`$online_user_start_tag$display_name$online_user_end_tag
\n`; my $online_visitor_string = qq`$online_user_start_tag$display_name$online_user_end_tag, \n`; if($PREF{'show_online_or_recent'} =~ /recent/i) { push @output_recent, $recent_visitor_string; $displaying_this_visitor = 1; } elsif($PREF{'show_online_or_recent'} =~ /online/i && $user_is_still_online) { push @output_online, $online_visitor_string; $displaying_this_visitor = 1; } elsif($PREF{'show_online_or_recent'} =~ /both/i) { # only push the first $PREF{'num_recent_visitors_to_display'} visitors into # the @output_recent list, because in "both" mode, we'll have to go much # further into the log to get all the online visitors too. push (@output_recent, $recent_visitor_string) if ($#output_recent + 1) < $PREF{'num_recent_visitors_to_display'}; push (@output_online, $online_visitor_string) if $user_is_still_online; # "both" means keep track of both recent and online visitors, and if the number # of online visitors exceeds our threshold, then display it. since it is always # true that recent_visitors >= online_visitors (because for our purposes here, # "recent" just means "those currently in the log"), that means that while in # "both" mode, we must only increment our $displaying_this_visitor variable if # the user is still online. $displaying_this_visitor = 1 if $user_is_still_online; } if($displaying_this_visitor) { $lines_displayed++; } # It seems like this should be inside the if($displaying_this_visitor) { } block # above, but it shouldn't, because that block is only used to keep track of the # *number* of visitors already displayed, which for "both" mode, means the number # of *online* visitors. So if we put this inside that block, then any recent- # visitor list that we output will fail to have its dups removed. $already_displayed{"$logged_ip-$display_name"} = 1; if($logged_host =~ /($PREF{spider_hosts})/) { $already_displayed{"spider-$display_name"} = 1; } } } } } #$sth = $PREF{dbh}->prepare("SELECT COUNT(*) FROM $PREF{visitor_table_name} WHERE date LIKE '${today}%';"); $sth = $PREF{dbh}->prepare("SELECT COUNT(*) FROM $PREF{visitor_table_name} WHERE date8 = $datestring8;"); $sth->execute; my $num_visitors_today = $sth->fetchrow; if($qs =~ /(?:^|&)mode=full(?:&|$)/) { @output = @output_full; } elsif($lines_to_display != 0) { # Determine whether to display currently-online visitors, or recent visitors. if( ($PREF{'show_online_or_recent'} =~ /online/i) || ( $PREF{'show_online_or_recent'} =~ /both/i && ($num_visitors_online >= $PREF{'online_or_recent_both_threshold'}) ) ) { @output = @output_online; $output[$#output] =~ s/,\s*$//; } else { @output = @output_recent; } } if($qs =~ /mode=full/) { print qq`\n`; print qq`\n` unless $qs =~ /action=(search|delete)/; print qq`\n` . qq`\n$num_visitors_online/$num_visitors_today` . qq`\n` . qq`\n` . qq`\n` . qq`\n` . qq``; } print qq`
\n
\n" unless (($qs =~ /justcount|justonline/) || ($lines_to_display == 0)); unless($qs =~ /mode=full/) { print qq`` unless (($qs =~ /nohyperlink/) || ($lines_to_display == 0)); print qq`` unless (($qs =~ /justcount|justonline/) || ($lines_to_display == 0)); print qq`Online $num_visitors_online / Today $num_visitors_today` unless (($qs =~ /justcount|justonline/) || ($lines_to_display == 0)); print qq`Online:   ` . (join '   ', @online_users) if (($qs =~ /justonline/) && ($lines_to_display != 0)); print qq`` unless (($qs =~ /justcount|justonline/) || ($lines_to_display == 0)); print qq`` unless (($qs =~ /nohyperlink/) || ($lines_to_display == 0)); print qq`\n`; } # close the "$PREF{vlog_div_name}" div. print qq`
\n` unless (($qs =~ /justcount|justonline/) || ($lines_to_display == 0)); if($qs =~ /mode=full/) { print "\n\n"; } } # end of sub show_output(). sub print_search_help() { print "Content-type: text/html\n\n"; print qq`\n\nEncodable VisitorLog: Help With Searching` . qq`\n\n\n` . qq`\n

Help With Searching

` . qq`\n` . qq`\n

This search feature tries to be as intuitive as possible, and/therefore as Google-like as possible.

` . qq`\n

It uses case-insensitive partial matching: for example, "jan" in the Date: field will match Jan01-Jan31, "jan2" will match Jan20-Jan29, etc.

` . qq`\n

You can use the pipe (vertical bar) character as an OR operator: for example, "comcast|aol" in the Hostname: field will match both Comcast users and AOL users.

` . qq`\n

You can also prefix a term with a dash to exclude matches: for example, "-aol" in the Hostname: field will match anyone not from an AOL hostname.

` . qq`\n

Currently the search only considers the last ` . ($PREF{'min_num_days_in_current_log'} + $PREF{'num_days_per_archive'}) . qq` days' worth of visitors; this will be updated to search through all visitors in the next release.

` . qq`\n\n\n` . qq`\n`; } sub create_random_int() { # Create a random integer by concatenating 5 sources of pseudo-random (or at least, # different for different clients at different times) numbers. # Note: make sure to concatenate here, not multiply, since you end up with a very # large integer that's virtually certain to not be prime, which means multiple # different values of the individual sources could result in the same overall # large random int. my $int = (time() + $PREF{time_offset}) . $ENV{REMOTE_ADDR} . $$ . $ENV{REMOTE_PORT} . rand() . $ENV{HTTP_USER_AGENT}; $int =~ s/\D//g; # remove any non-digits. $int =~ s/^(.{1,85}).+?/$1/; # limit it to 85 digits or fewer. return $int; } sub get_new_site_id() { return create_random_int(); } sub load_prefs() { # Pre-init stuff. # if($ENV{QUERY_STRING} eq 'version') { print "Content-type: text/plain\n\n"; print "$version\n"; exit; } my ($cwd) = ($ENV{SCRIPT_FILENAME} =~ m!^(.+)/.*?$!); unless($cwd) { $cwd = $ENV{PATH_TRANSLATED}; $cwd =~ s![^/\\]+$!!; } chdir $cwd; $PREF{on_page} = 'default'; $qs = $ENV{QUERY_STRING}; $PREF{internal_appname} = 'visitorlog'; # Fix the %ENV if necessary. # if(!$ENV{REQUEST_URI}) # IIS is crap. { $ENV{REQUEST_URI} = $ENV{PATH_INFO}; $ENV{REQUEST_URI} .= '?' . $qs if $qs; } $PREF{DOCROOT} = $ENV{DOCUMENT_ROOT} unless exists $PREF{DOCROOT}; if(!$PREF{DOCROOT}) { ($PREF{DOCROOT}) = ($ENV{SCRIPT_FILENAME} =~ m!^(.+)$ENV{SCRIPT_NAME}$!i); if(!$PREF{DOCROOT}) { # try to fix IIS garbage. my $path_translated = $ENV{PATH_TRANSLATED}; $path_translated =~ s!\\\\!/!g; $path_translated =~ s!\\!/!g; ($PREF{DOCROOT}) = ($path_translated =~ m!^(.+)$ENV{PATH_INFO}$!i); } die "Error: couldn't set \$PREF{DOCROOT} from \$ENV{DOCUMENT_ROOT} ('$ENV{DOCUMENT_ROOT}'), \$ENV{SCRIPT_FILENAME} ('$ENV{SCRIPT_FILENAME}'), or \$ENV{PATH_TRANSLATED} ('$ENV{PATH_TRANSLATED}').\n" unless $PREF{DOCROOT}; } $PREF{DOCROOT} =~ s![/\\]+$!! unless $PREF{DOCROOT} =~ m!^[/\\]+$!; # remove trailing slashes. my $prefs_file = 'visitors_prefs.txt'; for($prefs_file, "$PREF{DOCROOT}/cgi-bin/$prefs_file", "$PREF{DOCROOT}/../cgi-bin/$prefs_file") { if(-e $_) { $prefs_file = $_; my $prefs_contents = (); open(IN,"<$prefs_file") or die_nice("$PREF{internal_appname}: couldn't open prefs file '$prefs_file': $!"); flock IN, 1; seek IN, 0, 0; while() { $prefs_contents .= $_; } close IN or die_nice("$PREF{internal_appname}: couldn't close prefs file '$prefs_file': $!"); $prefs_contents =~ /(.*)/s; $prefs_contents = $1; # cheap untaint since this is our own config file. eval $prefs_contents; die_nice("Error processing your prefs file: $@") if $@; last; } } $PREF{userbase_data_dir} = '/cgi-bin/userbase/data' unless exists $PREF{userbase_data_dir}; $PREF{userbase_data_dir_is_in_docroot} = 'yes' unless exists $PREF{userbase_data_dir_is_in_docroot}; $ENV{REMOTE_HOST} = $ENV{REMOTE_ADDR}; $PREF{protoprefix} = $ENV{SERVER_PORT} =~ /443/ ? 'https://' : 'http://'; $PREF{site_id_cookie} = 'enc_id'; $PREF{time_offset} = $PREF{time_offset} * 3600 if $PREF{time_offset} =~ /^-?\d+$/; $PREF{'vlog_url_short'} = '/visitors/' unless exists $PREF{'vlog_url_short'}; $PREF{'disable_status_bar_output'} = 'no' unless exists $PREF{'disable_status_bar_output'}; $PREF{vlog_div_name} = 'vlogoutput' unless exists $PREF{vlog_div_name}; $PREF{'show_online_or_recent'} = 'recent' unless $PREF{'show_online_or_recent'} =~ /^(?:online|recent|both)$/; $PREF{'num_recent_visitors_to_display'} = 8 unless $PREF{'num_recent_visitors_to_display'} =~ /^\d+$/; $PREF{'num_online_visitors_to_display'} = 20 unless $PREF{'num_online_visitors_to_display'} =~ /^\d+$/; $PREF{'online_or_recent_both_threshold'} = 5 unless $PREF{'online_or_recent_both_threshold'} =~ /^\d+$/; $PREF{visitor_table_name} = 'visitors' unless exists $PREF{visitor_table_name}; $PREF{abbreviate_hostnames_longer_than} = 35 unless $PREF{abbreviate_hostnames_longer_than} =~ /^\d+$/; $PREF{approx_daily_visitors} = 500 unless $PREF{approx_daily_visitors} =~ /^\d+$/; $PREF{archive_counts_table} = "visitor_archive_counts" unless exists $PREF{archive_counts_table}; $PREF{num_days_login_lasts} = 15 unless exists $PREF{num_days_login_lasts}; $PREF{brief_greeting} = 'Hello' unless exists $PREF{brief_greeting}; $PREF{'stats_url_short'} = "/stats/" unless exists $PREF{'stats_url_short'}; $PREF{'show_most_simultaneous_visitors'} = 'no' unless exists $PREF{'show_most_simultaneous_visitors'}; $PREF{'stats_are_private'} = 'no' unless exists $PREF{'stats_are_private'}; $PREF{'lowest_percent_to_show'} = 2 unless exists $PREF{'lowest_percent_to_show'} && $PREF{'lowest_percent_to_show'} =~ /^\d+$/; $ENV{HTTP_REFERER} = (); $PREF{'lowest_percent_to_show'} = 0 if $ENV{'QUERY_STRING'} =~ /(?:^|&)showtiny(?:$|&)/; $PREF{'recent_visitor_table_last'} = 'yes' unless exists $PREF{'recent_visitor_table_last'}; $PREF{today_8} = strftime("%Y%m%d",localtime(time + $PREF{time_offset})); $PREF{tmpfl1} = $PREF{tmpfls_are_in_docroot} =~ /yes/i ? $PREF{DOCROOT} . $PREF{tmpfl1} : $PREF{tmpfl1}; $PREF{tmpfl2} = $PREF{tmpfls_are_in_docroot} =~ /yes/i ? $PREF{DOCROOT} . $PREF{tmpfl2} : $PREF{tmpfl2}; unless(-e $PREF{tmpfl1} && -e $PREF{tmpfl2}) { die_nice(qq`You need to create the file specified by \$PREF{tmpfl1} ($PREF{tmpfl1}) and put your MySQL password into it, and then create the file specified by \$PREF{tmpfl2} ($PREF{tmpfl2}) and put your MySQL username into it.`); } $PREF{vlog_data_dir} = $PREF{vlog_data_dir_is_in_docroot} =~ /yes/i ? $PREF{DOCROOT} . $PREF{vlog_data_dir} : $PREF{vlog_data_dir}; if(! -d $PREF{vlog_data_dir}) { die_nice("Error: your settings for \$PREF{vlog_data_dir} and \$PREF{vlog_data_dir_is_in_docroot} \nresult in \$PREF{vlog_data_dir} being set to '$PREF{vlog_data_dir}', \nbut that path does not exist.\n"); } die_nice("Error: the directory \$PREF{vlog_data_dir} ($PREF{vlog_data_dir}) must be world-readable, but it isn't.\n") if ! -r $PREF{vlog_data_dir}; die_nice("Error: the directory \$PREF{vlog_data_dir} ($PREF{vlog_data_dir}) must be world-writable, but it isn't.\n") if ! -w $PREF{vlog_data_dir}; if( ((my $mode = sprintf "%04o", ((stat( "$PREF{vlog_data_dir}" ))[2] & 07777)) ne '0777') && ($PREF{ignore_chmod_errors} !~ /yes/i) ) { die_nice( qq`Error: the directory \$PREF{vlog_data_dir} ($PREF{vlog_data_dir}) must be chmodded 0777, but it's currently $mode.` . qq`\nIn rare cases, some servers may not report 0777 even though the folder is chmodded correctly.` . qq`\nIf you're SURE you've chmodded it to 0777 (a+rwx, or "world-readable, -writable, and -executable"),` . qq`\nthen add \$PREF{ignore_chmod_errors} = 'yes'; near the top of this script and try again.\n`); } $PREF{stats_cache_dir} = $PREF{vlog_data_dir} . '/statscache' unless exists $PREF{stats_cache_dir}; create_dir_if_DNE($PREF{stats_cache_dir}, 0777); die_nice("Error: the directory \$PREF{stats_cache_dir} ($PREF{stats_cache_dir}) must be world-readable, but it isn't.\n") if ! -r $PREF{stats_cache_dir}; die_nice("Error: the directory \$PREF{stats_cache_dir} ($PREF{stats_cache_dir}) must be world-writable, but it isn't.\n") if ! -w $PREF{stats_cache_dir}; # For switch to UserBase, we're putting these in each script's load_prefs(). $PREF{site_username_cookie} = 'site_username' unless exists $PREF{site_username_cookie}; $PREF{site_password_cookie} = 'site_password' unless exists $PREF{site_password_cookie}; $PREF{site_emailaddy_cookie} = 'site_emailaddy' unless exists $PREF{site_emailaddy_cookie}; expand_custom_vars_in_prefs(\%PREF); } sub die_nice { my $msg = shift; print "Content-type: text/html\n\n"; print $msg; exit; } sub create_vlog_table_if_DNE { my $table = (); my $vlog_table_exists = 0; my $sth = $dbh->prepare(qq`show tables;`); $sth->execute(); $sth->bind_columns(\$table); while($sth->fetchrow_arrayref) { if($table eq $PREF{visitor_table_name}) { $vlog_table_exists = 1; last; } } if( ! $vlog_table_exists) { print STDERR "$0: table $PREF{visitor_table_name} does not exist; attempting to create it now.\n"; my $statement = qq`CREATE TABLE $PREF{visitor_table_name} (` . qq`serial_id BIGINT NOT NULL AUTO_INCREMENT PRIMARY KEY, ` . qq`etime INT UNSIGNED NOT NULL, ` . qq`date CHAR(21) NOT NULL, ` . qq`id VARCHAR(85) NOT NULL, ` . qq`cook_en CHAR(3) NOT NULL, ` . qq`theme VARCHAR(50) NOT NULL, ` . qq`ip VARCHAR(40) NOT NULL, ` . qq`host VARCHAR(150) NOT NULL, ` . qq`name VARCHAR(70) NOT NULL, ` . qq`res VARCHAR(16) NOT NULL, ` . qq`dur INT UNSIGNED NOT NULL, ` . qq`uri TEXT NOT NULL, ` . qq`ref TEXT, ` . qq`ua TEXT NOT NULL, ` . qq`date8 INT UNSIGNED NOT NULL ` . qq`);`; $sth = $dbh->prepare($statement); $sth->execute() or die_nice("$0: couldn't create table '$PREF{visitor_table_name}': $DBI::errstr\n"); $statement = qq`alter table $PREF{visitor_table_name} add index (etime);`; $sth = $dbh->prepare($statement); $sth->execute() or die_nice("$0: couldn't add index to etime column on table '$PREF{visitor_table_name}': $DBI::errstr\n"); $statement = qq`alter table $PREF{visitor_table_name} add index (date8);`; $sth = $dbh->prepare($statement); $sth->execute() or die_nice("$0: couldn't add index to date8 column on table '$PREF{visitor_table_name}': $DBI::errstr\n"); print STDERR "$0: created table $PREF{visitor_table_name} successfully.\n"; } } sub do_logging { my $new_visitor = 1; my ($IDs_match, $IPs_match, $UAs_match, $matching_fingerprints, @failed_fingerprints, @recent_visitors); my %fingerprints; $fingerprints{1} = 'ID'; $fingerprints{2} = 'IP'; $fingerprints{3} = 'UA'; my $end_of_short_window = $one_and_a_half_hours_ago; # $etime - 1800; # temporarily set to a half-hour. my $end_of_long_window = $one_day_ago; # $etime - (3600*8); # temporarily set to 8 hours. # If we were passed the visitor's resolution, then set it: my $user_res = 'Other'; if($qs =~ /res=(\d+x\d+)/) { $user_res = $1; } my $sth = $PREF{dbh}->prepare("SELECT * FROM $PREF{visitor_table_name} WHERE etime>'$end_of_long_window';"); $sth->execute(); $sth->bind_columns( \$serial_id, \$logged_etime, \$logged_prettytime, \$logged_site_id, \$logged_cookies_enabled, \$user_cookies, \$logged_ip, \$logged_host, \$logged_site_username, \$logged_user_res, \$logged_dur, \$logged_uri, \$logged_ref, \$logged_ua, \$logged_datestring8 ); while($sth->fetchrow_arrayref) { #($first_three_octets_of_logged_ip) = ($logged_ip =~ /^(\d+\.\d+\.\d+\.)/); $IDs_match = $IPs_match = $UAs_match = $matching_fingerprints = 0; @failed_fingerprints = (); if( (($logged_etime + $logged_dur) > $end_of_short_window) || ( ($logged_etime > $end_of_long_window) && (($logged_etime + $logged_dur) > ($global_current_etime - $online_watchdog_timeout)) ) ) { $IDs_match = ($logged_site_id eq $site_id); $IPs_match = ($logged_ip eq $ENV{'REMOTE_ADDR'}); $UAs_match = ($logged_ua eq $ENV{'HTTP_USER_AGENT'}); my $count = 0; for($IDs_match, $IPs_match, $UAs_match) { $count++; if($_ == 1) { $matching_fingerprints++; } else { push @failed_fingerprints, "Failed to match $fingerprints{$count}.\n"; } } if(($matching_fingerprints >= 2) || is_a_spider_whose_ua_and_host_match_this_log_entry($ENV{HTTP_USER_AGENT}, $ENV{REMOTE_ADDR}, $logged_ua, $logged_host)) { $new_visitor = 0; $logged_dur = $global_current_etime - $logged_etime; $logged_dur = 30 if $logged_dur < 30; # our system only starts counting after 1 minute, but since this script runs twice in a row right away, most visitors who stay less than a minute show up as "0" seconds. so assume that 30 seconds is a reasonable rough average for these. unless($ENV{REQUEST_URI} =~ /(favicon.ico$|\.css$|\.js$|$ENV{SCRIPT_NAME})/i) { $logged_uri = $ENV{REQUEST_URI}; } if( ($logged_user_res eq 'Other') && ($user_res =~ /\d+x\d+/) ) { $logged_user_res = $user_res; } my $sth = $PREF{dbh}->prepare( qq`UPDATE $PREF{visitor_table_name} ` . qq`SET id='$site_id', cook_en='$cookies_enabled', theme='$theme_cookie_value', ip='$ENV{REMOTE_ADDR}', host='$ENV{REMOTE_ADDR}', name='$site_username', res='$logged_user_res', dur='$logged_dur', uri='$logged_uri' ` . qq`WHERE serial_id='$serial_id'` # serial_id is the table's primary key and therefore is unique. it's also auto-incrementing. . qq`;` ); $sth->execute; last; } } else { unless($logged_etime > $end_of_long_window) { last; } } } if($new_visitor) { my $user_ua = $ENV{HTTP_USER_AGENT} =~ /\S/ ? $ENV{HTTP_USER_AGENT} : 'Unknown'; my $user_uri = $ENV{REQUEST_URI}; my $ignore_this_visitor = 0; my $num_matched_criteria = 0; foreach my $visitor_to_ignore (keys %{$PREF{ignore_visitor}}) { my $num_criteria = scalar keys %{$PREF{ignore_visitor}{$visitor_to_ignore}}; my $matching_criteria = 0; foreach my $criteria (keys %{$PREF{ignore_visitor}{$visitor_to_ignore}}) { if($criteria eq 'ip' && $ENV{REMOTE_ADDR} =~ /$PREF{ignore_visitor}{$visitor_to_ignore}{$criteria}/i) { $matching_criteria++; } elsif($criteria eq 'hostname' && $ENV{REMOTE_ADDR} =~ /$PREF{ignore_visitor}{$visitor_to_ignore}{$criteria}/i) { $matching_criteria++; } elsif($criteria eq 'browser' && $user_ua =~ /$PREF{ignore_visitor}{$visitor_to_ignore}{$criteria}/i) { $matching_criteria++; } elsif($criteria eq 'os' && $user_ua =~ /$PREF{ignore_visitor}{$visitor_to_ignore}{$criteria}/i) { $matching_criteria++; } } if($matching_criteria == $num_criteria) { $ignore_this_visitor = 1; $num_matched_criteria = $matching_criteria; last; } } if($ignore_this_visitor) { my $vlog_string = "$global_current_etime $prettytime $site_id $cookies_enabled $theme_cookie_value $ENV{REMOTE_ADDR} $ENV{REMOTE_ADDR} $site_username $user_res 30 $user_uri $user_ua\n"; print STDERR "Ignoring this visitor based on $num_matched_criteria matched criteria: $vlog_string"; } else { my $sth = $PREF{dbh}->prepare( qq`INSERT INTO $PREF{visitor_table_name} ` . qq`(etime,date,id,cook_en,theme,ip,host,name,res,dur,uri,ua,date8) ` . qq`VALUES('$global_current_etime', '$prettytime', '$site_id', '$cookies_enabled', '$theme_cookie_value', '$ENV{REMOTE_ADDR}', '$ENV{REMOTE_ADDR}', '$site_username', '$user_res', '30', '$user_uri', '$user_ua', '$datestring8') ` . qq`;` ); $sth->execute() or die "$0: $DBI::errstr\n"; } } # print this no matter what; it's hidden off-screen. print_server_headers(); print qq`\n
$site_id
\n\n`; if($give_client_a_new_id_cookie) { # Note that do_logging() (and therefore this if()) only happens if logging=none # is NOT passed in the QS. print_server_headers(); print qq`` . qq`\n`; } } sub get_total_hits { my $sth = $PREF{dbh}->prepare("SELECT COUNT(*) FROM $PREF{visitor_table_name};"); $sth->execute; my $hits_in_current_log = $sth->fetchrow; my $hits = $hits_in_current_log; return $hits; } # Get the past stats from the log containing them. #sub get_past_hits() #{ # # $past_hits is now a global var, so that if this function # # is called more than once, we don't have to actually open # # the file each time. # # if($past_hits == 0) # { # create_file_if_DNE($visitorcounts_log,0666); # open(IN,"<$visitorcounts_log") or die "$0: couldn't open $visitorcounts_log: $!\n"; # flock IN, 1; # seek IN, 0, 0; # while() # { # if(/^\d{8}-\d{8}::\d+-\d+::(\d+)$/) { $past_hits += $1; } # } # close IN or die "$0: couldn't close $visitorcounts_log: $!\n"; # } # # return $past_hits; #} sub db_table_exists($$) { my $dbh = shift; my $tablename = shift; #my $sth = $dbh->prepare("SELECT * FROM $tablename"); #my $rv = $sth->execute; # if table doesn't exist, then undef is returned. #return ($rv eq undef) ? 0 : 1; my @alltables = $dbh->tables(); # or just use SHOW TABLES with a normal prepare/execute... die "$0: couldn't get table names\n" unless @alltables; my $exists = 0; foreach my $table (@alltables) { $table =~ s/[\`'"]//g; # because $dbh->tables() returns the table-names quoted with backticks. if($table eq $tablename) { $exists = 1; last; } } return $exists; } #sub get_logfile_name() #{ # my $dir = $logdir; # create_dir_if_DNE($dir,0777); # # my $file = "$dir/vlogfile_current.log"; # create_file_if_DNE($file,0666); # # return $file; #} sub confirm_delete() { my $query = new CGI; my $password = $query->param('password'); print "Content-type: text/html\n\n"; # If we're integrating with the site's existing login system, we have no way to # verify the login credentials, so we can't use this check. # unless($PREF{integrate_with_existing_login_system} =~ /yes/i) { unless(account_validates($logged_in_username, $password, 'admin')) { print qq`

Authorization failed.  Please try again.

\n`; return; } } my (%successful_deletions, %failed_deletions, @new_contents) = (); foreach my $visitor_to_delete ($query->param) { next if $visitor_to_delete eq 'password'; $failed_deletions{$visitor_to_delete} = 1; } print qq`\n\n`; foreach my $visitor_to_delete ($query->param) { next if $visitor_to_delete eq 'password'; my ($etime, $id, $name, $res, $ua) = split(/\.ENC\|SEP\|STRING\./, $visitor_to_delete); s/\.A\.CONVERTED\.QUOTE\./"/g for ($name, $res, $ua); my $sth = $PREF{dbh}->prepare("DELETE FROM $PREF{visitor_table_name} WHERE etime='$etime' AND id='$id' AND name='$name' AND res='$res' AND ua='$ua';"); if(my $retval = $sth->execute()) { unless($retval =~ /^(0|0E0)$/) # execute() returns '0E0' if no rows were affected by the statement. { delete $failed_deletions{$visitor_to_delete}; #$successful_deletions{$visitor_to_delete} = "$logged_prettytime, $logged_ip, $logged_host
$logged_site_username, $logged_user_res, $logged_ref, $unescaped_ua"; $successful_deletions{$visitor_to_delete} = "$name, $id, $etime
$res, $ua"; # store the log line for display. } } } if(%failed_deletions) { foreach my $key (keys %failed_deletions) { print qq`

Failed to delete the visitor identified by:
"$key"

\n`; } } if(%successful_deletions) { foreach my $key (keys %successful_deletions) { print qq`

Successfully deleted the visitor identified by:
$successful_deletions{$key}"

\n`; } } print qq`

When you are finished with all your deletions, make sure to click the re-create links at the bottom of the stats page, to bring your stats up to date.

\n`; print qq`

Home  |  VisitorLog  |  Search  |  Delete More

\n`; print qq`\n`; } sub get_name() { # So we can return them to the correct page: my $ref = $ENV{HTTP_REFERER}; use CGI qw(:param); my $name = param('name'); # This way the original (real) ref gets preserved even # if this script is called more than once: if(!($ref) || ($ref =~ /$ENV{SCRIPT_NAME}/)) { $ref = param('ref'); } if( ($ENV{'REQUEST_METHOD'} =~ /^post$/i) && $name ) { my @cursearray = ("fuck", "shit", "damn", "bastard", "bitch", "\bass\b"); foreach $naughtyword (@cursearray) { $name =~ s/$naughtyword/[oops]/gi; } # If they enter a name that exists as a login for our system, prompt for the password. if(membername_is_taken($name) || adminname_is_taken($name)) { print "Content-type: text/html\n\n"; print qq`\nAuthenticate yourself.`; print qq`\n

That name is reserved.
You must provide the password.

\n`; print qq`
`; print qq`
`; print qq`\n`; print qq`
\n`; print qq`
\n`; print qq`Keep me logged in for\n`; print qq`
a few days (if unchecked,`; print qq`
you get logged out when`; print qq`
the browser is closed)`; print qq`

`; print qq``; print qq`
`; print qq`
\n`; print qq`

Or, go back and enter a different name.

`; print qq`\n`; exit; } else { if(username_is_illegal($name)) { ##### Warning: early return ############################################################# print "Content-type: text/html\n\n"; my $error = get_invalid_name_error_message(); print qq`

$error

\n

Please go back and try again.

\n\n\n`; exit; } else { sanitize_username($name); my $c = new CGI::Cookie( -name => $PREF{'site_username_cookie'}, -value => $name, -expires => '+6M'); ##### Warning: early return ############################################################# print "Set-Cookie: $c\n"; print "Location: $ref\n\n"; } } } elsif( ($ENV{'REQUEST_METHOD'} =~ /^post$/) && !$name ) { my $c = new CGI::Cookie( -name => $PREF{'site_username_cookie'}, -value => '', -expires => '-1M'); ##### Warning: early return ############################################################# print "Set-Cookie: $c\n"; print "Location: $ref\n\n"; } use CGI qw/:standard/; use CGI::Cookie; my %all_cookies = fetch CGI::Cookie; $name = (); if(exists $all_cookies{$PREF{'site_username_cookie'}}) { $name = $all_cookies{$PREF{'site_username_cookie'}}->value; } print "Content-type: text/html\n\n"; # If they already have a cookie set, then don't output anything: if($name) { exit; } else { # If no cookie is set, greet them and ask them their name: print qq`\n
` . qq`\n` . qq`\n
` . qq`\n`; exit; } } # Pass an item (like "foo=bar") that you want to be added to the existing # qs; this function will remove any existing "foo=*", add your new one, # and return the new qs. You can pass as many items as you want. # sub make_new_qs { my $new_qs = $qs; for(@_) { my $to_add = $_; my ($remove_from_existing_qs) = ($to_add =~ /^(\w+)=.*/); $new_qs =~ s/(?:^|&)$remove_from_existing_qs=.*?(?:&|$)/&/g; } for(@_) { $new_qs .= "&$_"; } # remove duplicate ampersands. $new_qs =~ s/&+/&/g; # remove dups. my %items = map { $_ => 1 } split(/&/, $new_qs); $new_qs = (); foreach my $item (keys %items) { $new_qs .= "&$item"; } # remove style=plain&mode=full if it's already there in the SSI/PHP include. if($ENV{REQUEST_URI} =~ /$PREF{vlog_url_short}/) { #$new_qs =~ s/(?:^|&)style=.+?(?:&|$)/&/g; #$new_qs =~ s/(?:^|&)mode=full(?:&|$)/&/g; } # remove duplicate ampersands again, and leading/trailing ones. $new_qs =~ s/&+/&/g; $new_qs =~ s/^&//; $new_qs =~ s/&$//; return $new_qs; } sub get_ua { # Pass this function an HTTP_USER_AGENT string. Optionally, you can # pass a second argument to indicate the "mode" of the returned string # (full, short, etc.). my $ua = shift; my $mode = shift; my($os_make, $os_ver, $browser_make, $browser_ver, $browser_extra, $wholething); return '(null)' unless defined $ua; $os_make = 'Unknown'; if($ua =~ /Windows/i) { $os_make = 'Win'; } if($ua =~ /Win98/i) { $os_make = 'Win'; } # Note: some UAs contain both "Darwin Power Macintosh" and # "X11" so we must check for x11 before checking for mac. if($ua =~ /x11/i) { $os_make = 'Unix'; } if($ua =~ /mac/i) { $os_make = 'Mac'; } if($ua =~ /linux/i) { $os_make = 'Linux'; } if($ua =~ /solaris/i) { $os_make = 'Solaris'; } if($ua =~ /(Free|Open)BSD/i) { $os_make = 'BSD'; } $browser_make = 'Unknown'; if($ua =~ /Gecko/i) { $browser_make = 'Mozilla'; } if($ua =~ /Mozilla/i) { $browser_make = 'Mozilla'; } if($ua =~ /Firefox/i) { $browser_make = 'Firefox'; } # Firefox must come after Mozilla since Firefox's UA contains "Mozilla" if($ua =~ /Konqueror/i) { $browser_make = 'Konqueror'; } # Note: Netscape v6+ string will contain Gecko too, but as long # as we set Mozilla first, it will end up correct here. if($ua =~ /Netscape/i) { $browser_make = 'Netscape'; } if($ua =~ /MSIE/i) { $browser_make = 'IE'; } # Note: Opera has to come AFTER MSIE because it also has MSIE in its tag: if($ua =~ /Opera/i) { $browser_make = 'Opera'; } if($ua =~ /Lynx/i) { $browser_make = 'Lynx'; } if($ua =~ /Galeon/i) { $browser_make = 'Galeon'; } if($ua =~ /Safari/i) { $browser_make = 'Safari'; } if($ua =~ /ZyBorg/i) { $browser_make = 'ZyBorg'; $os_make = 'Spider'; } if($ua =~ /msnbot/i) { $browser_make = 'MSNBot'; $os_make = 'Spider'; } if($ua =~ /googlebot/i) { $browser_make = 'GoogleBot'; $os_make = 'Spider'; } if($ua =~ /Ask ?Jeeves/i) { $browser_make = 'AskJeevesBot'; $os_make = 'Spider'; } if($ua =~ /Teoma/i) { $browser_make = 'TeomaBot'; $os_make = 'Spider'; } if($ua =~ /ia_archiver/i) { $browser_make = 'AlexaBot'; $os_make = 'Spider'; } if($ua =~ /yahoo.+?slurp/i) { $browser_make = 'YahooBot'; $os_make = 'Spider'; } if($ua =~ /appie/i) { $browser_make = 'AppieBot'; $os_make = 'Spider'; } if($ua =~ /gigabot/i) { $browser_make = 'Gigabot'; $os_make = 'Spider'; } $os_ver = 'v?'; if($ua =~ /Windows/i) { $os_ver = 'v?'; } if($ua =~ /Windows 95/i) { $os_ver = '95'; } if($ua =~ /Windows 98/i) { $os_ver = '98'; } if($ua =~ /Win95/i) { $os_ver = '95'; } if($ua =~ /Win98/i) { $os_ver = '98'; } if($ua =~ /Win 9x 4.90/i) { $os_ver = 'ME'; } if($ua =~ /Windows ME/i) { $os_ver = 'ME'; } if($ua =~ /Windows NT/i) { $os_ver = 'NT'; } if($ua =~ /WinNT/i) { $os_ver = 'NT'; } if($ua =~ /Windows NT 4.0/i) { $os_ver = 'NT4'; } if($ua =~ /Windows NT 5.0/i) { $os_ver = '2000'; } if($ua =~ /Windows 2000/i) { $os_ver = '2000'; } if($ua =~ /Windows NT 5.1/i) { $os_ver = 'XP'; } if($ua =~ /Windows XP/i) { $os_ver = 'XP'; } if($ua =~ /Windows NT 5.2/i) { $os_ver = '2003'; } if($ua =~ /mac_powerpc/i) { $os_ver = 'PowerPC'; } if($ua =~ /mac.*ppc/i) { $os_ver = 'PowerPC'; } if($ua =~ /mac.*68k/i) { $os_ver = '68k'; } if($ua =~ /mac.*(os x|osx)/i) { $os_ver = 'OSX'; } $browser_ver = 'v?'; # Note that Mozilla must come first, since it's a browser, AND a component of # other browsers, and thus it shows up in the useragent string of other browsers. if($ua =~ /mozilla/i) { $browser_ver = 'v?'; } #if($ua =~ /mozilla\/?(\S+)(;| )/i) { $browser_ver = $1; } if($ua =~ /mozilla.+rv:(.+?)\)/i) { $browser_ver = $1; } if($ua =~ /Firefox\/(\d+\.\d+)/i) { $browser_ver = $1; } if($ua =~ /Netscape/i) { $browser_ver = 'v?'; } if($ua =~ /netscape\d?\/?(\S+);?/i) { $browser_ver = $1; } if($ua =~ /msie/i) { $browser_ver = 'v?'; } if($ua =~ /msie\/? ?(\S+);/i) { $browser_ver = $1; } # Opera needs to be checked AFTER MSIE here (since these just IFs, not IF-ELSEs) # because Opera lies and also claims to be MSIE. if($ua =~ /Opera/i) { $browser_ver = 'v?'; } if($ua =~ /Opera\/? ?(\S+);?/i) { $browser_ver = $1; } if($ua =~ /Konqueror/i) { $browser_ver = 'v?'; } if($ua =~ /Konqueror\/? ?(\S+)(;|\))/i) { $browser_ver = $1; } if($ua =~ /Galeon/i) { $browser_ver = 'v?'; } if($ua =~ /Galeon\/? ?(\S+)(;| )/i) { $browser_ver = $1; } $browser_ver =~ s/\.0+$//; $browser_extra = ''; if($ua =~ /aol\/? ?(\S+);/i) { $browser_extra = "AOL $1"; $browser_extra =~ s/\.0+$//; $browser_extra = "($browser_extra)"; } if($mode eq 'full') { $wholething = "$browser_make $browser_ver $browser_extra / $os_make $os_ver"; $wholething =~ s/\s{2,}/ /g; } elsif($mode eq 'separate') { $browser_extra = 'null' unless $browser_extra; #s/ /%20/g for($browser_make, $browser_ver, $browser_extra, $os_make, $os_ver); #$wholething = "$browser_make $browser_ver $browser_extra $os_make $os_ver"; my $sep = ':::::'; $wholething = $browser_make . $sep . $browser_ver . $sep . $browser_extra . $sep . $os_make . $sep . $os_ver; $wholething =~ s/\s{2,}/ /g; } else { if($os_ver eq 'PowerPC') { $os_ver = 'PPC'; } if($browser_make eq 'Mozilla') { $browser_make = 'Moz'; } if($browser_make eq 'Firefox') { $browser_make = 'Ffox'; } if($browser_make eq 'Netscape') { $browser_make = 'NS'; } for($os_ver,$browser_ver) { $_ = '' if /\?/; } $wholething = "$browser_make$browser_ver$browser_extra/$os_make$os_ver"; $wholething =~ s/\s//g; } return $wholething; } # Pass this function a URL, that possibly is a search engine referrer URL. # If so, this function will attempt to parse out the search terms and the # engine name, and return them. If it can't, then it will just return # the same URL that was passed in. The idea is that either way, the text # string returned will be used as the text of a link. sub get_search_terms_from_ref($) { my $ref = shift; my ($engine, $ref2); if( $ref =~ /custom\? |default\.asp\? # search.lycos.com |director\.asp\? # click.lycos.com |dirsearch\.adp\? # aol and some others |ie\?q |iepane\? # altavista.com |jump\.php\? # abcsearch.com |linux\? |multisearch\.jsp\? # search.iwon.com |new-search\? # insider.com |q\? # de.altavista.com |query\? |\?query= # hotbot.lycos.com |query_(\w\w\|msie)? # msie for google.yahoo.com |results\.(adp|aspx?|html|pl)\? # adp for aol and some others, html for redhat.com, pl for bbc.co.uk |search(\.pl|\.php)?\? # php for search.cometsystems.com |web\? # altavista.com |websearch\? # attbi.com /xi ) { # Get the site name by splitting around "//" then "/": (undef, $engine) = split(/\/\//, $ref, 0); ($engine, undef) = split(/\//, $engine, 0); # Remove the www. if it exists: if($engine =~ /^www\./) { # Chop off the first 4 chars, "www.": $engine = substr($engine, 4); } # Remove the .com if it exists: if($engine =~ /\.com$/) { # Chop off the last 4 chars, ".com": $engine = substr($engine, 0, -4); } # The search engines usually use p= or q= for their queries: # Sometimes the google search string contains more than one of these xx_q= things, # and some are undefined. So we'll just have to try each one: (undef, $ref2) = split(/as_epq=/, $ref, 0); if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/qry=/, $ref, 0); } # attbi.com if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/terms=/, $ref, 0); } # abcsearch.com if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/search=/, $ref, 0); } # insider.com if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/searchfor=/, $ref, 0); } # search.iwon.com if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/Keywords=/, $ref, 0); } # overture.com if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/query_cb=/, $ref, 0); } # for redhat.com searches if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/query=/, $ref, 0); } # for aol & some other searches if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/as_q=/, $ref, 0); } if( !($ref2) || (substr($ref2, 0, 1) eq '&') ) { (undef, $ref2) = split(/[\/|&|\?][p|q|k]=/, $ref, 0); } # k= is for w.galaxy.com # Cut off any other variables on the URL: ($ref2, undef) = split(/&/, $ref2, 0); # Replace the +s with spaces: $ref2 =~ s/\+/ /g; # Decode the %FF hex encodings: $ref2 =~ s/%([A-Fa-f\d]{2})/chr hex $1/eg; # Escape any quotes in the string: $ref2 =~ s/"/"/gi; $ref = "$ref2 ($engine)"; } return $ref; } # This webspider-detection stuff is used to prevent spiders from getting logged # a bazillion times in a row, when a bunch of them from the same search-engine # (but with different IPs) hit your site simultaneously. Since they don't # accept cookies, and are coming from multiple machines at different IPs, they # correctly get logged as different visitors. However we don't want that # behavior for spiders, instead we'd like each visit from "a spider" to look # like a single visitor. So when the visitor matches either one of the UAs or # hosts listed, our code doesn't use the normal two-out-of-three IP/UA/visitor-ID # check to determine if it's the same visitor as another one in the log; instead # we just check that the UA matches, that the end-of-host/beginning-of-IP # matches, and that either the UA or host matches one of the following. sub is_a_spider_whose_ua_and_host_match_this_log_entry($$$$) { my ($ua, $host, $logged_ua, $logged_host) = @_; for($host, $logged_host) { if(/[a-zA-Z]/) { s/^(?:.+\.)?(.+\..+)$/$1/; # truncate it to just foo.com } else { s/\d+\.\d+$//; # truncate it to just the first 2 octets } } my $hosts_match = $host eq $logged_host ? 1 : 0; my $uas_match = $ua eq $logged_ua ? 1 : 0; # Some spiders not only spoof their UAs, but they constantly rotate them, so two # requests right in a row from the same spider have two different UAs. For hosts # that do that, the only thing we can match on is the host. Otherwise match on # the host *and* the UA. if( $hosts_match && ($host =~ /($PREF{ua_liars})/) ) { return 1; } elsif($hosts_match && $uas_match) { if($ua =~ /$PREF{spider_uas}/i || $host =~ /($PREF{spider_hosts})/) { return 1; } else { return 0; } } } sub populate_month_conversion_hashes() { $monthnum{"Jan"} = '01'; $monthnum{"Feb"} = '02'; $monthnum{"Mar"} = '03'; $monthnum{"Apr"} = '04'; $monthnum{"May"} = '05'; $monthnum{"Jun"} = '06'; $monthnum{"Jul"} = '07'; $monthnum{"Aug"} = '08'; $monthnum{"Sep"} = '09'; $monthnum{"Oct"} = '10'; $monthnum{"Nov"} = '11'; $monthnum{"Dec"} = '12'; $monthname{"01"} = 'Jan'; $monthname{"02"} = 'Feb'; $monthname{"03"} = 'Mar'; $monthname{"04"} = 'Apr'; $monthname{"05"} = 'May'; $monthname{"06"} = 'Jun'; $monthname{"07"} = 'Jul'; $monthname{"08"} = 'Aug'; $monthname{"09"} = 'Sep'; $monthname{"10"} = 'Oct'; $monthname{"11"} = 'Nov'; $monthname{"12"} = 'Dec'; } # vlog, fc sub make_password_hash { if($ENV{REQUEST_METHOD} =~ /post/i) { use Digest::MD5 'md5_hex'; use CGI ':param'; my $hashed_password = md5_hex(param('password')); start_html_output('Here is your hashed password...', 'css', 'js'); print qq`

The hashed version of the password you just entered is:

$hashed_password

` . qq`\n`; finish_html_output(); } else { start_html_output('Enter your new password', 'css', 'js'); print qq`
` . qq`\n

Enter your new password:

` . qq`\n` . qq`\n

` . qq`\n
` . qq`\n`; finish_html_output(); } } sub get_db_connection { # $dbh is going to be a global var at the top of each script, so that # we never have to make >1 db connection per script execution. so # here, we want to make sure it hasn't been set before we connect. unless($dbh) { open(my $infh,"<$PREF{tmpfl1}") or die "$0: couldn't open $PREF{tmpfl1} for reading: $!\n"; flock $infh, 1; seek $infh, 0, 0; my $this = <$infh>; close $infh or die "$0: couldn't close $PREF{tmpfl1}: $!\n"; open(my $infh,"<$PREF{tmpfl2}") or die "$0: couldn't open $PREF{tmpfl2} for reading: $!\n"; flock $infh, 1; seek $infh, 0, 0; my $that = <$infh>; close $infh or die "$0: couldn't close $PREF{tmpfl2}: $!\n"; chomp ($this,$that); $dbh = DBI->connect("dbi:mysql:$PREF{database_name}", $that, $this) or die "$0: $DBI::errstr\n"; } return $dbh; } # vlog, sub account_validates($$$) { my $username = shift; my $password = shift; my $type = shift; use Digest::MD5 'md5_hex'; my $crypted_password = md5_hex($password); if($PREF{integrate_with_userbase} =~ /yes/i) { my $hashref = (); if($type eq 'member') { $hashref = load_userlist('member'); } elsif($type eq 'admin') { $hashref = load_userlist('admin'); } else { die "$0: account_validates(): invalid account type passed.\n"; } return( exists($$hashref{lc($username)}) && ($$hashref{lc($username)}{'pw'} eq $crypted_password) ) } else { if($type eq 'member') { return $crypted_password eq $PREF{member_password_hash}; } elsif($type eq 'admin') { return $crypted_password eq $PREF{admin_password_hash}; } else { die "$0: account_validates(): invalid account type passed.\n"; } } } # vlog, sub sanitize_username($) { $_[0] =~ s/[^0-9A-Za-z\., -]//g; # no repeating punctuation $_[0] =~ s/\.{2,}/./g; $_[0] =~ s/,{2,}/,/g; $_[0] =~ s/ {2,}/ /g; $_[0] =~ s/-{2,}/-/g; # no leading/trailing spaces $_[0] =~ s/^ +//; $_[0] =~ s/ +$//; # must start with an alphanumeric $_[0] =~ s/^[^0-9A-Za-z]+//; # if somehow it's still invalid, reset it. $_[0] = 'Guest' unless $_[0] =~ /[0-9A-Za-z]/; } sub get_cookies() { use CGI ':standard'; use CGI::Cookie; my %cookies = fetch CGI::Cookie; return %cookies; } sub get_cookie($) { my $which = shift; my %jar = get_cookies(); my $value; if(exists $jar{$which}) { $value = $jar{$which}->value; } return $value; } sub set_cookie($$$) { my $name = shift; my $value = shift; my $expiry = shift; my $cookie; # This if/else is necessary because setting "expires" to "" isn't # the same as not setting it. Setting it to "" is the same as # setting it to zero, which expires the cookie immediately # (i.e., deletes it). But explicitly *not* setting the expiry # causes the cookie to persist until the end of the session. if($expiry eq "") { $cookie = new CGI::Cookie( -name => $name, -value => $value, -path => '/'); } else { $cookie = new CGI::Cookie( -name => $name, -value => $value, -expires => $expiry, -path => '/'); } if($PREF{output_started}) { warn "$0: warning: cannot set cookie '$name' => '$value' because the page output has already been started.\n"; } else { print "Set-Cookie: $cookie\n"; } } sub load_userlist($) { my $list = shift; my ($file, %userlist) = (); $PREF{userbase_data_dir} = ($PREF{userbase_data_dir_is_in_docroot} =~ /yes/i && $PREF{userbase_data_dir} !~ /^\Q$PREF{DOCROOT}\E/) ? $PREF{DOCROOT} . $PREF{userbase_data_dir} : $PREF{userbase_data_dir}; $PREF{admin_username_file} = $PREF{userbase_data_dir} . '/enc_admins.txt' unless exists $PREF{admin_username_file}; $PREF{member_username_file} = $PREF{userbase_data_dir} . '/enc_members.txt' unless exists $PREF{member_username_file}; if($list eq 'admin') { $file = $PREF{admin_username_file}; } if($list eq 'member') { $file = $PREF{member_username_file}; } open(my $infh,"<$file") or die "$0: couldn't open $file: $!\n"; flock $infh, 1; seek $infh, 0, 0; while(<$infh>) { # the format of each line is: # # username:hashed_pw:option1=foo:::::setting2=bar:::::prefN=baz(...) # chomp; next if /^\s*(#|$)/; my ($username, $crypted_actual_password, $options) = split(/:/, $_, 3); if($username && $crypted_actual_password) { $userlist{lc($username)}{pw} = $crypted_actual_password; foreach my $option (split(/:::::/, $options)) { if($option =~ /\s*(.+?)\s*=\s*(.+)\s*/) { if($1 eq 'pw') { die "$0: error: 'pw' is a reserved option/setting/preference. please choose another name.\n"; } else { $userlist{lc($username)}{$1} = $2; } } } } } close $infh or die "$0: couldn't close $file: $!\n"; return \%userlist; } sub print_needlogin_error_and_exit { my $target = shift; my $login_url = $PREF{integrate_with_userbase} =~ /yes/i || $PREF{integrate_with_existing_login_system} =~ /yes/i ? $PREF{login_url} : "$PREF{vlog_url_short}?action=login&target=$target"; start_html_output('Authentication Required', 'css', 'js'); print qq`

Authentication Required

` . qq`\n
You must log in first.
` . qq`\n`; finish_html_output(); exit; } sub start_html_output { my $title = shift; my $css = shift; my $js = shift; $css = get_css() if $css; $js = get_js() if $js; print_server_headers(); if($PREF{print_full_html_tags} =~ /yes/i) { print qq`` . qq`\n` . qq`\n` . qq`\n` . qq`\n$title` . qq`\n` . qq`\n` . qq`\n` . qq`\n` . qq`\n`; } else { print qq`$PREF{outer_container}\n`; } print qq`
\n`; print $PREF{title} if $PREF{title} =~ /\S/; } sub finish_html_output { print_powered_by() unless $PREF{hide_poweredby} =~ /yes/i; print qq`
\n`; # end vlogcontainer. if($PREF{print_full_html_tags} =~ /yes/i) { print qq`\n\n`; } else { print qq`$PREF{outer_container_end}\n`; } } sub print_powered_by { print qq`
\n`; print get_powered_by(); print qq`
\n`; } sub get_powered_by { return qq`Powered by Encodable`; } sub get_js { my $js = qq` `; return $js; } sub get_css { my $css = qq` #vlogbody { background: #ddd; font-family: sans-serif; text-align: center; } #vlogcontainer { background: #fff; margin: 20px auto; border: 1px solid #444; } #pb { margin: 20px auto; } /* table.log, #simultvisitors { width: 100%; border: 1px solid #cccccc; } */ table.none { width: 100%; } table.other { border: 1px solid #cccccc; margin: 8px; } .statstable { font-size: 85%; border: 1px solid #555; border-collapse: collapse; margin: 5px auto 15px auto; text-align: left; } .statstable tr.head { background: #ccc; } .statstable tr.odd { background: #e6e6e6; } .statstable tr.even { background: #efefef; } .statstable th { padding: 4px; } .statstable td { padding: 7px; } .statstable td.oc { text-align: right; font-weight: bold; } .statstable td.c { text-align: center; } .statstable td.p { text-align: center; font-weight: bold; } table.log td, table.statstable td { white-space: nowrap; } #week td, #year td, #busy td { font-family: monospace; } td.weekc, td.yearc, td.busyc { text-align: center; } th.dateheading { text-align: left; } `; return $css; } # blog, vlog, ub, sub username_is_taken($$) { my $typedname = shift; my $level = shift; if($level eq 'member') { my $member_hashref = load_userlist('member'); return exists($$member_hashref{lc($typedname)}) ? 1 : 0; } elsif($level eq 'admin') { my $admin_hashref = load_userlist('admin'); return exists($$admin_hashref{lc($typedname)}) ? 1 : 0; } else { die "$0: in username_is_taken($typedname,$level): \$level must be 'member' or 'admin' not $level\n"; } } # blog, vlog, ub, sub membername_is_taken($) { return username_is_taken($_[0],'member'); } # blog, vlog, ub, sub adminname_is_taken($) { return username_is_taken($_[0],'admin'); } # blog, vlog, ub, sub username_is_illegal($) { return (($_[0] =~ /[^0-9A-Za-z\., -]/) || ($_[0] !~ /[0-9A-Za-z]/)); } # blog, vlog, ub, sub get_invalid_name_error_message() { return "Sorry, your name must contain at least one alphanumeric character (0-9A-Za-z), and optionally can contain periods, commas, dashes, and spaces."; } # blog, vlog, ub, sub get_css_filename() { my %cookies = get_cookies(); my $css_file_name = $PREF{'default_css_file_name'}; if(exists $cookies{$PREF{'theme_cookie_name'}}) { $css_file_name = $cookies{$PREF{'theme_cookie_name'}}->value; } $css_file_name .= '.css' unless $css_file_name =~ /\.css$/i; $css_file_name = "$PREF{'path_to_css_files'}$css_file_name"; return $css_file_name; } # blog, vlog, sub datestring8_to_etime($$) { my $d8 = shift; my $when = shift; $when = $when =~ /start/ ? 'start' : 'end'; my ($year,$month,$day) = ($d8 =~ /^(\d{4})(\d{2})(\d{2})$/); my ($sec,$min,$hour); if($when eq 'start') { ($sec,$min,$hour) = (0,0,0); } else { ($sec,$min,$hour) = (59,59,23); } return timelocal($sec,$min,$hour,$day,($month - 1),$year); } # blog, vlog, sub num_elapsed_days_between_datestring8_dates($$) { my $earlier_date = shift; my $later_date = shift; if($earlier_date !~ /^\d{8}$/ || $later_date !~ /^\d{8}$/) { die qq`$0: datestrings must contain exactly 8 digits and nothing else, but you've passed "$earlier_date" and "$later_date"\n`; } return 0 if $earlier_date == $later_date; if($earlier_date > $later_date) { my $temp = $earlier_date; $earlier_date = $later_date; $later_date = $temp; } # now $earlier_date is smaller/earlier than $later_date. my $elapsed_days = 0; while($earlier_date < $later_date) { $later_date = decrement_datestring_8($later_date, 1); $elapsed_days++; } return $elapsed_days; } # blog, vlog, sub increment_datestring_8($$) { use Time::Local; my $start = shift; my $amount = shift; my $i; for($i=1; $i<=$amount; $i++) { my ($year,$month,$day) = ($start =~ /(\d{4})(\d{2})(\d{2})/); my ($newmonth,$newday) = ($month,$day); s/^0// for($newmonth,$newday); # try to inc the day eval { timelocal(0,0,0,$newday+1,$newmonth-1,$year); }; # note: timelocal's month is 0..11 so $month-1 = $month if($@) # means there was an error. { # try to inc the month eval { timelocal(0,0,0,1,$newmonth,$year); }; if($@) { # inc the year eval { timelocal(0,0,0,1,0,$year+1); }; if($@) { die "$0: there's some serious problem if we can't increment the year...\n"; } else { $year++; $start = $year . '0101'; } } else { $newmonth++; $newmonth = "0$newmonth" if ($newmonth =~ /^\d$/); $start = $year . $newmonth . '01'; } } else { $newday++; $newday = "0$newday" if ($newday =~ /^\d$/); $start = $year . $month . $newday; } } return $start; } # blog, vlog, sub decrement_datestring_8($$) { use Time::Local; my $start = shift; my $amount = shift; my $i; for($i=1; $i<=$amount; $i++) { my ($year,$month,$day) = ($start =~ /(\d{4})(\d{2})(\d{2})/); my ($newmonth,$newday) = ($month,$day); s/^0// for($newmonth,$newday); # try to dec the day eval { timelocal(0,0,0,$newday-1,$newmonth-1,$year); }; # note: timelocal's month is 0..11 so $month-1 = $month if($@) # means there was an error. { # try to dec the month eval { timelocal(0,0,0,1,$newmonth-2,$year); }; if($@) { # month is Jan... dec the year eval { timelocal(0,0,0,1,0,$year-1); }; if($@) { die "$0: there's some serious problem if we can't decrement the year...\n"; } else { $year--; $start = $year . '1231'; } } else { # we know the month is valid, so find the highest valid day: $newday = 32; do { $newday--; eval { timelocal(0,0,0,$newday,$newmonth-2,$year); }; } while ($@); $newmonth--; $newmonth = "0$newmonth" if ($newmonth =~ /^\d$/); $start = $year . $newmonth . $newday; } } else { $newday--; $newday = "0$newday" if ($newday =~ /^\d$/); $start = $year . $month . $newday; } } return $start; } sub expand_custom_vars_in_prefs($) { my $hashref = shift; foreach my $key (keys %$hashref) { next unless $$hashref{$key} =~ /\$\$/; $$hashref{$key} =~ s/\$\$server_name\$\$/$ENV{'SERVER_NAME'}/g; $$hashref{$key} =~ s/\$\$httphost_withport\$\$/$ENV{'HTTP_HOST'}/g; $$hashref{$key} =~ s/\$\$name_of_site\$\$/$$hashref{'name_of_site'}/g; } } # pass filename to create and optionally the mode to chmod it to. # the mode must consist of 1-4 octal digits and must NOT be quoted. # see "perldoc -f chmod" and "man chmod". sub create_file_if_DNE { my $file = shift; my $mode = shift; return if -T $file; open(NEW,">$file") or die "$0: couldn't create new file $file: $!\n"; close NEW or die "$0: couldn't close $file after creating it: $!\n"; if($mode) { chmod($mode,$file) or die "$0: couldn't chmod file \"$file\" with mode \"$mode\": $!\n"; } } sub create_dir_if_DNE { my $dir = shift; my $mode = shift; return if -d $dir; mkdir($dir,0777) or die "$0: couldn't create dir $dir: $!\n"; if($mode) { chmod($mode,$dir) or die "$0: couldn't chmod dir \"$dir\" with mode \"$mode\": $!\n"; } } sub enc_untaint { my $item = shift; my $original_item = $item; my $keep_path = shift; #print STDERR "enc_untaint($item)\n"; # Regardless of whether we're keeping the path, dots surrounded by slashes are never allowed. # #$item =~ s!(^|/|\\)\.+(/|\\|$)!$1!g; while($item =~ m!((?:^|/|\\)\.+(?:/|\\|$))!) { $item =~ s!$1!/!; } #print STDERR "removed slashdots: $item\n"; if( $item =~ m!(/|\\)! && !$keep_path) { $item =~ s!^.*[/\\]+([^/\\]+)!$1!; # remove any path from the front. #print STDERR "removed path from front: $item\n"; $item =~ s!^([^/\\]+)[/\\]+!$1!; # ...and the back. } $item =~ s![`\*\?\|<>]!!g; # remove some other potentially-unsafe stuff. $item =~ s![/\\]{2,}!/!g; # condense any multiples. ($item) = ($item =~ /(.*)/); # untaint. # In case anything slips through, die as a security precaution. # die qq`$0: couldn't untaint "$original_item".\n` if $item =~ m![/\\]! && !$keep_path; die qq`$0: couldn't untaint "$original_item".\n` if $item =~ m!(?:^|/|\\)\.+(?:/|\\|$)!; die qq`$0: couldn't untaint "$original_item".\n` if $item =~ m!^\.+$!; die qq`$0: couldn't untaint "$original_item".\n` if $item =~ m!^\s*$!; #print STDERR "untainted: $item\n\n"; return $item; } sub enc_urlencode { s/([^\w()'*~!.-])/sprintf '%%%02x', ord $1/eg for @_; } sub enc_urldecode { # assuming the input really was URL-encoded, then any plus-signs that were originally there # are now in their hex form, so any plus-signs STILL there were converted from spaces by the # browser. so they must be converted back BEFORE restoring any original plus-signs from the # hex codes. convert_plus_signs_back_to_spaces_in_var_from_GET_method(@_); s/%([a-fA-F\d]{2})/chr hex $1/eg for @_; } sub convert_plus_signs_back_to_spaces_in_var_from_GET_method { s/\+/ /g for @_; } sub show_hits_graph { my $days_to_show = 7; my $stop_etime = (($global_current_etime + $PREF{time_offset}) - (60 * 60 * 24 * ($days_to_show+1))); # plus one because otherwise the final day-ago is incomplete. my $image = '/layout/transpixel.gif'; my $height_of_bars = 8; # In pixels my $width_of_bars = 70; # In pixels my ($rank, $max_hits, $previous_day, %hits, $logged_etime, $logged_prettytime) = (); my %month_name = (); $month_name{'01'} = 'Jan'; $month_name{'02'} = 'Feb'; $month_name{'03'} = 'Mar'; $month_name{'04'} = 'Apr'; $month_name{'05'} = 'May'; $month_name{'06'} = 'Jun'; $month_name{'07'} = 'Jul'; $month_name{'08'} = 'Aug'; $month_name{'09'} = 'Sep'; $month_name{'10'} = 'Oct'; $month_name{'11'} = 'Nov'; $month_name{'12'} = 'Dec'; my $date = strftime("%Y%m%d",localtime($global_current_etime + $PREF{time_offset})); my $a_week_ago = decrement_datestring_8($date,7); my $eight_days_ago = decrement_datestring_8($date,8); my $sth = $PREF{dbh}->prepare("SELECT COUNT(*) FROM $PREF{visitor_table_name} WHERE date8>$eight_days_ago ORDER BY serial_id DESC;"); $sth->execute; my $limit = $sth->fetchrow; my $i = 0; while($date > $a_week_ago) { my ($year,$month,$day) = ($date =~ /(\d\d\d\d)(\d\d)(\d\d)/); my $prettydate_noyear = $month_name{"$month"} . $day; my $sth = $PREF{dbh}->prepare("SELECT COUNT(*) FROM $PREF{visitor_table_name} WHERE date8=$date ORDER BY serial_id DESC LIMIT $limit;"); $sth->execute; my $count = $sth->fetchrow; $hits{"$prettydate_noyear"} = $count; $date = decrement_datestring_8($date,1); $i++; } print "Content-type: text/html\n\n"; print qq`
\n`; foreach my $key (sort (keys %hits)) { if($hits{$key} > $max_hits) { $max_hits = $hits{$key}; } } foreach my $key (sort { $b cmp $a } (keys %hits)) { my $percent = sprintf( "%0d", (($hits{$key}/$max_hits)*$width_of_bars) ); my $rest = $width_of_bars - $percent; my $empty = qq`$hits{$key}`; # Workaround for IE problem whereby images with zero width are displayed with width=1: if($percent == $width_of_bars) { $empty = ''; $percent += 2;} my $filled = qq`$hits{$key}`; my ($day) = ($key =~ /(.+)/); print qq`
$day$filled$empty$hits{$key}
\n`; } print qq`
\n`; #print STDERR "dhg-db4.cgi: end-time=" . time(); }