TABLE OF CONTENTS


URL/check_for_response [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Checks if URL is fetchable. 

INPUTS

   url -- URL to fetch.

RESULT

   The content type, e.g. "image/jpeg".

NOTES

   Name conflicts with actual source code.
   Does not follow redirects at the moment.

SOURCE

sub URL::check_for_response {
  my $url = shift;
  my $ct;

  my $ua = LWP::UserAgent->new( agent => "irc-collective/0.1.4" );
  my $response = $ua->head($url);

  Debug::print("Status code: " . $response->code());
  Debug::print("Status line: " . $response->status_line());

  if ($response->is_error()) {
    Debug::print("Request is errorneus, abort.");
  }
  elsif ($response->is_success()) {
    Debug::print("Request is successful, continue.");
  }
  elsif ($response->is_redirect()) {
    Debug::print("Got redirect, follow?");
  }
  elsif ($response->is_info()) {
    Debug::print("What to do with informational response?");
  }

  $ct = $response->content_type;
  Debug::print("Content-type: " . $ct);

  return $ct;
}

URL/extract_url [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Filters an URL from the line. 

INPUTS

   content -- the line, possible containing one or more URLs. 

RESULT

   The filtered URL if found, undef otherwise.

NOTES

   Only supports ftp and http at the moment.
   Does not scan for multiple occurances, only first occurance is returned.

SOURCE

sub URL::extract_url {
  my $content = shift;
  my $url;

  if ($content =~ m/.*((ftp|http):\/\/\S+).*/) {
    $url = $1;
  }

  return $url;
}

URL/get_urlid [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Returns ID for new or existing URL. 

INPUTS

   dbh -- database handle
   date -- date in YYYYMMDD format, e.g. "20070813"
   line_no -- line number in logfile, e.g. "210"

RESULT

   ID of URL stored in database.

SOURCE

sub URL::get_urlid {
  my $dbh = shift;
  my ($date, $line_no) = @_;
  my $urlid = 0;

  # see if we already have an existing urlid
  my $sth = $dbh->prepare(
    qq[SELECT urlid
       FROM urls
       WHERE from_date=?
       AND from_lineno=?
       LIMIT 1]
  );

  $sth->execute($date, $line_no)
    or print STDERR "ERROR: Could not perform query to get urlid. ",
      "Reason: ", $dbh->errstr, "\n";

  # get/set urlid
  if ($sth->rows != 0) {
    # use existing hash
    my $record = $sth->fetchrow_hashref;
    $urlid = $record->{urlid};
  }
  else {
    # create a new hash
    my $md5 = Digest::MD5->new;
    $md5->add($date, $line_no);
    $urlid = $md5->hexdigest;
  }

  $sth->finish;

  return $urlid;
}

URL/handle_url [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Wrapper function to record URLs.

INPUTS

   args -- dbh, year, month, day, line_no, time, nick and url

SOURCE

sub URL::handle_url{
  my @args = @_;

  if (URL::record_url(@args) > 0) {
    Debug::print("Added new url.");
  }
}

URL/has_imagedata [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Checks content type for image tag 

INPUTS

   ct -- content type, e.g. "image/jpeg"

RESULT

   True if image, false otherwise.

SOURCE

sub URL::has_imagedata {
  my $ct = shift;
  my $image_tag = 0;

  if ($ct =~ m/.*image.*/) {
    $image_tag = 1;
  }

  return $image_tag;
}

URL/process_url [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Wrapper function to handle images and URLs. 

INPUTS

   imagdir -- directory where local copy of images are stored
   dbh -- database handle
   year -- year in YYYY format, e.g. "2007"
   month -- month in MM format, e.g. "08"
   day -- day in DD format, e.g. "13"
   line_no -- line number in logfile, e.g. "210"
   time -- time in HH:MM[:SS] format, e.g. "14:48"
   nick -- nickname, e.g. "rjm"
   url -- url to record, e.g. "http://www.irc-collective.org/"
   ct -- content type, e.g. "image/jpeg"

SOURCE

sub URL::process_url {
  my $imagedir = shift;
  my $dbh = shift;

  my $year = shift;
  my $month = shift;
  my $day = shift;

  my $line_no = shift;
  my $time = shift;
  my $nick = shift;

  my $url = shift;
  my $ct = shift;

  # handle image or regular url
  if (URL::has_imagedata($ct)) {
    Image::handle_image(
      $imagedir,
      $dbh,
      $year, $month, $day,
      $line_no, $time, $nick,
      $url, $ct
    );
  }
  else {
    URL::handle_url(
      $dbh,
      $year, $month, $day,
      $line_no, $time, $nick,
      $url
    ); # should add $ct to urls as well
  }

  # todo: return state
}

URL/record_url [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Records URL to database.

INPUTS

   dbh -- database handle
   year -- year in YYYY format, e.g. "2007"
   month -- month in MM format, e.g. "08"
   day -- day in DD format, e.g. "13"
   line_no -- line number in logfile, e.g. "210"
   time -- time in HH:MM[:SS] format, e.g. "14:48"
   nick -- nickname, e.g. "rjm"
   url -- url to record, e.g. "http://www.irc-collective.org/"

RESULT

   True on success, false otherwise.

SOURCE

sub URL::record_url {
  my $dbh = shift;
  my ($year, $month, $day, $line_no, $time, $nick, $url) = @_;
  my $date = "$year$month$day"; # requirement lz

  # prevent storing url multiple times for same logfile
  # usefull when invoking parselog every 10 minutes e.g.
  my $urlid = URL::get_urlid($dbh, $date, $line_no);

  my $protocol = URI->new($url)->scheme();

  my $sth = $dbh->prepare(
    qq[INSERT INTO urls
       (urlid, url, from_date, from_lineno, from_time, from_nick, protocol)
       VALUES (?, ?, ?, ?, ?, ?, ?)]);

  $sth->execute(
    $urlid, $url, $date, $line_no, $time, $nick, $protocol
  ) or print "ERROR: Could not add URL to database: ", $dbh->errstr, "\n";

  $sth->finish;

  return $sth->rows;
}