TABLE OF CONTENTS


WordList/get_wordid [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Returns an ID for the given word.

INPUTS

   word -- the word to look up

RESULT

   The ID of the word in the wordlist, or 0 if the word is not present.

SOURCE

sub WordList::get_wordid {
  # Looks up $word in the wordlist table through the shared $dbh handle.
  #
  #   word -- the word to look up
  #
  # Returns the word's wordid, or 0 when the word is not present or the
  # lookup could not be carried out.
  my $word = shift;

  my $sth = $dbh->prepare(qq[SELECT wordid
                             FROM wordlist
                             WHERE word=?]);
  if (!$sth) {
    # Without a statement handle there is nothing to execute; report the
    # problem and answer "not found" instead of calling a method on undef.
    print STDERR "ERROR: Could not lookup word. Reason: ",
      $dbh->errstr, "\n";
    return 0;
  }

  if (!$sth->execute($word)) {
    print STDERR "ERROR: Could not lookup word. Reason: ",
      $sth->errstr, "\n";
    return 0;
  }

  # Only the first matching row is of interest.
  my $record = $sth->fetchrow_hashref;

  # Any remaining rows are not needed, so release the handle now.
  $sth->finish;

  return 0 unless $record;
  return $record->{wordid} || 0;
}

WordList/match_exists [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Checks if a wordmatch is already present in the database.

INPUTS

   eventid -- the ID of the sentence in the events table
   wordid -- the ID of the word

RESULT

   True if a match exists, false otherwise.

SOURCE

sub WordList::match_exists {
  # Checks whether the wordmatch table already holds a row for the given
  # (eventid, wordid) pair.
  #
  #   eventid -- the ID of the sentence in the events table
  #   wordid  -- the ID of the word
  #
  # Returns 1 if a matching row was fetched, 0 otherwise. A failed
  # prepare/execute is reported on STDERR and also yields 0.
  my ($eventid, $wordid) = @_;

  my $sth = $dbh->prepare(qq[SELECT eventid
                             FROM wordmatch
                             WHERE eventid = ?
                             AND wordid = ?
                             LIMIT 1]);
  if (!$sth) {
    print STDERR "ERROR: Could not prepare statement: ",
      $dbh->errstr, "\n";
    return 0;
  }

  if (!$sth->execute($eventid, $wordid)) {
    # The failure belongs to the statement handle, so report its error.
    print STDERR "ERROR: Could not execute statement: ",
      $sth->errstr, "\n";
    return 0;
  }

  # Fetch instead of consulting $sth->rows: for SELECT statements the
  # row count is driver dependent and may be -1 (which is a true value).
  my $row = $sth->fetchrow_arrayref;
  $sth->finish;

  return $row ? 1 : 0;
}

WordList/record_word [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Records word to the wordlist table.

INPUTS

   word -- the word to record

RESULT

   The ID of the existing word if it is already in the wordlist,
   otherwise the ID of the newly added word.

SOURCE

sub WordList::record_word {
  # Makes sure $word is present in the wordlist table.
  #
  #   word -- the word to record
  #
  # Returns the wordid of the already existing entry, or the ID reported
  # for the freshly inserted row. Returns 0 when the word could not be
  # stored or no ID could be obtained.
  my $word = shift;

  # check if word already exists in wordlist
  my $wordid = WordList::get_wordid($word);

  # return wordid if word found
  if ($wordid) {
    Debug::print("Word ($word) found in wordlist (wordid: $wordid)");
    return $wordid;
  }

  my $sth = $dbh->prepare(qq[INSERT INTO wordlist (word)
                             VALUES (?)]);
  if (!$sth) {
    print STDERR "ERROR: Could not update wordlist table. Reason: ",
      $dbh->errstr, "\n";
    return 0;
  }

  # try to add the word to the wordlist
  if (!$sth->execute($word)) {
    # Stop here: after a failed execute the row count is not meaningful
    # and last_insert_id could hand back an ID from an earlier insert.
    print STDERR "ERROR: Could not update wordlist table. Reason: ",
      $sth->errstr, "\n";
    return 0;
  }

  # get reference for wordmatch table
  my $new_id = 0;
  if ($sth->rows != 0) {
    # last_insert_id may return undef; normalise that to 0 so callers
    # always receive a number.
    $new_id = $dbh->last_insert_id(undef, undef, qw(wordlist wordid)) || 0;
    Debug::print("Word ($word) added to wordlist (wordid: $new_id)");
  }

  $sth->finish;

  return $new_id;
}

WordList/record_wordmatch [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Adds a reference where a word was found.

INPUTS

   eventid -- the ID of the sentence in the events table
   wordid -- the ID of the word

SOURCE

sub WordList::record_wordmatch {
  # Adds a row to the wordmatch table linking a word to the event in
  # which it was found, unless WordList::match_exists reports that the
  # pair is already recorded.
  #
  #   eventid -- the ID of the sentence in the events table
  #   wordid  -- the ID of the word
  #
  # Returns nothing; failures are reported on STDERR.
  my ($eventid, $wordid) = @_;

  # nothing to do if this pair is already recorded
  return if WordList::match_exists($eventid, $wordid);

  my $sth = $dbh->prepare(qq[INSERT INTO wordmatch
                             (eventid, wordid)
                             VALUES (?, ?)]);
  if (!$sth) {
    print STDERR "ERROR: Could not update wordmatch table. Reason: ",
      $dbh->errstr, "\n";
    return;
  }

  if (!$sth->execute($eventid, $wordid)) {
    # Return here so a failed insert is never logged as "added".
    print STDERR "ERROR: Could not update wordmatch table. Reason: ",
      $sth->errstr, "\n";
    return;
  }

  if ($sth->rows != 0) {
    Debug::print("Wordmatch added (eventid: $eventid wordid: $wordid)");
  }

  return;
}

WordList/update_wordlist [ Functions ]

[ Top ] [ Functions ]

FUNCTION

   Wrapper function to add new words to the wordlist.

INPUTS

   eventid -- the ID of the sentence in the events table
   sentence -- the line in question 

SOURCE

sub WordList::update_wordlist {
  # Wrapper that splits a sentence into words, records each eligible
  # word via WordList::record_word and links it to the event via
  # WordList::record_wordmatch.
  #
  #   eventid  -- the ID of the sentence in the events table
  #   sentence -- the line in question
  #
  # Returns nothing.
  my ($eventid, $sentence) = @_;

  # nothing to index without a sentence
  return unless defined $sentence;

  # filter punctuation
  $sentence =~ s/[\!\:\;\,\.\?\/\<\>\@\-\%\^\&\$\+\=\*\(\)\{\}\[\]\'\"\`\|\_\~\#\\]/ /g;

  # split sentence into words; the special ' ' pattern splits on any run
  # of whitespace, so tabs and newlines never end up inside a word and
  # no empty fields are produced
  my @words = split ' ', $sentence;

  # process each word separately
  foreach my $word (@words) {

    # handle only words that consist of at least 3 characters
    # and do not exceed the maximum size of the column inside
    # the database
    my $len = length $word;
    next if $len <= 2 || $len > 40;

    my $wordid = WordList::record_word($word);

    # a false wordid means the word could not be recorded, so there is
    # nothing valid to reference from the wordmatch table
    next unless $wordid;

    WordList::record_wordmatch($eventid, $wordid);
  }

  return;
}