TABLE OF CONTENTS
- 1. WordList/get_wordid
- 2. WordList/match_exists
- 3. WordList/record_word
- 4. WordList/record_wordmatch
- 5. WordList/update_wordlist
WordList/get_wordid [ Functions ]
FUNCTION
Returns an ID for the given word.
INPUTS
word -- the word to lookup
RESULT
An existing ID in the wordlist or a new one.
SOURCE
# Look up the ID of a word in the wordlist table.
#
#   word -- the word to look up
#
# Returns the wordid stored for the word, or 0 when the word is not in
# the table or the lookup could not be carried out.  Uses the database
# handle $dbh, which is defined outside this sub.
sub WordList::get_wordid {
    my ($word) = @_;

    my $sth = $dbh->prepare(qq[SELECT wordid FROM wordlist WHERE word=?]);
    if (!$sth) {
        print STDERR "ERROR: Could not lookup word. Reason: ", $dbh->errstr, "\n";

        # Without a statement handle there is nothing to execute; report
        # "no ID" instead of dying on a method call on undef.
        return 0;
    }

    if (!$sth->execute($word)) {
        print STDERR "ERROR: Could not lookup word. Reason: ", $sth->errstr, "\n";
        return 0;
    }

    # Only the first row is of interest; release the handle right away.
    my $record = $sth->fetchrow_hashref;
    $sth->finish;

    return 0 unless $record;
    return $record->{wordid} || 0;
}
WordList/match_exists [ Functions ]
FUNCTION
Checks if a wordmatch is already present in the database.
INPUTS
eventid -- the ID of the sentence in the events table wordid -- the ID of the word
RESULT
True if a match exists, false otherwise.
SOURCE
# Check whether a (eventid, wordid) pair is already stored in the
# wordmatch table.
#
#   eventid -- the ID of the sentence in the events table
#   wordid  -- the ID of the word
#
# Returns 1 if a matching row exists, 0 otherwise (also 0 when the
# query could not be prepared or executed).  Uses the database handle
# $dbh, which is defined outside this sub.
sub WordList::match_exists {
    my ($eventid, $wordid) = @_;

    my $sth = $dbh->prepare(qq[SELECT eventid FROM wordmatch WHERE eventid = ? AND wordid = ? LIMIT 1]);
    if (!$sth) {
        print STDERR "ERROR: Could not prepare statement: ", $dbh->errstr, "\n";
        return 0;
    }

    if (!$sth->execute($eventid, $wordid)) {
        print STDERR "ERROR: Could not execute statement: ", $sth->errstr, "\n";
        return 0;
    }

    # $sth->rows is not dependable for SELECT statements: depending on the
    # driver it may be -1 or 0 until the rows have been fetched.  Fetching
    # the (at most one) row answers the question on every driver.
    my $row = $sth->fetchrow_arrayref;
    $sth->finish;

    return $row ? 1 : 0;
}
WordList/record_word [ Functions ]
FUNCTION
Records word to the wordlist table.
INPUTS
word -- the word to record
RESULT
The ID of the word: its existing ID if the word is already in the wordlist, otherwise the ID of the newly added entry.
SOURCE
# Record a word in the wordlist table unless it is already there.
#
#   word -- the word to record
#
# Returns the wordid of the word: the existing one when
# WordList::get_wordid finds it, otherwise the ID of the freshly
# inserted row.  Returns 0 when the word could not be inserted.  Uses
# the database handle $dbh, which is defined outside this sub.
sub WordList::record_word {
    my ($word) = @_;

    # Reuse the existing entry when the word is already known.
    my $wordid = WordList::get_wordid($word);
    if ($wordid) {
        Debug::print("Word ($word) found in wordlist (wordid: $wordid)");
        return $wordid;
    }

    my $sth = $dbh->prepare(qq[INSERT INTO wordlist (word) VALUES (?)]);
    if (!$sth) {
        print STDERR "ERROR: Could not update wordlist table. Reason: ", $dbh->errstr, "\n";
        return 0;
    }

    # Try to add the word to the wordlist.  A failed execute must stop
    # here: rows() is typically -1 afterwards, which would otherwise be
    # mistaken for "something was inserted".
    if (!$sth->execute($word)) {
        print STDERR "ERROR: Could not update wordlist table. Reason: ", $sth->errstr, "\n";
        $sth->finish;
        return 0;
    }

    # Fetch the generated ID, which callers use as the reference for the
    # wordmatch table.
    $wordid = 0;
    if ($sth->rows != 0) {
        my $new_id = $dbh->last_insert_id(undef, undef, qw(wordlist wordid));

        # last_insert_id may return undef; only a true ID counts as success.
        if ($new_id) {
            $wordid = $new_id;
            Debug::print("Word ($word) added to wordlist (wordid: $wordid)");
        }
    }
    $sth->finish;

    return $wordid;
}
WordList/record_wordmatch [ Functions ]
FUNCTION
Adds a reference where a word was found.
INPUTS
eventid -- the ID of the sentence in the events table wordid -- the ID of the word
SOURCE
# Add a reference stating that a word occurs in a given event.
#
#   eventid -- the ID of the sentence in the events table
#   wordid  -- the ID of the word
#
# Does nothing when WordList::match_exists reports that the pair is
# already stored.  Returns nothing.  Uses the database handle $dbh,
# which is defined outside this sub.
sub WordList::record_wordmatch {
    my ($eventid, $wordid) = @_;

    # Avoid duplicate rows for the same (eventid, wordid) pair.
    return if WordList::match_exists($eventid, $wordid);

    my $sth = $dbh->prepare(qq[INSERT INTO wordmatch (eventid, wordid) VALUES (?, ?)]);
    if (!$sth) {
        print STDERR "ERROR: Could not update wordmatch table. Reason: ", $dbh->errstr, "\n";
        return;
    }

    # Stop on a failed insert; rows() is typically -1 afterwards and must
    # not be reported as a successful addition.
    if (!$sth->execute($eventid, $wordid)) {
        print STDERR "ERROR: Could not update wordmatch table. Reason: ", $sth->errstr, "\n";
        return;
    }

    if ($sth->rows != 0) {
        Debug::print("Wordmatch added (eventid: $eventid wordid: $wordid)");
    }

    return;
}
WordList/update_wordlist [ Functions ]
FUNCTION
Wrapper function to add new words to the wordlist.
INPUTS
eventid -- the ID of the sentence in the events table sentence -- the line in question
SOURCE
# Wrapper that records every usable word of a sentence in the wordlist
# and links it to the event the sentence belongs to.
#
#   eventid  -- the ID of the sentence in the events table
#   sentence -- the line in question
#
# Each word is passed to WordList::record_word and, when that yields an
# ID, to WordList::record_wordmatch.  Returns nothing.
sub WordList::update_wordlist {
    my ($eventid, $sentence) = @_;

    return unless defined $sentence;

    # Handle only words that consist of at least 3 characters and do not
    # exceed the maximum size of the column inside the database.
    my $min_length = 3;
    my $max_length = 40;

    # Filter punctuation.
    $sentence =~ s/[\!\:\;\,\.\?\/\<\>\@\-\%\^\&\$\+\=\*\(\)\{\}\[\]\'\"\`\|\_\~\#\\]/ /g;

    # Split the sentence into words on any run of whitespace, so that tabs
    # and newlines separate words too and no empty fields are produced.
    my @words = split ' ', $sentence;

    # Process each word separately.
    WORD:
    foreach my $word (@words) {
        my $len = length $word;
        next WORD if $len < $min_length || $len > $max_length;

        my $wordid = WordList::record_word($word);

        # A false ID means the word could not be recorded; do not store a
        # match that points at a non-existent word.
        next WORD unless $wordid;

        WordList::record_wordmatch($eventid, $wordid);
    }

    return;
}