Friday, January 27, 2006

Perl Bigram count

# program: bigramcount - counts how often each bigram (pair of adjacent
# words) occurs in a text, and prints the bigrams with their counts in
# order of decreasing frequency

# Read every input line (files named on the command line, or STDIN) and
# tally each pair of adjacent words in the package hash %count, keyed as
# "previous current". Pairs are also formed across line boundaries: the
# last word of one line pairs with the first word of the next.
my $word2;    # previous word seen; stays undef until the first word is read
while (<>) {
    # chomp strips only a trailing newline; chop would also eat a real
    # character when the final line of input is not newline-terminated.
    chomp;
    tr/A-Z/a-z/;          # fold ASCII upper case to lower case
    tr/.,:;!?"(){}//d;    # delete punctuation characters
    foreach my $word1 (split) {
        # The very first word has no predecessor, so it forms no bigram.
        $count{"$word2 $word1"}++ if defined $word2;
        $word2 = $word1;
    }
}

# Report phase: emit one "<count> <bigram>" line for every key of %count,
# in the order imposed by the numerically comparator.
print "$count{$_} $_\n" for sort numerically keys %count;

# Sort comparator for keys of %count (used as: sort numerically keys %count).
# Orders the keys $a and $b by their tallies in %count, highest first.
# Keys with equal tallies are ordered by string comparison so that the
# resulting order is the same on every run instead of following hash order.
sub numerically {
    return $count{$b} <=> $count{$a}    # decreasing order of count
        || $a cmp $b;                   # tie-break: ascending by key
    # For increasing order of count, swap the operands of <=> instead:
    #   $count{$a} <=> $count{$b}
}

No comments: