#program: bigramcount - counts the number of bigrams in a text,
# prints them out in order of decreasing frequency
# Count every pair of adjacent words (bigram) found in the input, which is
# read from the files named on the command line or from standard input
# when none are given.
my %count;        # "first second" => number of times that bigram was seen
my $prev_word;    # undefined until the first word has been read, so that no
                  # bogus " word" bigram is recorded for the start of input
while (my $line = <>) {
    chomp $line;                    # strip the newline only; chop would eat a
                                    # real character on an unterminated last line
    $line =~ tr/A-Z/a-z/;           # fold ASCII upper case to lower case
    $line =~ tr/.,:;!?"(){}//d;     # delete punctuation characters
    foreach my $word (split ' ', $line) {
        # $prev_word is deliberately kept across lines, so a bigram may
        # span a line break.
        $count{"$prev_word $word"}++ if defined $prev_word;
        $prev_word = $word;
    }
}

# Print one "count bigram" line per distinct bigram, in the order defined
# by the numerically comparator.
foreach my $bigram (sort numerically keys %count) {
    print "$count{$bigram} $bigram\n";
}
sub numerically {    # sort comparator: order bigrams by their count
    # Uses sort's package variables $a and $b (the two keys being compared)
    # and looks their counts up in %count.
    # Decreasing count; bigrams with equal counts fall back to alphabetical
    # order so the output is identical on every run (hash key order is not).
    return $count{$b} <=> $count{$a} || $a cmp $b;
    # For increasing count use this line instead:
    # return $count{$a} <=> $count{$b} || $a cmp $b;
}
# Friday, January 27, 2006
# Subscribe to:
# Post Comments (Atom)
# No comments:
# Post a Comment