################################################################################
# Data::Random
#
# A module used to generate random data.
#
# Author: Adekunle Olonoh
# Date: October 2000
################################################################################
package Data::Random::WordList;
################################################################################
# - Modules and Libraries
################################################################################
#require 5.005_62;
use FileHandle;
use File::Basename qw(dirname);
################################################################################
# - Global Constants and Variables
################################################################################
# Module version; the VERSION section of the POD at the end of this file
# reports the same number.
$Data::Random::WordList::VERSION = '0.05';
################################################################################
# - Subroutines
################################################################################
################################################################################
# new()
################################################################################
# Constructor: opens the word list file and counts how many lines it holds.
#
# Arguments (key/value pairs):
#   wordlist - path to the word list file, one word per line.  When omitted
#              (or false), the "dict" file that sits next to this module in
#              the Data::Random distribution is used.
#
# Returns a blessed hash with two entries: 'fh' (the open FileHandle) and
# 'size' (the number of lines in the file).
#
# Dies if the file cannot be opened for reading.
sub new {
    my $proto   = shift;
    my %options = @_;

    # Accept either a class name or an existing object as the invocant
    my $class = ref($proto) || $proto;

    # Work out the default dictionary only when no path was supplied.  The
    # location recorded for Data::Random is preferred; when that module has
    # not been loaded, fall back to the directory containing this file rather
    # than failing inside dirname(undef).
    if ( !$options{'wordlist'} ) {
        $options{'wordlist'} =
          defined $INC{'Data/Random.pm'}
          ? dirname( $INC{'Data/Random.pm'} ) . '/Random/dict'
          : dirname(__FILE__) . '/dict';
    }

    # Open strictly for reading.  Passing an explicit mode keeps characters
    # such as '>' or '|' in the path from being interpreted as an open mode.
    my $fh = FileHandle->new( $options{'wordlist'}, 'r' )
      or die "could not open $options{'wordlist'} : $!";

    # Count the lines in the file.  A lexical is used instead of $_ so that a
    # caller's $_ (possibly aliased to one of its own variables) is untouched.
    my $size = 0;
    while ( defined( my $line = <$fh> ) ) {
        $size++;
    }

    # Create the object
    my $self = bless {
        'fh'   => $fh,
        'size' => $size,
    }, $class;

    return $self;
}
################################################################################
# close()
################################################################################
# Releases the word list's filehandle.
#
# Returns whatever the handle's own close method returns.
sub close {
    my ($self) = @_;

    my $handle = $self->{'fh'};
    return $handle->close;
}
################################################################################
# get_words()
################################################################################
# Picks distinct random lines ("words") from the word list.
#
# Arguments:
#   NUM - how many words to return; defaults to 1 when omitted or false.
#
# Returns the chosen words with their line endings removed, in the order in
# which they appear in the file: a list in list context, an array reference
# otherwise.
#
# Dies if NUM is negative or not an integer, if NUM exceeds the number of
# lines counted when the object was created, or if the filehandle cannot be
# rewound.
sub get_words {
    my $self = shift;
    my $num  = shift || 1;
    my $fh   = $self->{'fh'};

    # Perform some error checking
    die 'the size value must be a positive integer'
      if $num < 0 || $num != int($num);
    die
"$num words were requested but only $self->{'size'} words exist in the wordlist"
      if $num > $self->{'size'};

    # Draw zero-based line numbers until $num distinct ones have been chosen;
    # a repeated draw simply leaves the hash unchanged and is retried.
    my %wanted;
    while ( keys(%wanted) < $num ) {
        $wanted{ int( rand( $self->{'size'} ) ) } = 1;
    }

    # Seek to the beginning of the filehandle
    $fh->seek( 0, 0 ) or die "could not seek to position 0 in wordlist: $!";

    # Walk the file once, keeping the selected lines.  A lexical is used
    # instead of $_ so the caller's $_ is never overwritten, and reading stops
    # as soon as every requested word has been collected.
    my @rand_words;
    my $line_no = 0;
    while ( defined( my $word = <$fh> ) ) {
        if ( $wanted{$line_no} ) {
            chomp $word;
            push @rand_words, $word;
            last if @rand_words == $num;
        }
        $line_no++;
    }

    # Return an array or an array reference, depending on the calling context
    return wantarray() ? @rand_words : \@rand_words;
}
# A true value must be returned so that require/use of this module succeeds
1;

=head1 NAME

Data::Random::WordList - Perl module to get random words from a word list

=head1 SYNOPSIS

    use Data::Random::WordList;

    my $wl = Data::Random::WordList->new( wordlist => '/usr/dict/words' );

    my @rand_words = $wl->get_words(10);

    $wl->close();

=head1 DESCRIPTION

Data::Random::WordList is a module that manages a file containing a list of words.

The module expects each line of the word list file to contain only one word. It could thus be easily used to select random lines from a file, but for consistency's sake, I'll keep referring to each line as a word.

The module uses a persistent filehandle so that there isn't a lot of overhead every time you want to fetch a list of random words. However, it's much more efficient to grab multiple words at a time than it is to fetch one word at a time multiple times.

The module also refrains from reading the whole file into memory, so it can be safer to use with larger files.

=head1 METHODS

=head2 new()

Returns a reference to a new Data::Random::WordList object. Use the "wordlist" param to initialize the object:

=over 4

=item *

wordlist - the path to the wordlist file. If a path isn't supplied, the wordlist distributed with this module is used.

=back

=head2 get_words([NUM])

NUM contains the number of words you want from the wordlist. NUM defaults to 1 if it's not specified. get_words() dies if NUM is greater than the number of words in the wordlist. This function returns an array or an array reference depending on the context in which it's called.

=head2 close()

Closes the filehandle associated with the word list. It's good practice to do this every time you're done with the word list.

=head1 VERSION

0.05

=head1 AUTHOR

Adekunle Olonoh, koolade@users.sourceforge.net

=head1 COPYRIGHT

Copyright (c) 2000 Adekunle Olonoh. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 SEE ALSO

Data::Random

=cut
# syntax highlighted by Code2HTML, v. 0.9.1