Data Generator

Save the following perl script as mk_examples.pl then use as follows:

perl mk_examples.pl > example_data.txt
psql testdb -f example_data.txt
  

You can control the amount of data produced by altering the num_xxx variables at the top of the script.

#!/usr/bin/perl -w
use strict;

# Generate the following numbers of records for the example database
#
my $num_companies = 100;
my $num_products = 10;
my $num_orders = 1000;
my $num_diary = 1000;

# The following elements are used to build example records
#
my @co_name1 = qw(Acme British Continental Imperial Mammon United Universal Worldwide);
my @co_name2 = qw(Associates Engineering Industries Investments Partnership Research Services);
my @co_name3 = qw(Corp Inc Ltd PLC);
my @pr_name1 = qw(Small Medium Large Deluxe);
my @pr_name2 = qw(Red Blue Green Black White);
my @pr_name3 = qw(Widget Gizmo Thingummy Boggle);
my @dy_notes = ('Blah blah blah blah blah', 'Rhubarb rhubarb rhubard', 'Yak yak yak', 'Whatever');
my $from_time= time() - 60*60*24*365;
my $to_time  = time();

# The following keep track of the companies and products.
#
my (%co_hash, %pr_hash);
my (@co_list, @pr_list);
my $i;

# Seed the random-number generator
#
srand(123456);

# Build some companies
#
print "-- Companies\n--\n";
for ($i=1; $i<=$num_companies; $i++) {
    my $new_co_name = mk_name(\%co_hash, \@co_name1, \@co_name2, \@co_name3);
    print "INSERT INTO companies VALUES ($i, '$new_co_name', 'unknown', now());\n";
    $co_hash{$new_co_name} = $i;
    push @co_list, $i;
}
print "select setval('companies_co_id_seq',$i);\n\n";

# Now build products
#
print "-- Products\n--\n";
for ($i=1; $i<=$num_products; $i++) {
    my $new_pr_name = mk_name(\%pr_hash, \@pr_name1, \@pr_name2, \@pr_name3);
    my $new_pr_code = mk_code($new_pr_name);
    print "INSERT INTO products VALUES ('$new_pr_code', '$new_pr_name');\n";
    $pr_hash{$new_pr_name} = $new_pr_code;
    push @pr_list, $new_pr_code;
}
print "\n";

# Create orders for products
#
print "-- Orders\n--\n";
for ($i=1; $i<=$num_orders; $i++) {
    my $ord_co = $co_list[ rand($num_companies) ];
    my $ord_pr = $pr_list[ rand($num_products) ];
    my $ord_qty= int( rand(100)+1 );
    my $base_time = int( rand($to_time-$from_time) + $from_time );
    my $ord_placed= fmt_date($base_time);
    my $ord_delivered= ($i%2==0)?fmt_date($base_time+60*60*24*7):'null';
    my $ord_paid= ($i%4==0)?fmt_date($base_time+60*60*24*14):'null';
    print "INSERT INTO orders VALUES ($i,$ord_co,'$ord_pr',$ord_qty,$ord_placed,$ord_delivered,$ord_paid);\n";
}
print "select setval('orders_ord_id_seq',$i);\n\n";

# Add some diary entries
my $num_dy_notes = scalar @dy_notes;
print "-- Diary\n--\n";
for ($i=1; $i<=$num_diary; $i++) {
    my $dy_co = $co_list[ rand($num_companies) ];
    my $dy_type = 'NOTE';
    my $dy_time = fmt_date( int( rand($to_time-$from_time) + $from_time ) );
    my $dy_note = $dy_notes[ rand($num_dy_notes) ];
    print "INSERT INTO diary VALUES ($i,$dy_co,$dy_time,'$dy_type','$dy_note');\n";
}
print "select setval('diary_dy_id_seq',$i);\n\n";
exit;

# mk_name(KEYHASH, NAME1, NAME2, NAME3)
#
sub mk_name {
    my ($keyhash, $names1, $names2, $names3) = @_;
    my $nlen1 = scalar @$names1;
    my $nlen2 = scalar @$names2;
    my $nlen3 = scalar @$names3;
    my $name;
    while (!$name) {
        my $name_cand = $names1->[ rand($nlen1) ] . ' '.
        $names2->[ rand($nlen2) ] . ' '.
        $names3->[ rand($nlen3) ];
        if (!$keyhash->{$name_cand}) { $name = $name_cand; }
    }
return $name;
}

# mk_code(NAME)
#
sub mk_code {
    my $name = shift;
    $name =˜ /(\w)\w+ (\w)\w+ (\w{3})\w+/;
    my $code = uc($1.$2.$3);
    return $code;
}

# fmt_date(EPOCH)
#
sub fmt_date {
    my $tm = shift;
    my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($tm);
    $year+=1900;
    $mon++;
    return "'$year-$mon-$mday'";
}