#!/usr/bin/perl -w

####################################################################
# pgfsck - a checking and dumping program for PostgreSQL databases #
# Copyright Martijn van Oosterhout <kleptog@svana.org> April 2002  #
#                                                                  #
# This program understands the internal structure of the tables    #
# and attempts to check them. It picks up on many types of errors. #
# It is also a dumping program of last resort. It will read the    #
# table and can output insert statements to reconstruct the table  #
# (or a version of it anyway). It won't reconstruct your schema    #
# though.                                                          #
#                                                                  #
# This program may be distributed under the sames terms as         #
# PostgreSQL itself.                                               #
####################################################################

use strict;
use DiskStruct;
use PgVersion;
use Data::Dumper;
use Getopt::Std;

my %opts;
getopts("D:s:dar:", \%opts);

if( @ARGV == 0 )
{
  print STDERR "$0 [-D /path/to/database] [-s blocksize] [-d] [-a] [-r filename] dbname [tablename...]\n";
  print STDERR <<EOF;
    -a                 While dumping, dump all tuples rather than just xmax == 0 (default: no)
    -d                 Dump the tables rather than just scanning them (default: no)
    -D path            Use alternate database root (default: /var/lib/postgres/data)
    -r filename        When reading table, read this file instead. Nice for testing.
    -s blocksize       Use alternate blocksize (default: 8192)
EOF

  exit 1;
}

# List of options
my $database = shift @ARGV;
my $datapath = $opts{'D'} || "/var/lib/postgres/data/";
my $blocksize = $opts{'s'} || 8192;
my $dumptable = $opts{'d'} || 0;
my $dumpall   = $opts{'a'} || 0;
my $readfile  = $opts{'r'};        # Default is undef

# Verify the path and detect the version of the database
my $version = SetDataPath($datapath) || exit;

printf "-- Detected database format %.1f\n", $version/10;

# Load up the various varibles with stuff needed to read the DB
my $header = GetPageHeader();  # Structure of the header

# Tuple header is thankfully unchanged between versions
my $tupleheader = new DiskStruct( "LLLLLSSSSSC", [ qw( oid cmin cmax xmin xmax tid1 tid2 tid3 natts infomask size ) ] );

# pg_attribute did change between versions
my $pg_attribute = GetPGAttribute();

# pg_class did change, but since we only use the first field, this works to start with
my $pg_class = new DiskStruct( "A32", [qw(relname)] );

my %tableoid = ( "pg_class" => 1259, "pg_database" => 1262 );    # Map table to OID
my %tableattrs = ( "pg_class" => $pg_class, "pg_attribute" => $pg_attribute );  # Map table to structure
my %dboidmap = ( "template1" => 1 );   # Bootstrap database to OID map

# Functions to display various types. Obviously needs expansion
my %typeoutputs = ( bool   => sub { (unpack "C", $_[0])?'t':'f' },
                    char   => sub { $_[0] },
                    name   => sub { unpack "A*", $_[0] },
                    oid    => sub { unpack "L", $_[0] },
                    int2   => sub { unpack "s", $_[0] },
                    int4   => sub { unpack "l", $_[0] },
                    float4 => sub { unpack "f", $_[0] },
                    text   => sub { $_[0] } );
# Maps type OID to type name
my %typeinfo;

my @context;

# Read the type OID map from database
sub GetTypeInfo ($)
{
  my $typetable = shift;

  my $types = TableScan( $typetable, sub {1} );

  %typeinfo = map { ($_->{oid}, $_->{typname}) } @$types;
}

# Read the structure of the table from the database
sub GetAttributeInfo ($)
{
  my $class = shift;

  my $classoid = $tableoid{$class} || die "GetAttributeInfo called without initialised map\n";

  # Get all the relevent attributes and sort them
  my $res = TableScan( "pg_attribute", sub { $_[0]{attrelid} == $classoid and $_[0]{attnum} > 0 } );
  my @attrs = sort { $a->{attnum} <=> $b->{attnum} } @$res;

  my @attinfo;

  # Convert alignment types to number. Portable?
  my %alignmap = ( c  => 1, s   => 2, i   => 4, d   => 8,    # Before sorting out types, chars become numbers
                   99 => 1, 115 => 2, 105 => 4, 100 => 8 );

  # Here we just store the data. The trickery is later
  foreach my $att ( @attrs )
  {
    push @attinfo, [ $att->{attname}, $att->{atttypid}, $att->{attlen}, $alignmap{$att->{attalign}} ];
  }
  $tableattrs{$class} = [@attinfo];
}

# Parse a tuples. It is passed:
# - The tuple data itself. As a string, all the bytes after the header
# - The description of the structure of the tuple
# - Anonymous hash containing the contents of the tuple header, decoded
# - String containing the nulllist.
sub ParseTuple ($$$$)
{
  my($tupledata,$attrinfo,$header,$nulllist) = @_;

  my $i;

  my %data;
  my $off = 0;
  # Expand the nulllist string into a list of ones and zeros
  my @nulls = map { my $a = $_; map { ($a&(2**$_))?1:0 } (0..7) } unpack("c*", $nulllist);

#  print ">>nulllist=",join(",",@nulls),"\n";

#  print ">>",(map { sprintf "%02X ", $_ } unpack("C*", $tupledata)),"\n";

  # Loop through the attributes in the tuple
  for my $i (1..$header->{natts})
  {
    my ($name,$type,$len,$align) = @{ $attrinfo->[$i-1] };

#    print ">> Off $off: Field $i: ($name,$type,$len,$align)\n";
    
    # If it's null, attribute is skipped
    if( defined ($nulls[$i-1]) and not $nulls[$i-1] )
    {
#      print ">>>null($nulls[$i-1])\n";
      $data{$name} = undef;
      next;
    }

    # Deal with the alignment requirements
    $off = ($off + $align - 1) & ~($align - 1);

    # Have we gone over the length of the tuple?
    if( $off > length($tupledata) )
    {
      print ErrorContext( "Decoding tuple runs off end: $off > ".length($tupledata) );
      return \%data;
    }
    # Shift to the right part of the tuple
    my $fielddata = substr($tupledata,$off);

    my $val;
    my $outputfunc;

    # Decide how this type is output
    if( defined $typeinfo{$type} and defined $typeoutputs{$typeinfo{$type}} )
    {
      $outputfunc = $typeoutputs{$typeinfo{$type}};
    }
    elsif( scalar( %typeinfo ) )   # No point complaining unless we have some actual info
    {
      print ErrorContext( "Unknown type $typeinfo{$type} ($type)" );
    }

    # If it's a fixed-length field, we just pass it to the conversion function. Done.
    if( $len > 0 and defined $outputfunc )
    {
      $val = $outputfunc->( substr( $fielddata, 0, $len ) );
    }
    elsif( $len == 1 )     # 1 byte defaults to int1
    {
      $val = unpack("c", $fielddata);
    }
    elsif( $len == 2 )     # 2 bytes defaults to int2
    {
      $val = unpack("s", $fielddata);
    }
    elsif( $len == 4 )     # 4 bytes defaults to int4
    {
      $val = unpack("l", $fielddata);
    }
    elsif( $len == -1 )  # Variable length
    {
      my $va_header = unpack("l", $fielddata);   # Decode the first byte
      if( $va_header & 0x80000000 )  # External tuple?
      {
        # We decode the location, but don't look it up
        my @vals = unpack("llLL", substr($fielddata,4));
        $val = "extern(raw=$vals[0],real=$vals[1],oid=$vals[2],relid=$vals[3])";
        $len = 20;
      }
      elsif( $va_header & 0x40000000 )  # Compressed tuple?
      {
        # Again, we don't decode it. Too hard.
        my $clen = unpack("l", substr($fielddata,4));
        $val = "compressed($clen)";
        $len = ($va_header & 0x3fffffff);
      }
      else  # Ordinary inline value
      {
        $val = substr($fielddata,4,($va_header & 0x3fffffff)-4);
        $len = ($va_header & 0x3fffffff);

        if( defined $outputfunc )
        {
          $val = $outputfunc->( $val );   # Use the output function
        }
      }
    }
    else   # All other fixed lengths
    {
      $val = unpack("A$len", $fielddata);   # Default to NULL termination
    }
    # Store value and move along
    $data{$name} = $val;
    $off += $len;
  }
  # Final check
  if( $off != length($tupledata) )
  {
    print ErrorContext( "Tuple incorrect length (parsed data=$off,length=".length($tupledata).")" );
  }

  return \%data;
}

# Scan an entire table and return a set of tuples. Takes:
# - A name of a table
# - Function reference. When passed the tuple, return true to accept.
sub TableScan ($$;$);
sub TableScan ($$;$)
{
  my ($class,$qualify,$filename) = @_;

  ### Get the OID of the table, looking up pg_class if necessary ###

  my $classoid;
  if( defined $tableoid{$class} )
  { $classoid = $tableoid{$class} }
  else
  {
    my $rels = TableScan( "pg_class", sub { $_[0]{relname} eq $class } );
    if( @$rels == 0 )
    { die "Couldn't find class '$class'\n" }
    if( @$rels  > 1 )
    { die "Found multiple versions of '$class' ???\n" }

    $classoid = $tableoid{$class} = $rels->[0]->{oid};
  }

  PushContext( "Table $class($classoid)" );

  ### Get the attribute structure ###
  if( not defined $tableattrs{$class} )
  {
    GetAttributeInfo( $class );
  }

  my $file;
  if( defined $filename )
  {
    $file = new FileHandle "<$filename" or die "Couldn't open file $filename ($!)\n";
  }
  else
  {
    $file = OpenRelation( $class, $classoid );
  }

  my $buffer;
  my @res;
  # Look through each block
  my $blockid = 0;
  while( read $file, $buffer, $blocksize )
  {
    PushContext( "Page $blockid" );

    # Decode the page header
    my $headerdata = $header->decode( $buffer );
    my $i;

    if( defined $headerdata->{_error} )
    {
      print ErrorContext( $headerdata->{_error} );
      next;
    }

    if( $headerdata->{opaque} != $blocksize or
        $headerdata->{lower} & 3 or
        $headerdata->{lower} > $headerdata->{upper} )
    {
      print ErrorContext("Incorrect value in header (incorrect blocksize?)");
      print "-- ",(map { sprintf "%02X ", $_ } unpack("C*", substr( $buffer, 0, $headerdata->{_sizeof} ) )),"\n";
      next;
    }

    PushContext( "" );

    my $numtuples = ($headerdata->{lower} - $headerdata->{_sizeof}) / 4;

    # Cycle through the item data pointers
    for( $i = 0; $i < $numtuples; $i ++)
    {
      PopContext();
      PushContext( "Tuple $i" );

      my $num = unpack("L", substr( $buffer, $headerdata->{_sizeof}+4*$i, 4));

      next unless (($num >> 15)&3)&1;  # Ignore tuples not marked as used

      my $offset = $num & 0x7fff;
      my $length = $num>>17;

      if( ( $offset < $headerdata->{upper} ) or ($offset >= $headerdata->{special} ) )
      {
        print ErrorContext( "Bad tuple offset. Should be: $headerdata->{upper} <= $offset < $headerdata->{special}" );
        next;
      }

      # Extract the tuple and decode the header of the tuple
      my $tuple = substr( $buffer, $offset, $length );

      if( length($tuple) < $length )
      {
        print ErrorContext( "Tuple exceeds block (offset=$offset,length=$length)" );
      }

      if( length($tuple) < 32 )
      {
        print ErrorContext( "Tuple not even 32 bytes long ($length)" );
        next;
      }
      my $h = $tupleheader->decode( $tuple );

      if( defined $h->{_error} )
      {
        print ErrorContext( "TupleHeader: ".$h->{_error} );
        next;
      }

      $h->{page} = $blockid;
      $h->{tuple} = $i;

      my $tup;

      if( ref( $tableattrs{$class} ) eq "DiskStruct" )  # Bootstrap structure?
      {
        $tup = $tableattrs{$class}->decode( substr( $tuple, $h->{size} ) );
      }
      else   # Proper attribute info, do proper decoding
      {
        $tup = ParseTuple( substr( $tuple, $h->{size} ),        # Tuple data
                           $tableattrs{$class},                 # Attribute info
                           $h,                                  # Header info 
                           ($h->{infomask}&1)?substr( $tuple, $h->{_sizeof}, $h->{size}-$h->{_sizeof} ):"" ); # Null structure
      }

      # Generate tuple, test for match and remember if accepted
      my %res = (%$h,%$tup);

      next unless $qualify->(\%res);

      push @res, \%res;
    }
    PopContext();

    $blockid++;
  } continue { PopContext() }

  close $file;

  PopContext();

  return \@res;
}

# Get list of names and oids of databases
sub GetDatabaseList ()
{
  my $dblist = TableScan( "pg_database", sub {1} );

  %dboidmap = map { ( $_->{datname}, $_->{oid} ) } @$dblist;
}

sub PushContext ($)
{
  push @context, shift;
}

sub PopContext ()
{
  pop @context;
}

sub ErrorContext ($)
{
  return "-- ".join(":",@context).": ".shift()."\n";
}

# template1 first, since it's the only one with know OID
SetDatabase("template1",1) || die;

# Update with versions from disk. Should make it less sensetive to version changes
GetAttributeInfo( "pg_class" );
GetAttributeInfo( "pg_attribute" );

# Read pg_database to get list
GetDatabaseList();

if( not defined $dboidmap{$database} )
{
  die "Unknown database '$database'\n";
}

# Switch database
SetDatabase( $database, $dboidmap{$database});

# Read the types. Chicken and egg. Read before or after switch DB?
GetTypeInfo( "pg_type" );   # Load type information from dosl

# Process the tables
my @tables;

if( @ARGV )
{
  @tables = @ARGV;
}
else
{
  my $tables = TableScan( "pg_class", sub { $_[0]->{relname} !~ /^pg_/ and $_[0]->{relkind} eq "r" } );

  @tables = map { $_->{relname} } @$tables;
}

if( defined $readfile and @tables != 1 )
{
  die "The -r switch can only be used on a single table\n";
}

foreach my $table (@tables)
{
  my $res;

  if( not $dumptable )
  {
     $res = TableScan( $table, sub { 0 }, $readfile );
  }
  elsif( $dumptable and not $dumpall ) 
  {
     $res = TableScan( $table, sub { $_[0]->{xmax} == 0 }, $readfile );
  }
  else  # Dump everything
  {
     $res = TableScan( $table, sub { 1 }, $readfile );
  }

  my @attrs = map { $_->[0] } @{ $tableattrs{$table} };

  foreach my $row (@$res)
  {
    print "insert into $table (",join(",",@attrs),") values (",
             join(",", map { defined($row->{$_})?"'$row->{$_}'":'null' } @attrs),
          "); -- page=$row->{page},tuple=$row->{tuple},oid=$row->{oid},xmin=$row->{xmin},xmax=$row->{xmax},cmin=$row->{cmin},cmax=$row->{cmax}\n";
  }
}
