#!/usr/bin/perl

#
# My2Pg: MySQL to PostgreSQL dump conversion utility
# (c) Valentine Danilchuk	<valdan@ziet.zhitomir.ua>
# (c) Maxim Rudensky		<fonin@ziet.zhitomir.ua>
#
# $Id: my2pg.pl,v 1.1 2000/11/15 16:47:34 fonin Exp $

# Warning: This version is extremely bugsome. Use at your own risk!

# TODO:
# + Handle UNIQUE fields (with this version, they need manual correction)
# - Handle ENUMs commonly
# - Handle SETs
# - Handle tables w/o PRIMARY KEY (with this version, they need manual correction)
# + Handle UNSIGNED correctly
# + Use double quotes with all identifiers to avoid problems with reserved words
# + It is better to create the indices _after_ the data is inserted (pg_dump fixes it)

$|=1;

print("-- My2Pg 1.0 translated dump\n\n");

print("\nBEGIN;\n\n\n");

my %index;
my $j=-1;
my @check;

while (<>) {

# Comments start with -- in SQL
    s/#/--/;
# Convert integers
    s/tinyint\(\d+\)/INT2/i;
    s/smallint\(\d+\)/INT2/i;
    s/mediumint\(\d+\)/INT4/i;
    s/bigint\(\d+\)/INT8/i;
    s/int\(\d+\)/INT4/i;
    s/float(\(\d+,\d*\))/DECIMAL$1/i;
# Change all AUTO_INCREMENT fields to SERIAL ones with a pre-defined sequence
    s/int.*?AUTO_INCREMENT/SERIAL/i;

# Fix timestamps
    s/0000-00-00/0000-01-01/;

#<Hackzone> ---------------------------------------------------

# convert UNSIGNED to CHECK constraints
    if(/^\s+?([\w\d_]+).*?unsigned/i) {
	$check.=",\n  CHECK ($1>=0)";
    }
    s/unsigned//i;

# Limited ENUM support - little heuristic
    s/enum\('N','Y'\)/BOOL/i;
    s/enum\('Y','N'\)/BOOL/i;
# ENUM support
    if(/^\s+?([\w\d_]+).*?enum\((.*?)\)/i) {
	my $enumlist=$2;
	my @item;
	$item[0]='';
	while($enumlist=~s/'([\d\w_]+)'//i) {
	    $item[++$#item]=$1;
	}
	$typename='enum_'.$table_name.'_'.$item[1];
# creating input type function
	$func_in="
	DECLARE
	    invalue ALIAS for \$1;
	BEGIN";
	for(my $i=0;$i<=$#item;$i++) {
	    $func_in.="\n\t\tIF invalue=''$item[$i]'' OR invalue=''$i'' THEN RETURN $i; END IF;";
	}
	$func_in.="\n\t\tRAISE EXCEPTION ''incorrect input value: %'',invalue;
	END;";
	$types.="\nCREATE FUNCTION $typename"."_in (opaque)
	RETURNS $typename
	AS '$func_in'
	LANGUAGE 'plpgsql'
	WITH (ISCACHABLE);";

# creating output function
	$func_out="
	DECLARE
	    outvalue ALIAS for \$1;
	BEGIN";
	for(my $i=0;$i<=$#item;$i++) {
	    $func_out.="\n\t\tIF outvalue=$i THEN RETURN ''$item[$i]''; END IF;";
	}
	$func_out.="\n\t\tRAISE EXCEPTION ''incorrect output value: %'',outvalue;
	END;";

	$types.="\nCREATE FUNCTION $typename"."_out (opaque)
	RETURNS opaque
	AS '$func_out'
	LANGUAGE 'plpgsql'
	WITH (ISCACHABLE);";
	$types.="\nCREATE TYPE $typename (
	internallength = 2,
	input = $typename\_in,
	output = $typename\_out,
	PASSEDBYVALUE
);";
	s/enum\(.*?\)/$typename/i;
    }

# Change multy-field keys to multi-field indices
# MySQL Dump usually ends the CREATE TABLE statement like this:
# CREATE TABLE bids (
#   ...
#   PRIMARY KEY (bids_id),
#   KEY offer_id (offer_id,user_id,the_time),
#   KEY bid_value (bid_value)
# );
# We want to replace this with smth like
# CREATE TABLE bids (
#   ...
#   PRIMARY KEY (bids_id),
# );
#   CREATE INDEX offer_id ON bids (offer_id,user_id,the_time);
#   CREATE INDEX bid_value ON bids (bid_value);
    if (/CREATE TABLE (.*) /i) {
	if($oldtable ne $table_name) {
	    $oldtable=$table_name;
	    $j=-1;
	    $check='';
	}
	$table_name=$1;
    }
    s/^\);/\);\n-- Possible bugs here!!!/;

# output CHECK constraints instead UNSIGNED modifiers
    s/(PRIMARY KEY \(.*\)).*/$1$check\n/i;
    
    if(/^\s*KEY (.+) (\(.*\)).*/i) {
	$index{$table_name}[++$j]="CREATE INDEX $1_$table_name\_index ON $table_name $2;";
    }
    if(/^\s*UNIQUE (.+) (\(.*\)).*/i) {
	$index{$table_name}[++$j]="CREATE UNIQUE INDEX $1_$table_name\_index ON $table_name $2;";
    }
    s/^\s*UNIQUE (.+) (\(.*\)).*\n//i;
    s/^\s*KEY (.+) (\(.*\)).*\n//i;

# Quote lowercase identifiers in double quotes
    while(!/^--/ && s/\s([a-z_\d]+)\s/ "$+" /) {;}


#</Hackzone> --------------------------------------------------
    print();
}

# Output indices for tables
while(my($table,$ind)=each(%index)) {
    print "\n\n--";
    print "\n-- Indexes for table $table";
    print "\n--\n";
    for(my $i=0;$i<=$#{$ind};$i++) {
	print "\n$ind->[$i]";
    }
}

print("\n\nEND;\n");

### debug
print "\n".$types;
