#!/usr/bin/perl

use v5.10;
use LWP::Simple;
use Data::Dumper;
use JSON::Any;
use List::MoreUtils qw(none any uniq firstidx indexes);
use Scalar::Util qw(reftype);
use Lingua::EN::Inflect::Phrase qw(to_PL);
use DBI;

use subs qw(newobj dbiobj process_hash_tree foundin notin col2word print_debug);

#my $debug = 1;

my $json = get 'http://maps.googleapis.com/maps/api/directions/json?origin=Adelaide,SA&destination=Adelaide,SA&waypoints=optimize:true|Barossa+Valley,SA|Clare,SA|Connawarra,SA|McLaren+Vale,SA&sensor=false';
my $obj = JSON::Any->jsonToObj($json);
#my $tbl = newobj 'json', $obj;
#say Data::Dumper->new([$tbl], ['*tbl'])->Indent(2)->Dump;
my $dbh = dbiobj 'json', $obj;

my $cmd = 'SELECT steps_id, html_instructions, points, travel_mode FROM json_steps JOIN json_polylines USING (polyline_id)';
my $rows = $dbh->selectall_hashref($cmd, 'steps_id');
say Data::Dumper->new([$rows], ['*rows'])->Indent(2)->Sortkeys(1)->Dump;

sub newobj ($$) {
   my ($table_name, $tree) = @_;
   &process_hash_tree($table_name, $tree, 0);
   ### TODO: Normalize single row tables to other tables? ###
   ### TODO: Rename 'jsons' to 'json' ###

   my $tref = $tables;
   $tables = undef;  # remove only the first reference
   undef $columns;
   undef $ids;

   return $tref;
}

sub dbiobj ($$) {
   my ($table_name, $tree) = @_;
   my $tref = &newobj($table_name, $tree);

   my $dbh = DBI->connect('dbi:AnyData(RaiseError=>1):');
   foreach my $table (keys %$tref) {
      $dbh->func("json_$table", 'ARRAY', [$tref->{$table}{columns}, @{$tref->{$table}{data}}], 'ad_import');
   }

   return $dbh;
}

our $tables, $columns, $ids;

sub process_hash_tree {
   my ($col, $tree, $depth) = @_;
   my ($col_id, $serialized_tree);

   if ($depth > 100) {
      warn "Too deep down the rabbit hole; crawling back...\n";
      return $col => undef;
   }

   print_debug($depth, "$depth - $col => ".(reftype($tree) || substr($tree, 0, 20)));

   state $id_names = ['group', 'matrix', 'cube', 'hypercube'];  # if you go past here, you've gone too far...

   given (reftype $tree) {
      # Common code for both HASHs and ARRAYs
      when (/HASH|ARRAY/) {
         $col    = col2word($col);
         $col_id = $col.'_id';

         # compare serialized trees for the same IDs
         unless ($depth) {  # no point if this is the first node
            $serialized_tree = Data::Dumper->new([$tree], ['*'.$col_id])->
               # (options for consistency, for exact matches)
               Reset->Sortkeys(1)->
               # (options designed to use the smallest possible footprint, as these can get rather large)
               Indent(0)->Quotekeys(0)->Pair('=')->Dump;

            # already exists, makes this easier
            my $id = $ids->{trees}{$serialized_tree};
            if ($id) {
               print_debug($depth+1, "TREE <=== Dumper match ID ".join(' => ', split('|', $id)));
               return split('|', $id);
            }
         }
         continue;
      }
      # HASHs control the main tables, providing column names and data for the rows
      # Table = $col (plural)
      # ID = $col.'_id'
      when ('HASH') {
         # parse out a table name (with plural form)
         my $table_name = to_PL($col);
         $table_name =~ s/ /_/g;

         # now run through the columns and data (with recursive loop goodness)
         my %data = map {
            my $dat = $$tree{$_};
            # clean up the column names
            $_ = col2word($_);
            s/ /_/g;
            &process_hash_tree($_ => $dat, $depth + 1);
         } keys %$tree;
         ### FIXME: don't forget about undef in keys ###

         # check the new column names to see if we've seen this table before
         my @cols = ($col_id, sort keys %data);  # new ID column first
         my $col_key = join('|', @cols);

         if ($columns->{$col_key}) {               # known table
            $table_name = $columns->{$col_key};

            print_debug($depth+1, "HASH ===> Found known table '$table_name'");
         }
         elsif ($tables->{$table_name}) {  # errr, known table, but not with this column structure
            foreach my $j ('', 2 .. 200) {  # loop through a bunch of potential table names
               my $tname = $table_name.$j;
               my $t     = $tables->{$tname};

               if ($t) {
                  my @tcols = @{$t->{columns}};
                  my @ucols = uniq(@cols, @tcols);
                  # have to be the same ID columns  && need to have at least one field in common
                  # (remove keys while we're at it)
                  if (shift(@tcols) eq shift(@cols) && uniq(@cols, @tcols) < (@tcols + @cols)) {
                     # add all items not in the current column list
                     # (since they are on the end, there is no need to shift data around)
                     push(@{$t->{columns}}, notin(@tcols, @cols));

                     print_debug($depth+1, "HASH ===> Found known table with different columns '$table_name'");
                     last;
                  }

                  # wrong table to use; try next name
                  next;
               }
               else {  # just treat this as as new table, then
                  warn "Found a table with a dupe name, but totally different columns; calling it '$tname'...\n" if ($j);
                  $table_name = $tname;
                  $tables->{$table_name} = {};
                  $tables->{$table_name}->{columns} = [@cols];
                  $tables->{$table_name}->{data}    = [];
                  $columns->{$col_key} = $table_name;

                  print_debug($depth+1, "HASH ===> Creating new table '$table_name' because of conflicting columns");
                  last;
               }
            }

            $col_key = join('|', @{$t->{columns}});
         }
         else {                                    # new table
            $tables->{$table_name} = {};
            $tables->{$table_name}->{columns} = [@cols];
            $tables->{$table_name}->{data}    = [];
            $columns->{$col_key} = $table_name;

            print_debug($depth+1, "HASH ===> Creating new table '$table_name'");
         }

         # Add new row
         my $t = $tables->{$table_name};
         my $id = ++($ids->{table}{$table_name});
         $ids->{trees}{$serialized_tree} = $col_id.'|'.$id;
         push(@{$t->{data}}, [ $id, map { $data{$_} } grep { $_ ne $col_id } @{$t->{columns}} ]);

         # Since we're done with this table, send back the col_id and id#
         print_debug($depth+1, "HASH <=== $col_id => $id");
         return $col_id => $id;
      }
      # ARRAYs provide ID grouping tables, capturing the individual rows in a group
      # These are going to be two-column tables with two different IDs
      # Table = $col.'_groups' (plural)
      # ID = $col.(group|matrix|cube|etc.).'_id'
      when ('ARRAY') {
         # Pass the data on down first (ARRAY of ARRAYS to prevent de-duplication of keys)
         my @data = map { $_ = [ &process_hash_tree($col => $_, $depth + 1) ] } @$tree;

         # Okay, we could end up with several different scenarios:

         # A. All items have the same column name (as a ID)
         # B. All items appear to be some form of data
         # C. A mixture of IDs and data (scary!)

         # Process both groups individually (and hope for the best)
         ### TODO: Trouble shoot this in-depth (possible Perl bug): my @id_cols = indexes { $_->[0] =~ /_id$/; say $_->[0]." -----"; } @data;
         my @id_cols   = grep { $data[$_]->[0] =~ /_id$/; } (0 .. (@data - 1));
         my @data_cols = grep { $data[$_]->[0] !~ /_id$/; } (0 .. (@data - 1));
         @id_cols = () unless ($depth);  # skip any group ID tables if this is the very first node

         warn "Inconsistant sets of data within an array near '$col'; going to process it as best as possible...\n" if (@id_cols && @data_cols);

         # Items of IDs
         my (@max_id, @group_id);
         foreach my $i (@id_cols, @data_cols) {
            my ($icol, $item) = @{$data[$i]};

            # Process group ID names
            my $ncol = $icol;
            my $i;
            if ($ncol =~ s/_id$//i) {
               $i = firstidx { $ncol =~ s/(?<=_)$_$//; } @$id_names;  # that's underscore + $_ + EOL
               if (++$i > 3) {   # start whining here
                  $ncol .= '_hypercube_'.$id_names->[$i -= 4];

                  warn "Seriously?!  We're using ridiculous names like '$ncol"."_id' at this point...\n";
               }
               else { $ncol .= '_'.$id_names->[$i]; }
            }

            # Parse out a table name (with plural form)
            my $table_name = ($icol =~ /_id$/) ? col2word($ncol) : col2word($icol.'_group');
            $table_name = to_PL($table_name);  # like blah_groups
            $table_name =~ s/ /_/g;

            if ($icol =~ /_id$/) {
               $ncol .= '_id';
               $max_id[$i] = $ncol;
            }

            # Create new group table (if it doesn't already exist)
            my $t;
            if ($depth) {  # skip any group ID tables if this is the very first node
               unless ($tables->{$table_name}) {
                  ### FIXME: Assuming that table doesn't exist with the same columns ###
                  print_debug($depth+1, "ARRAY ===> Creating new group table '$table_name'");

                  $tables->{$table_name} = {};
                  $tables->{$table_name}->{columns} = [ ($icol =~ /_id$/) ? ($ncol, $icol) : ($icol.'_group_id', $icol.'_id') ];
                  $tables->{$table_name}->{data}  ||= [];
               }
               $t = $tables->{$table_name};
               my $col_key = join('|', @{$t->{columns}});
               $columns->{$col_key} = $table_name;
            }

            # Add new row
            $group_id[$i] ||= ++($ids->{table}{$table_name});  # only increment once (per group type)
            if ($icol =~ /_id$/) {
               push(@{$t->{data}}, [ $group_id[$i], $item ]);
            }
            # If this is a data column, add the data in a different table
            else {
               my $new_tname = col2word($icol);
               $new_tname = to_PL($new_tname);  # like blahs
               $new_tname =~ s/ /_/g;

               # Create new id table (if it doesn't already exist)
               unless ($tables->{$new_tname}) {
                  print_debug($depth+1, "ARRAY ===> Creating new ID table '$new_tname'");

                  $tables->{$new_tname} = {};
                  $tables->{$new_tname}->{columns} = [ $icol.'_id', $icol ];
                  $tables->{$new_tname}->{data}  ||= [];
               }
               my $n = $tables->{$new_tname};
               $n->{columns} = [ $icol.'_id', $icol ];
               $n->{data}    = [] unless ($n->{data});

               my $col_key = join('|', @{$n->{columns}});
               $columns->{$col_key} = $new_tname;
               $max_id[$i] = $icol.'_id';
               ### FIXME: Assuming that table doesn't exist with the same columns ###

               # First, check serial tree with single value
               my $stree = Data::Dumper->new([$item], ['*'.$icol.'_id'])->Reset->Indent(0)->Dump;
               if ($ids->{trees}{$stree}) {
                  # Add new group row (with proper col_id)
                  my $id = (split('|', $ids->{trees}{$stree}))[1];
                  push(@{$t->{data}}, [ $group_id[$i], $id ] ) if ($depth);

                  # (no need to add into main table; already exists)
               }
               else {
                  # Add new group row (with proper col_id)
                  my $id = ++($ids->{table}{$new_tname});
                  push(@{$t->{data}}, [ $group_id[$i], $id ]) if ($depth);

                  # Add new id row
                  $ids->{trees}{$stree} = $icol.'_id|'.$id;
                  push(@{$n->{data}}, [ $id, $item ]);
               }
            }
         }

         # Pass back an ID
         my ($gid_col, $gid) = (pop(@max_id) || $col, pop(@group_id));  # undef @max_id might happen with an empty array

         print_debug($depth+1, "ARRAY <=== $gid_col => $gid");
         $ids->{trees}{$serialized_tree} = $gid_col.'|'.$gid;
         return $gid_col => $gid;
      }
      # An actual scalar; return back the proper column name and data
      when ('' || undef) {
         return $col => $tree;
      }
      # De-reference
      when (/SCALAR|VSTRING/) {
         return $col => $$tree;
      }
      # Warn and de-reference
      when (/Regexp|LVALUE/i) {
         warn "Found a ".(reftype $tree)."; just going to treat this like a SCALAR...\n";
         return $col => $$tree;
      }
      # Warn and de-reference (for further examination)
      when ('REF') {
         warn "Found a REF; going to dive in the rabbit hole...\n";
         return &process_hash_tree($col => $$tree, $depth + 1);
      }
      # Warn and de-reference (for further examination)
      when ('GLOB') {
         foreach my $t qw(Regexp VSTRING IO FORMAT LVALUE GLOB REF CODE HASH ARRAY SCALAR) {  # scalar last, since a ref is still a scalar
            if (defined *$$tree{$t}) {
               warn "Found a GLOB (which turn out to be a $t); going to dive in the rabbit hole...\n";
               return &process_hash_tree($col => *$$tree{$t}, $depth + 1);
            }
         }
         warn "Found a GLOB, but it didn't point to anything...\n";
         return $col => undef;
      }
      # Warn and throw away
      when ('CODE') {
         ### TODO: Warn immediately, eval block with timer to use as output, then continue ###
         ### Definitely need a switch, though ###
         warn "Found a CODE block; not going to even touch this one...\n";
         return $col => undef;
      }
      default {
         warn "Found a ".(reftype $tree)."; WTF is this?  Can't use this at all...\n";
         return $col => undef;
      }
   }

   die "WTF?!  Perl broke my given/when!  Alert the Pumpking!!!";
}

# Find items in @B that are in @A
sub foundin (\@\@) {
   my ($A, $B) = @_;
   return grep { my $i = $_; any { $i eq $_ } @$A; } @$B;
}

# Find items in @B that are not in @A
sub notin (\@\@) {
   my ($A, $B) = @_;
   return grep { my $i = $_; none { $i eq $_ } @$A; } @$B;
}

sub col2word ($) {
   my $word = $_[0];
   $word = lc($word);
   $word =~ s/\W+/ /g;
   $word =~ s/^\s+|\s+(?:id)?$//g;
   return $word;
}

sub print_debug ($$) {
   my ($depth, $msg) = @_;
   return unless ($debug);

   print ("   " x $depth);
   say $msg;
}