#!/usr/bin/perl -w

# dbicsv - run an SQL query, return CSV data

use strict;
use warnings;
use Data::Dumper;

use DBI;
use Text::CSV;
use Getopt::Long;
use File::Basename;
use File::Slurp;

my $binmode = ':utf8';  # ':raw'

binmode STDIN, $binmode;
binmode STDOUT, $binmode;

sub usage {
    my $prog = basename($0);
    return "Usage: $prog [options] sql-query [param ...]" . <<'End';

Runs an SQL query, returns CSV data.

  option     description            default

  -dbms ?    DBMS, determines DSN   $DB_DBMS || InterBase
  -dsn ?     DBI data source name   $DB_DSN || (from DBMS)
  -db ?      database name          $DB_NAME
  -user ?    database user          $DB_USER || $ISC_USER || "sysdba"
  -pass ?    database password      $DB_PASS || $ISC_PASSWORD || "sysdba"
  -host ?    database hostname      $DB_HOST
  -port ?    database port          $DB_PORT
  -sql ?     sql query file         expects SQL query on command line

  -in        input data from stdin  $DB_IN
  -ins       insert data            $DB_INS
  -head ?    column head = -|O|I|IO $DB_HEAD || IO
  -rollback  rollback, not commit   $DB_ROLLBACK
  -dates ?   date format            $DB_DATES || 'ISO' (%Y-%m-%d)
  -times ?   time format            $DB_TIMES || 'ISO' (%H:%M:%S.%f)
  -stamps ?  datetime format        $DB_STAMPS || 'ISO' (%Y-%m-%d %H:%M:%S.%f)
  -quoty ?   quote all non-NULL     1

  -debug   show more info         $DB_DEBUG
  -help    show this message
End
}

our $dbms_dsn_map = {
    interbase  => 'DBI:InterBase:dbname=$db;host=$host;port=$port;ib_dialect=3',
    firebird   => 'DBI:Firebird:dbname=$db;host=$host;port=$port;ib_dialect=3',
    mysql      => 'DBI:mysql:database=$db;host=$host;port=$port',
    postgresql => 'DBI:Pg:dbname=$db;host=$host;port=$port',
    sybase     => 'DBI:Sybase:database=$db;host=$host;port=$port',
};

our $dbms  = $ENV{DB_DBMS}  || "InterBase";
our $dsn   = $ENV{DB_DSN};
our $db    = $ENV{DB_NAME}  || "";
our $user  = $ENV{DB_USER}  || $ENV{ISC_USER} || "sysdba";
our $pass  = $ENV{DB_PASS}  || $ENV{ISC_PASSWORD} || "sysdba";
our $host  = $ENV{DB_HOST}  || "";
our $port  = $ENV{DB_PORT}  || "";
our $in    = $ENV{DB_IN}    || 0;
our $ins   = $ENV{DB_INS}   || 0;
our $head  = $ENV{DB_HEAD}  || 'IO';
our $rollback  = $ENV{DB_ROLLBACK}  || 0;
our $dates  = $ENV{DB_DATES}  || 'ISO';
our $times  = $ENV{DB_TIMES}  || 'ISO';
our $stamps  = $ENV{DB_STAMPS}  || 'ISO';
our $quoty = exists $ENV{DB_QUOTY} ? $ENV{DB_QUOTY} : 1;
our $debug = $ENV{DB_DEBUG} || 0;
our $sql_file = "";
our $help;

sub debug {
    return if !$debug;
    my ($k, @v) = @_;    
    local $Data::Dumper::Indent = 0;
    my $v = Dumper(\@v);
    $v =~ s/.*?\[//;
    $v =~ s/\];$//;
    warn "$k: $v\n";
}

sub sym_name {
    my ($field_name) = @_;
    my $sym_name = $field_name;
    $sym_name =~ s/[^a-z0-9_]/_/gi;
    $sym_name =~ s/^([0-9])/_$1/;
    return $sym_name;
}

sub main {
    GetOptions(
        "dbms=s" => \$dbms,
        "dsn=s"  => \$dsn,
        "db=s"   => \$db,
        "user=s" => \$user,
        "pass=s" => \$pass,
        "host=s" => \$host,
        "port=s" => \$port,
        "in"     => \$in,
        "ins"    => \$ins,
        "head=s"   => \$head,
        "rollback"   => \$rollback,
        "help"   => \$help,
        "debug"  => \$debug,
        "sql=s"    => \$sql_file,
        "dates=s"    => \$dates,
        "times=s"    => \$times,
        "stamps=s"    => \$stamps,
        "quoty=s"    => \$quoty,
    )
        or die "failed: GetOptions";

    if ($help) { print usage; exit(0); }

    my $query;
    if ($sql_file) {
        $query = read_file($sql_file, binmode => $binmode);
    } else {
        $query = shift @ARGV;
    }
    if (!defined $query) { die usage; }

    my @params = @ARGV;

    if ($in and @params) {
        die "do not use -in with params on the command line";
    }
    if ($ins and !@params) {
        $in = 1;
    }

    my $add_insert_names_values = 0;
    my $add_bind_markers = 0;
    if ($query !~ /\s/) {
        if ($ins) {
            $query = "insert into $query";
            $add_insert_names_values = 1;
        } else {
            $query = "select * from $query";
            $add_bind_markers = 1;
        }
    }

    my $rx_quoted = qr{(?:[^"']|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')};

    my @named_bind_vars;
    my @named_bind_slice;
    my $add_named_bind_var = sub {
        my ($pre, $name) = @_;
        $name =~ s/\A{|}\z//g;
        push @named_bind_vars, $name;
        return $pre."?";
    };
    $query =~ s/\G($rx_quoted*?(?:\A|[^a-z0-9_\$"']))\$(\w+|{.*?})/$add_named_bind_var->($1, $2)/gsie;
    debug "named_bind_vars", \@named_bind_vars;
    debug "query", $query;

    if ($host eq "" && $db =~ /:/) {
        ($host, $db) = split /:/, $db, 2;
    }

    if ($dbms) {
        if ($dsn) {
            die "do not specify both DBMS and DSN";
        }
        $dsn = $dbms_dsn_map->{lc $dbms}
            or die "unknown DBMS: $dbms, known are: @{[sort keys %$dbms_dsn_map]}.\nyou can set a DSN instead";
    } else {
        ($dbms) = $dsn =~ /DBI:(\w+)/;
        $dbms ||= "unknown";
    }

    if ($dsn =~ s{\$db}{$db}) {
        if (!$db) {
            die "no database name was specified, use -db or -dsn";
        }
    }
    $dsn =~ s{\$host}{$host};
    $dsn =~ s{\$port}{$port};
    $dsn =~ s{\w+=(;|$)}{}g;
    $dsn =~ s{;$}{};

    debug "dsn", $dsn;

    my $dbh = DBI->connect($dsn, $user, $pass, { PrintError => 0, RaiseError => 1, AutoCommit => 0 });

    # date / time formats
    # use ISO with milliseconds by default, to be sane and avoid losing data
    # TODO make sure other DBMSs are configured for this also
    if ($dbms =~ /^(interbase|firebird)$/i) {
        $dbh->{ib_dateformat} = $dates;
        $dbh->{ib_timeformat} = $times;
        $dbh->{ib_timestampformat} = $stamps ? $stamps : $dates eq 'ISO' ? 'ISO' : "$dates $times";
    } elsif ($dbms =~ /^sybase$/i) {
        if ($dates ne 'ISO' or $times ne 'ISO') {
            warn "using ISO date/time format";
        }
        $dbh->syb_date_fmt('ISO');
    } elsif ($dbms =~ /^postgresql$/i) {
        $dbh->pg_enable_utf8(1);
    }

    my $csv = Text::CSV->new({ binary => 1, always_quote => $quoty, blank_is_undef => $quoty })
        or die "Cannot use CSV: ".Text::CSV->error_diag;
    $csv->eol("\n");
    my $out = \*STDOUT;
    binmode($out, ":utf8");
    if ($in) {
        $in = \*STDIN;
        binmode($in, ":utf8");
    }

    my $sth;
    my $row = \@params;
    my $first = 1;

    if ($in && $head =~ /I/) {
        my $in_names = $csv->getline($in);
        if (!$in_names || grep {!defined} @$in_names) {
            die "bad input, bad or missing table header\n";
        }
        $_ = sym_name(lc($_)) for @$in_names;
        my %ix = map { $in_names->[$_] => $_ } 0..$#$in_names;
        my $fail = 0;
        for my $k (@named_bind_vars) {
            my $i = $ix{lc $k};
            if (!defined $i) {
                warn "missing column: $k\n";
                $fail = 1;
            }
            push @named_bind_slice, $i;
        }
        exit 1 if $fail;

        if ($add_insert_names_values) {
            my $n = @$in_names;
            $query .= "(". join(", ", @$in_names) .") VALUES (". join(", ", ("?") x $n) .")";
        }
    }

    while(1) {
        if ($in) {
            $row = $csv->getline($in);
            if (!$row) {
                $csv->eof or die $csv->error_diag();
                last;
            }
        }

        if ($first) {
            if ($add_bind_markers) {
                my $n = @$row;
                if ($n) {
                    $query .= "(". join(", ", ("?") x $n) .")";
                }
            }
            debug "query", $query;
            $sth = $dbh->prepare($query);
        }

#        debug "input", @$row;

        if (@named_bind_slice) {
            $row = [ @$row[@named_bind_slice] ];
#            debug "sliced", @$row;
        }

        eval {
            $sth->execute(@$row);
        };
        if ($@) {
            warn "$@\n";
            if ($in) {
                $csv->print(\*STDERR, $row);
            }
            die "\n";
        }

        if ($sth->{NAME} && @{$sth->{NAME}}) {
            if ($first && $head =~ /O/) {
                $csv->print($out, $sth->{NAME})
                    or die "failed: \$csv->print: $!";
            }
            while (my $row = $sth->fetchrow_arrayref) {
                $csv->print($out, $row)
                    or die "failed: \$csv->print: $!";
            }
        }
        if (!$in) {
            last;
        }
        $first = 0;
    }

    close $out
        or die "failed: close: $!";

    if ($sth) {
        if ($rollback) {
            $dbh->rollback;
        } else {
            $dbh->commit;
        }
    }
    $dbh->disconnect;
}

main;

# TODO:
# 
# - allow input with headers, and output without headers
# - use ISO date/time format by default for all drivers
# - other data formats: TSV, key:value; and perhaps: xls, xlsx, json, html, printf
#   - separate converters for this?
# - preserve delimiters and comments from input stream in output stream (how to format a comment?)
# - error stream: include input record, input line number, and error message
# - port to C using libdbi
#
# - -null option to show NULL in a special way ?
# - option to specify date and time formats?
# - allow column names instead of ? in query
# - -conf option to load config from a file
#     check for .dbconf in parent dirs?
