#!/usr/bin/perl -w

#use lib '../lib'; #XXX FOR DEBUG ONLY!
use Data::Dumper;  #XXX
$Data::Dumper::Terse = 1;
$Data::Dumper::Indent = 0;

use Getopt::Long qw(:config no_ignore_case);

use Relation::Tools qw/read_file read_tsv read_nosql write_file write_tsv
		       write_nosql And Or lscmp/;

# Program name as shown in the usage message.
my $progname = 'cjoin';

# Command-line settings; each one is filled in by GetOptions below.
my $which;          # -w / --which: string argument
my $which_column;   # -W / --which-column: flag
my $debug;          # -D / --debug: flag
my $help;           # --help: flag

# Option specification, kept separate from the call for readability.
my @option_spec = (
	'w|which=s'      => \$which,
	'W|which-column' => \$which_column,
	'D|debug'        => \$debug,
	'help'           => \$help,
);

# A parse failure prints the usage text on STDERR and exits with status 1;
# an explicit help request prints it on STDOUT and exits with status 0.
my $parsed_ok = GetOptions(@option_spec);
help(1) unless $parsed_ok;
help(0) if $help;

# Print the usage text and terminate the program.
#   $_[0] - exit status; a true value also sends the text to STDERR
#           instead of STDOUT.
# Never returns.
sub help {
	my ($status) = @_;
	my $fh = \*STDOUT;
	$fh = \*STDERR if $status;
	print {$fh} <<"End";
syntax: $progname [options] file ...
-w  --which         - which parts to include, e.g. '100 110 111'
-W  --which-column  - include a "which" column
-D  --debug         - print debugging info on stderr
-h  --help          - this help
End
	exit $status;
}

use strict;

# Set of membership patterns requested with -w (e.g. "100", "110").  The
# merge loop looks up each row's "which" string in this hash.
my %which;
if ($which) {
	for (split /[\s,]/, $which) {
		$which{$_} = 1;
	}
}

# With no input files there is nothing to join: print two newlines and stop.
my @files = @ARGV;
unless (@files) {
	print "\n\n";
	exit;
}

my @readers;	# one row reader per input file, in command-line order
my @fields;	# field list of each input file, in command-line order

# First pass: open every file only to learn its field list; the reader
# returned alongside it is not needed here.
for my $filename (@files) {
	my (undef, $fields) = read_nosql(read_tsv(read_file($filename)));
	push @fields, $fields;
}

# Fold the per-file field lists into the join key and the output field list.
# NOTE(review): presumably And() yields the fields common to both arguments
# (with "the universe!" acting as its identity) and Or() their union --
# confirm against Relation::Tools.
my $key = "the universe!";
my $out_fields = [$which_column ? ('which') : ()];
for my $fields (@fields) {
	$key = And($key, $fields);
	$out_fields = Or($out_fields, $fields);
}

# Second pass: re-open every file through "nosql sort" on the key fields so
# that the merge loop sees rows in key order.  The filename is interpolated
# into a command line (read_file is presumably doing a shell pipe open for
# the trailing "|" -- confirm), so it is single-quoted, with embedded single
# quotes escaped, to keep names containing spaces or shell metacharacters
# from breaking or altering the command.
for my $filename (@files) {
	(my $quoted = $filename) =~ s/'/'\\''/g;
	my ($reader) = read_nosql(read_tsv(read_file("nosql sort @$key < '$quoted' |")));
	push @readers, $reader;
}

# Extract the join-key values of a row.
#   $_[0] - row as a hash reference, or a false value when there is no row
# Returns a new array reference holding the row's values for the fields
# listed in @$key (in that order), or undef when no row was given.
sub key {
	my ($record) = @_;
	# Explicit undef (not a bare return) so that list-context callers
	# still receive exactly one element.
	return undef unless $record;
	my @values = map { $record->{$_} } @$key;
	return \@values;
}

# Output sink on STDOUT.  It is called with a hash reference for every output
# row and once with undef after the loop (presumably to finish the stream --
# confirm against Relation::Tools).
my $writer = write_nosql(write_tsv(write_file(\*STDOUT)), $out_fields);

# Current, not yet emitted row of every reader; a false value once a reader
# has nothing more to give.  The call is forced into scalar context so that a
# reader which yields an empty list at end of input still occupies its slot,
# keeping @rows index-aligned with @readers.
my @rows;
for my $reader (@readers) {
	push @rows, scalar $reader->();
}

while (1) {
	# Find the smallest key among the current rows, and every reader that
	# is positioned on that key.
	# NOTE(review): this relies on lscmp ordering an undef key (exhausted
	# reader) after every real key -- confirm in Relation::Tools.
	my $least_indices = [0];
	my $least_key = key($rows[0]);
	for my $i (1..$#rows) {
		my $k = key($rows[$i]);
		my $comp = lscmp($k, $least_key);
		if ($comp == 0) {
			push @$least_indices, $i;
		} elsif ($comp < 0) {
			@$least_indices = ($i);
			$least_key = $k;
		}
	}

	# No key at all means every reader is exhausted.
	$least_key or last;

	# Membership string: one character per input file, "1" when that file
	# contributes a row for this key and "0" otherwise.  Only built when
	# it is needed for filtering (-w) or output (-W).
	my $wh = "";
	if ($which_column || $which) {
		my @in = (0) x @readers;
		$in[$_] = 1 for @$least_indices;
		$wh = join "", @in;
	}

	if (!$which || $which{$wh}) {
		if ($debug) {
			for (@rows) {
				print STDERR Dumper key($_);
				print STDERR "\n";
			}
			print STDERR "@$least_indices\n";
			print STDERR "\n";
		}

		# Merge the contributing rows into one output row; for a field
		# present in several rows the later file wins.
		my %orow = $which_column ? (which => $wh) : ();
		for my $i (@$least_indices) {
			my $row = $rows[$i];
			@orow{keys %$row} = values %$row;
		}

		# Fill missing or empty fields with a lone backslash.  Test for
		# definedness and length rather than truth, so that a legitimate
		# "0" value is not overwritten.
		for my $f (@$out_fields) {
			my $v = $orow{$f};
			$orow{$f} = "\\" unless defined $v && length $v;
		}
		$writer->(\%orow);
	}

	# Advance only the readers whose row was just consumed.
	for my $i (@$least_indices) {
		$rows[$i] = $readers[$i]->();
	}
}
$writer->(undef);
