#!/bin/bash

# Parse one table row from stdin and print it as a text record.
#
# Globals read:
#   section, subsection - context printed at the top of the record
#                         (subsection is printed only when non-empty)
#   th                  - array of column headers, indexed by cell position
# Input (stdin):
#   The lines that follow a <tr...> line, one tag or text fragment per line.
#   Lines are consumed up to and including the closing </tr>.
# Output (stdout):
#   "section: ...", optionally "subsection: ...", then one "<header>: <value>"
#   line per <td> cell, then an empty line once </tr> is reached.
# Exit:
#   Terminates the current (sub)shell with status 1, after a message on
#   stderr, if the input ends inside a <td> cell.
read_record() {
	local i=0 val line

	printf '%s\n' "section: $section"
	if [ -n "$subsection" ]; then
		printf '%s\n' "subsection: $subsection"
	fi

	# -r keeps backslashes in the cell text intact. IFS is left at its default
	# on purpose so that surrounding whitespace is still trimmed and indented
	# tags match the patterns below.
	while read -r line; do
		case "$line" in
		'</tr>')
			echo
			break
			;;
		'<td'*)
			val=""
			read -r line || { echo >&2 'file broken in middle of record'; exit 1; }
			while [ "$line" != '</td>' ]; do
				# Multi-line cell content is joined with two spaces.
				if [ -n "$val" ]; then
					val="$val  "
				fi
				val="$val$line"
				read -r line || { echo >&2 'file broken in middle of record'; exit 1; }
			done
			printf '%s\n' "${th[$i]}: $val"
			i=$((i + 1))
			;;
		esac
	done
}
# Parse one table from stdin, emitting a record for every data row.
#
# Globals written:
#   th - array of column headers; cleared at the start of each table and
#        filled from the line that follows each <th...> line. It is left
#        global because read_record looks headers up in it.
# Input (stdin):
#   The lines that follow the opening <table ...> line; consumed up to and
#   including </table>.
# Output (stdout):
#   Whatever read_record prints for each <tr...> row seen after at least one
#   header has been collected. Rows seen before any header (such as the header
#   row's own <tr>) are skipped.
read_table() {
	local cols=0 line

	# Start from an empty header list so headers of a previous, wider table
	# cannot label cells of this one.
	th=()

	while read -r line; do
		case "$line" in
		'</table>')
			break
			;;
		'<th'*)
			# The header text is on the line after the <th...> tag. The target
			# is quoted so "th[N]" is not subject to filename expansion.
			read -r "th[$cols]"
			cols=$((cols + 1))
			;;
		'<tr'*)
			if [ "$cols" -gt 0 ]; then
				read_record
			fi
			;;
		esac
	done
}

# Main pipeline (reads HTML on stdin, writes text records on stdout):
#   1. grep drops every line that starts with <span or </span.
#   2. The loop tracks the current section (the line after an <h1...> line)
#      and subsection (the line after an <h2...> line), and hands each
#      'wikitable sortable' table to read_table.
#   3. sed removes ' class=...' attributes from tags left in the output.
grep -v -E '^</?span' |
while read -r line; do
	case "$line" in
	'<h1'*)
		read -r section
		# A new section starts without a subsection; otherwise records under
		# it would carry the subsection of the previous section.
		subsection=""
		;;
	'<h2'*)
		read -r subsection
		;;
	'<table class="wikitable sortable" '*)
		read_table
		;;
	esac
done | sed 's/ class=[^>]*>/>/g'
