#!/lang/perl -n
# this converts the text of Great Expectations into a play, approximately!
# should run pg2line and splitsentences2 first
#
# The -n switch wraps everything below in an implicit "while (<>) { ... }"
# loop, so this body runs once per input line with the line in $_.
# For each line it either
#   * copies it through verbatim (a CHAPTER heading and the two lines after),
#   * prints "SPEAKER: speech" if the line contains single-quoted speech, or
#   * prints nothing, merely counting the characters of non-speech text.
#
# NOTE(review): the header names Great Expectations, but the special-casing
# further down mentions Fagin and Barney, which looks like Oliver Twist --
# confirm which text this was actually tuned for.

use strict;
use warnings;

# State that has to survive from one input line to the next.  These are
# package variables (not "my") because the implicit -n loop re-enters this
# body for every line; BEGIN initialises them exactly once.
#   $copy       - number of lines still to be copied through verbatim
#   $non_speech - characters of non-speech text seen since the last speech
#   $break      - true if a blank line should precede the next speech
our ($copy, $non_speech, $break);
BEGIN { ($copy, $non_speech, $break) = (0, 0, 0); }

chomp;

if (/^CHAPTER /) {
    # get chapter headings too
    $copy = 3;
    print "\n\n";
    $non_speech = 0;
    $break      = 1;
}

if ($copy > 0) {
    print "$_\n";
    --$copy;
}
else {
    # Pull every quoted passage out of $_, replacing each one with an "@"
    # placeholder so the narration around it can be inspected afterwards.
    my $line = "";
    while (s/(^|\s|-)'(.*?[^\w\s])'/$1\@/) {
        my $q = $2;
        if (length $q > 12) {
            $q =~ s/,$/./;    # not always right, but mostly
        }

        # NOTE(review): as seen here, both branches of the original
        # "ends in a comma?" test appended the same single space, so the
        # test has been dropped.  If the non-comma case was meant to add a
        # wider gap (e.g. two spaces), restore the distinction.
        $q .= " ";

        $line .= $q;
    }

    if ($line) {
        if ($break) {
            print "\n";
            $break = 0;
        }
        $non_speech = 0;
        $line =~ s/ +$//;
        $line =~ s/,$/./;

        # try to work out who the speaker was... hoho!
        my $speaker = $_;

        # $verb only acts as a flag: once a "...ed"/"...id" word has been
        # stripped from the attribution, the final fallback below is skipped.
        # Its value is never printed.
        my $verb = "";

        $speaker =~ s/^[^@]*[.!?] *//;    # get rid of any sentences before the speech
        $speaker =~ s/[.!?] .*//;         # get rid of any sentences at the end

        if ($speaker eq "\@") {
            # FIXME it's probably the last but one who spoke... or perhaps
            # someone who hasn't been introduced yet?
            $speaker = "";
        }
        elsif ($speaker =~ /^\@/) {
            # Attribution follows the speech: drop the placeholder and any
            # punctuation, then a leading verb, then everything from the
            # first punctuation mark or " - " onwards.
            $speaker =~ s/^\@[\s\W]*//;
            if ($speaker =~ s/^([a-z]+[ei]d) //) { $verb = $1; }
            $speaker =~ s/([^\s\w.'-]|( - )).*//;
        }
        else {
            # Attribution precedes the speech: drop the placeholder and what
            # follows it, then a trailing verb, then everything up to the
            # last punctuation mark or " - ".
            $speaker =~ s/[\s\W]*\@.*//;
            if ($speaker =~ s/ ([a-z]+[ei]d)$//) { $verb = $1; }
            $speaker =~ s/.*([^\s\w'-]|( - ))//;
        }

        if ($speaker =~ s/.*?([A-Z])/$1/) {
            # Start at the first capital letter and cut at the first space
            # that is not followed by another capital.
            $speaker =~ s/ [^A-Z].*//;
        }
        else {
            # No capital letter at all: fall back to a "the ..." phrase and
            # cut it off at a following preposition.
            $speaker =~ s/.*?(^| )the /the /;
            $speaker =~ s/\b(with|to|at|in (a|an))\b.*//;
        }

        $speaker =~ s/\b[a-z]+ly\b//g;    # no adverbs!
        $speaker =~ s/\bthe\b//g;
        $speaker =~ s/\W+$//;
        $speaker =~ s/^ *//;
        $speaker =~ s/ *$//;
        $speaker =~ s/ +/ /g;

        # special treatment for the villain
        # I was going to change to Fagin, but it might be Barney.
        # I think Dickens was a bit anti-semitic.
        $speaker =~ s/^Jew/the Jew/;

        if (!$verb && $speaker =~ s/ ([a-z]+[ei]d)$//) { $verb = $1; }

        if ($speaker eq "") {
            $speaker = "?";
        }
        print "$speaker: $line\n";
    }
    else {
        # put a break if we've had 700 characters of non-speech text...
        # Setting $break again while it is already set is harmless, and the
        # counter is reset to 0 by the next speech or chapter heading, so no
        # sentinel value is needed to stop the test from re-firing.
        $non_speech += length($_);
        if ($non_speech >= 700) {
            $break = 1;
        }
    }
}