#!/usr/bin/perl
#
# Minimal lexer: load() slurps a file and tokenize() splits a string into
# whitespace, word/keyword and punctuation tokens.  Each token is a blessed
# scalar reference whose class names the token kind and whose referent is
# the token text.

use strict;
use warnings;

#for (qw(load tokenize)) { eval "sub $_" }
# Forward declarations so both subs can be called without parentheses.
sub load;
sub tokenize;

#print join "\n", map ">$_<", tokenize load 'play.pl';

# load($path)
#
# Returns the entire contents of the file at $path as a single string
# (the empty string for an empty file).  Dies with a descriptive message
# when $path is undefined or the file cannot be opened.
sub load {
    my ($path) = @_;
    die "load: no file name given\n" unless defined $path;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle closes itself on scope exit.
    open my $fh, '<', $path
        or die "load: cannot open '$path': $!\n";

    local $/;    # slurp mode: a single read returns the whole file
    my $content = <$fh>;
    close $fh;

    return defined $content ? $content : '';
}

# Every punctuator the tokenizer recognises, one and two characters long.
our @punct;
{
    # The list legitimately contains ',' which would otherwise trigger the
    # "Possible attempt to separate words with commas" warning.
    no warnings 'qw';
    @punct = qw(
        + - * / % & | ^ ? : ! ( ) [ ] < > = { } ~ ' " ; . ,
        == && || ** -> ++ -- += -= *= /= => >= <=
    );
}

our @keywords = qw(
    if else for while do return
    int double char
);

# Bit flags stored in %symbols.  They are real numbers (not qw() strings) so
# that | and & are always numeric bitwise operations.
our ($PUNCT, $PART_PUNCT, $KEYWORD) = (1, 2, 4);

# Maps a piece of text to the OR of its flags:
#   $PUNCT      - the text is a complete punctuator
#   $PART_PUNCT - the text is a proper prefix of a longer punctuator
#   $KEYWORD    - the text is a keyword
our %symbols = ();

for my $punct (@punct) {
    $symbols{$punct} |= $PUNCT;

    # Flag every proper, non-empty prefix ("-" for "->") so the scanner
    # knows it may keep extending the current candidate.
    for my $len (1 .. length($punct) - 1) {
        $symbols{ substr $punct, 0, $len } |= $PART_PUNCT;
    }
}

for my $keyword (@keywords) {
    $symbols{$keyword} |= $KEYWORD;
}

# Debug dump of the table; disabled because it wrote to STDOUT (in random
# hash order) every time the file was loaded.
#print "@{[%symbols]}";

# _token($class, $text)
#
# Wraps $text in a fresh scalar reference blessed into $class.
sub _token {
    my ($class, $text) = @_;
    return bless \$text, $class;
}

# _punct_length($text, $start)
#
# Returns the length of the longest punctuator from @punct that begins at
# offset $start in $text, or 1 when no listed punctuator begins there.
sub _punct_length {
    my ($text, $start) = @_;

    my $limit = length($text) - $start;
    my $best  = 1;

    for my $len (1 .. $limit) {
        my $flags = $symbols{ substr $text, $start, $len } || 0;
        $best = $len if $flags & $PUNCT;

        # Stop as soon as no longer punctuator can start with this text.
        last unless $flags & $PART_PUNCT;
    }

    return $best;
}

# tokenize($text)
#
# Splits $text into tokens and returns them, in input order, as a list of
# blessed scalar references; ${$token} is the token text and ref($token) is
# its kind:
#
#   'whitespace' - a maximal run of whitespace characters
#   'keyword'    - a maximal run of word characters listed in @keywords
#   'word'       - any other maximal run of word characters
#   'punct'      - the longest punctuator from @punct starting at that
#                  position, or a single character when none matches
#
# Concatenating the text of all returned tokens reproduces $text.  An
# undefined or empty $text yields an empty list.
#
# NOTE(review): the 'word' class and the handling of unlisted symbol
# characters as 'punct' are assumptions; the original draft of this sub was
# incomplete and did not show what it intended for those cases.
sub tokenize {
    my ($text) = @_;
    return unless defined $text;

    my $length = length $text;
    my @tokens;

    pos($text) = 0;
    while (pos($text) < $length) {
        if ($text =~ /\G(\s+)/gc) {
            push @tokens, _token('whitespace', $1);
        }
        elsif ($text =~ /\G(\w+)/gc) {
            my $word  = $1;
            my $flags = $symbols{$word} || 0;
            push @tokens, _token($flags & $KEYWORD ? 'keyword' : 'word', $word);
        }
        else {
            # Neither whitespace nor a word character: take the longest
            # known punctuator (maximal munch) and step past it by hand.
            my $start = pos $text;
            my $len   = _punct_length($text, $start);
            push @tokens, _token('punct', substr $text, $start, $len);
            pos($text) = $start + $len;
        }
    }

    return @tokens;
}

# True value so the file can also be pulled in with require/do.
1;