For JS, see instead: https://github.com/TinoDidriksen/regtest/blob/main/static/inspect.js#L8
Graphviz usage:
... | niceline.pl 'en' | niceline2dot.pl | perl -wpne 's#</?b>##g;' | dot -Tpng
For JS, see instead: https://github.com/TinoDidriksen/regtest/blob/main/static/inspect.js#L8
Graphviz usage:
... | niceline.pl 'en' | niceline2dot.pl | perl -wpne 's#</?b>##g;' | dot -Tpng
| #!/usr/bin/perl | |
| # -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*- | |
| BEGIN { $| = 1; } | |
| use warnings; | |
| use strict; | |
| use utf8; | |
# Undo the spacing/casing encoding used in word forms.
#   '=' is turned back into a space (e.g. "such=as" becomes "such as");
#   '*' upper-cases the character that follows it ("*x" becomes "X").
# Takes the encoded string as its only argument and returns the decoded
# copy; the caller's value is left untouched.
sub recase {
   # Deliberately not called $a: a lexical $a hides the package variable
   # that sort() comparison blocks rely on.
   my ($text) = @_;
   $text =~ s/=/ /g;
   $text =~ s/\*(.)/uc($1)/eg;
   return $text;
}
# Main filter: reads analysed text from STDIN, one line at a time, and prints
# it as an HTML definition list (<dl>) with colour-coded fields.
# The optional first command-line argument selects the language-specific
# part-of-speech highlighting; with no (or an unknown) language that step is
# skipped.
# NOTE(review): the script declares "use utf8" but sets no I/O layer here, so
# the non-ASCII literals below (Ä, §) only match if STDIN is decoded some
# other way (e.g. -C / PERL_UNICODE) - confirm how this is invoked.
my $lang = $ARGV[0] || '';

print "<dl>\n";
while (<STDIN>) {
   chomp;

   # Drop a leading '$' together with any backslashes following it.
   s/^\$\\*//;
   # Lines carrying the $START marker produce no output.
   next if /\$START/;
   s/^\$//;
   s/\$//;

   # HTML-escape the raw input before any markup is added. Every pattern
   # below therefore sees &lt; &gt; &amp; instead of the literal characters;
   # a literal '<' or '>' from here on always belongs to a tag we inserted.
   s/&/&amp;/g;
   s/</&lt;/g;
   s/>/&gt;/g;

   # word form: either "<form>" (quoted, in angle brackets) or the first
   # whitespace-delimited token of the line
   unless (s%^\"&lt;(.+)&gt;\"%"<dt><b><font color=\"maroon\">" . recase($1) . "</font></b> "%e) {
      s%^(\S+)([ \t])%"<dt><b><font color=\"maroon\">" . recase($1) . "</font></b>$2"%e;
   }

   # base form: first [bracketed] item, or a "quoted" item on an indented line
   s%\[(.+?)\]%<font color="maroon">[$1]</font> %;
   s%^[\t ]+\"(.+?)\"%<font color="maroon">[$1]</font> %;

   # form/morphology tags: the first recognised POS tag is set in bold and
   # everything after it up to the first '@' (for eo/nl/jpn also '#' or '&')
   # is coloured blue
   if ($lang eq "da") {
      s% (ADJ|ADV|ART|CONJ|DET|GER|IN|INDP|INFM|KC|KP|KS|N|NUM|PED|PERS|PING|PREF|PRON|PROP|PRP|SPEC|V|V PCP[12])( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "en") {
      s% (ADJ|ADV|ART|CONJ|DET|GER|IN|INDP|INFM|KC|KP|KS|N|NEGPART|NUM|PCP|PED|PERS|PING|PRON|PRP|V|V PED|V PING|PU)( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "pt") {
      s% (ADJ|ADV|DET|EC|IN|INDP|KC|KS|N|NUM|PERS|PROP|PRP|SPEC|V)( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang =~ /(de|fr|it)/) {
      # $7 is the trailing "( [^\@]*)" group; groups 2-6 are nested inside $1.
      s% (AD*J*(\(PART\)| PART)*|ADV( PRON)*|CONJ|[insu]KONJ|ART|INFM|PROP|[NS](\((AD*J*|PART)\)| EIGEN| PROP| NADJ| PART)*|NEGPART|NUM|PERS|DET|KC|PCP|GER|V PING|V PED|PRP|PRÄP|PRON( INDP| DET)*|PRONADV|ART|V|IN|INTERJ|PR[ÄE]F|ABK)( [^\@]*)% <font color="blue"><b>$1</b>$7</font> %;
   }
   if ($lang =~ /(eo|nl)/) {
      s% (ADJ|ADV|ART|DET|IN|INDP|KC|KS|N|NUM|PERS|PRON|PROP|PRP|V)( [^\@\#\&]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "jpn") {
      s% (ADJ|ADV|ART|DET|IN|INDP|KC|KS|N|NUM|PART|PERS|PRON|PROP|PRP|V)( [^\@\#\&]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "gl") {
      s% ([A-Z][^\@\#]*?)( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }

   # function tags (@...)
   s%(\@[^ ]+)%<font color="darkgreen">$1</font>%g;
   # tags introduced by '&' (which is '&amp;' after escaping, so the entities
   # &lt; / &gt; are not mistaken for such tags)
   s%([ >])(&amp;[^ ]+)%$1<font color="red">$2</font>%g;
   # semantic roles
   s%([ >])(§[A-Z][^ ]+)%$1<font color="red">$2</font>%g;
   # frames: <fn:...> in the input, i.e. &lt;fn:...&gt; at this point
   s%([ >])(&lt;fn:.*?&gt;)%$1<font color="orange">$2</font>%g;
   # subclause tags
   s%(\@\#[^ ]+)%<b>$1</b>%g;
   # translation tags: 'quoted' text and <DA:...>-style tags
   s%(\'[^\']+\')%<font color="orange"><b>$1</b></font>%g;
   s%(&lt;(DA|EN|DE|ES|EO)[0-9]*:([^ ]*?)&gt;)%<font color="orange"><b>$1</b></font>%g;

   print "$_\n";
}
print "</dl>\n";
| #!/usr/bin/env perl | |
| # -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*- | |
| BEGIN { $| = 1; } | |
| use warnings; | |
| use strict; | |
| use utf8; | |
# Converts the HTML lines read from STDIN into a Graphviz "dot" digraph on
# STDOUT: one box node per word and one edge from each word to its parent.

# Graph preamble: global layout settings plus node 0, labelled ROOT, which
# becomes the parent of every word that names itself as its own parent.
my $header = <<"HEAD";
digraph G {
\ttruecolor = true;
\tgraph [ratio=fill];
\tgraph [center=\"1\"];
\tratio = 1.00;
\tedge [arrowsize = 0.8];
\tsplines = true;
\trankdir = \"BT\";
\tnode [shape=box];
\t0 [label="ROOT"];
HEAD
print $header;

# Node and edge statements are collected separately so that all nodes are
# printed before the first edge.
my $labels = '';
my $edges  = '';

# Expected input line (note the two blanks in front of the blue POS field;
# the pattern below needs at least one character there besides the separator):
#<dt><b><font color="maroon">such as</font></b> <font color="maroon">[such=as]</font>  <font color="blue"><b>PRP</b> </font> <font color="darkgreen">@&lt;ADVL</font> #17-&gt;14
while (my $line = <STDIN>) {
   chomp $line;

   # Word form, lemma and POS field must all be present, followed later on
   # the line by "#self->parent". The arrow is accepted as a literal "->",
   # as its HTML-escaped form "-&gt;", or as the character "→".
   # NOTE(review): "→" can only match if STDIN is decoded; no input layer is
   # set in this block - confirm how the script is invoked.
   my ($form, $lemma, $pos, $num, $parent) =
      $line =~ m%^<dt>(<b><font color="maroon">[^<]+</font></b>)\s+(<font color="maroon">\[[^<]+\]</font>)\s+(.+?<font color="blue">.*?</font>)\s+.*?\s+#(\d+)(?:->|-&gt;|→)(\d+)%
      or next;

   # Optional syntactic function tag; used as the edge label when present.
   my ($func) = $line =~ m%\s+(<font color="darkgreen">\@[^<]+</font>)\s+%;

   # A word that is its own parent hangs off the ROOT node.
   $parent = 0 if $num == $parent;

   # Put each blue tag group on its own line inside the node label. (This
   # also covers the "</font> <font ...>" case, since "</font>" ends in ">".)
   $pos =~ s{> (<font color="blue">)}{><br/>$1}g;

   # Show the lemma under the word form only when the two differ (compared
   # case-insensitively, with blanks in the form written as '=' again).
   if ($form =~ m{<font color="maroon">([^<]+)</font>}) {
      (my $form_as_lemma = $1) =~ tr/ /=/;
      # \Q...\E: the form is data, not a pattern - a token such as "(" would
      # otherwise abort the script with a regex compile error.
      if ($lemma !~ m{<font color="maroon">\[\Q$form_as_lemma\E\]</font>}i) {
         $form .= '<br/>' . $lemma;
      }
   }

   $labels .= sprintf("\t%s [label=<%s<br/>%s<br/>%s>];\n", $num, $num, $form, $pos);

   my $edge_attr = defined $func ? ' [label=<' . $func . '>]' : '';
   $edges .= sprintf("\t%s->%s%s;\n", $num, $parent, $edge_attr);
}

print $labels;
print $edges;
print "}\n";