Skip to content

Instantly share code, notes, and snippets.

@TinoDidriksen
Created April 15, 2025 10:22
Show Gist options
  • Select an option

  • Save TinoDidriksen/4ae14c65aca186945ac973ff27c0f06b to your computer and use it in GitHub Desktop.

Select an option

Save TinoDidriksen/4ae14c65aca186945ac973ff27c0f06b to your computer and use it in GitHub Desktop.
CG-3 to Graphviz/SVG/PNG
#!/usr/bin/perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
BEGIN { $| = 1; }
use warnings;
use strict;
use utf8;
# Turn an encoded word form into its display form.
#
# Every "=" becomes a space, and every "*" followed by a character is
# replaced by the upper-cased version of that character (the "*" itself is
# dropped). Presumably this mirrors how the upstream tagger encodes
# multi-word tokens and capitalisation -- confirm against the tagger docs.
#
# Takes:   the encoded string.
# Returns: the decoded copy; the caller's value is not modified.
sub recase {
   # Named $text rather than $a: a lexical $a would mask the package
   # variable that sort() comparison blocks rely on.
   my ($text) = @_;
   $text =~ s/=/ /g;
   $text =~ s/\*(.)/uc($1)/eg;
   return $text;
}
# Filter: reads tagger output (one token per line) on STDIN and writes an
# HTML <dl> list to STDOUT in which the word form, base form, part-of-speech
# tags, function tags, semantic roles, frames and translation tags are
# wrapped in coloured <font> elements.

# Language code selecting which part-of-speech tag set gets highlighted.
# With no argument (empty string) none of the POS rules below fire.
my $lang = $ARGV[0] || '';

# The patterns below contain non-ASCII literals (e.g. the section sign and
# A-umlaut) and this file is under "use utf8", so the input must be decoded
# into characters for them to match UTF-8 data; encode again on output.
binmode(STDIN, ':encoding(UTF-8)');
binmode(STDOUT, ':encoding(UTF-8)');

print "<dl>\n";
while (<STDIN>) {
   chomp;

   # Drop a leading "$" together with any backslashes that follow it.
   s/^\$\\*//;
   # Lines that still mention $START are markers, not tokens: skip them.
   if (/\$START/) {
      next;
   }
   #s/^<\"\$([^\"]+)\">/$1/;
   # Remove one more leading "$" and then the first "$" left anywhere.
   s/^\$//;
   s/\$//;

   # HTML-escape the raw text before any markup is inserted. The ampersand
   # must go first so the entities produced next are not escaped twice.
   s/&/\&amp;/g;
   s/</\&lt;/g;
   s/>/\&gt;/g;

   # word form: either the quoted "<...>" notation (already escaped above)
   # or simply the first whitespace-delimited field on the line.
   if (m%^\"&lt;(.+)&gt;\"%) {
      s%^\"&lt;(.+)&gt;\"%"<dt><b><font color=\"maroon\">".recase($1)."</font></b> "%e;
   }
   else {
      s%^(\S+)([ \t])%"<dt><b><font color=\"maroon\">".recase($1)."</font></b>$2"%e;
   }

   # base form: first [bracketed] item, or a quoted item on an indented line.
   s%\[(.+?)\]%<font color="maroon">[$1]</font> %;
   s%^[\t ]+\"(.+?)\"%<font color="maroon">[$1]</font> %;

   # form/morphology tags: the POS tag goes bold, and everything after it up
   # to the first "@" (or "#"/"&" for some languages) goes blue.
   if ($lang eq "da") {
      s% (ADJ|ADV|ART|CONJ|DET|GER|IN|INDP|INFM|KC|KP|KS|N|NUM|PED|PERS|PING|PREF|PRON|PROP|PRP|SPEC|V|V PCP[12])( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "en") {
      s% (ADJ|ADV|ART|CONJ|DET|GER|IN|INDP|INFM|KC|KP|KS|N|NEGPART|NUM|PCP|PED|PERS|PING|PRON|PRP|V|V PED|V PING|PU)( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "pt") {
      s% (ADJ|ADV|DET|EC|IN|INDP|KC|KS|N|NUM|PERS|PROP|PRP|SPEC|V)( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang =~ /(de|fr|it)/) {
      # $7 is the trailing tag run: groups 2-6 are nested inside the POS
      # alternation, so the final group is the seventh.
      s% (AD*J*(\(PART\)| PART)*|ADV( PRON)*|CONJ|[insu]KONJ|ART|INFM|PROP|[NS](\((AD*J*|PART)\)| EIGEN| PROP| NADJ| PART)*|NEGPART|NUM|PERS|DET|KC|PCP|GER|V PING|V PED|PRP|PRÄP|PRON( INDP| DET)*|PRONADV|ART|V|IN|INTERJ|PR[ÄE]F|ABK)( [^\@]*)% <font color="blue"><b>$1</b>$7</font> %;
   }
   if ($lang =~ /(eo|nl)/) {
      s% (ADJ|ADV|ART|DET|IN|INDP|KC|KS|N|NUM|PERS|PRON|PROP|PRP|V)( [^\@\#\&]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "jpn") {
      s% (ADJ|ADV|ART|DET|IN|INDP|KC|KS|N|NUM|PART|PERS|PRON|PROP|PRP|V)( [^\@\#\&]*)% <font color="blue"><b>$1</b>$2</font> %;
   }
   if ($lang eq "gl") {
      s% ([A-Z][^\@\#]*?)( [^\@]*)% <font color="blue"><b>$1</b>$2</font> %;
   }

   # function tags (anything starting with "@")
   s%(\@[^ ]+)%<font color="darkgreen">$1</font>%g;
   # tags starting with an (escaped) ampersand
   s%([ >])((\&amp;|&[A-Z])[^ ]+)%$1<font color="red">$2</font>%g;
   # semantic roles
   s%([ >])(§[A-Z][^ ]+)%$1<font color="red">$2</font>%g;
   # frames
   s%([ >])(&lt;fn:.*?&gt;)%$1<font color="orange">$2</font>%g;
   # subclause tags: these were already wrapped by the function-tag rule, so
   # stop at "<" as well as at a space; otherwise the closing </font> would
   # be pulled inside the <b> element and the markup would be mis-nested.
   s%(\@\#[^ <]+)%<b>$1</b>%g;
   # translation tags
   s%(\'[^\']+\')%<font color="orange"><b>$1</b></font>%g;
   s%(&lt;(DA|EN|DE|ES|EO)[0-9]*:([^ ]*?)&gt;)%<font color="orange"><b>$1</b></font>%g;

   print "$_\n";
}
print "</dl>\n";
#!/usr/bin/env perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
BEGIN { $| = 1; }
use warnings;
use strict;
use utf8;
# Filter: reads the HTML token lines (one <dt> line per token) on STDIN and
# writes a Graphviz "digraph" to STDOUT: one box node per token plus one
# edge from each token to its parent, labelled with the function tag when
# the line has one.

# The dependency pattern below accepts a literal arrow character and this
# file is under "use utf8", so the input must be decoded into characters
# for that alternative to match UTF-8 data; encode again on output.
binmode(STDIN, ':encoding(UTF-8)');
binmode(STDOUT, ':encoding(UTF-8)');

# Fixed graph preamble. Node 0 is the artificial ROOT that tokens pointing
# at themselves get attached to (see the loop below).
my $header = <<HEAD;
digraph G {
\ttruecolor = true;
\tgraph [ratio=fill];
\tgraph [center=\"1\"];
\tratio = 1.00;
\tedge [arrowsize = 0.8];
\tsplines = true;
\trankdir = \"BT\";
\tnode [shape=box];
\t0 [label="ROOT"];
HEAD
print $header;

# Node and edge statements are collected separately so that all node
# declarations are printed before any edge.
my $labels = '';
my $edges = '';

# Sample input line:
#<dt><b><font color="maroon">such=as</font></b> <font color="maroon">[such=as]</font> <font color="blue"><b>PRP</b> </font><font color="darkgreen">@&lt;ADVL</font> #17-&gt;14
while (<STDIN>) {
   chomp;
   my ($form,$lemma,$pos,$func,$num,$parent);

   # A usable line has: word form, [base form], a blue POS span and a
   # "#child->parent" dependency (arrow either HTML-escaped or as a literal
   # arrow character). Anything else is skipped.
   if (m%^<dt>(<b><font color="maroon">[^<]+</font></b>)\s+(<font color="maroon">\[[^<]+\]</font>)\s+(.+?<font color="blue">.*?</font>)\s+.*?\s+#(\d+)(?:(?:-&gt;)|(?:→))(\d+)%) {
      ($form,$lemma,$pos,$num,$parent) = ($1,$2,$3,$4,$5);
   }
   # elsif (m%^<dt>(<b><font color="maroon">[^<]+</font>).*(<font color="maroon">\[[^<]+\]</font>)\s+(.+?<font color="blue">.*?</font>)\s+.*?\s+#(\d+)(?:(?:-&gt;)|(?:→))(\d+)%) {
   # ($form,$lemma,$pos,$num,$parent) = ($1,$2,$3,$4,$5);
   # }
   else {
      next;
   }

   # First function tag on the line, if any; it becomes the edge label.
   if (m%\s+(<font color="darkgreen">@[^<]+</font>)\s+%) {
      $func = $1;
   }

   # A token that names itself as parent hangs off the ROOT node.
   if ($num == $parent) {
      $parent = 0;
   }

   # Put consecutive tag groups on separate lines inside the node label.
   $pos =~ s@(&gt;) (<font color="blue">)@$1<br/>$2@g;
   $pos =~ s@(</font>) (<font color="blue">)@$1<br/>$2@g;

   # Show the base form under the word form only when the two differ
   # (ignoring case, and with spaces in the form turned back into "=").
   if ($form =~ m@<font color="maroon">([^<]+)</font>@) {
      my $tf = $1;
      $tf =~ s/ /=/g;
      # \Q...\E: the word form is data, not a pattern. Without quoting, a
      # token such as "(" makes the regex fail to compile and kills the
      # script, and "?", "*" or "+" silently change what is matched.
      if ($lemma !~ m@<font color="maroon">\[\Q$tf\E\]</font>@i) {
         $form .= '<br/>'.$lemma;
      }
   }

   $labels .= "\t".$num.' [label=<'.$num.'<br/>'.$form.'<br/>'.$pos.'>];'."\n";
   if ($func) {
      $edges .= "\t".$num.'->'.$parent.' [label=<'.$func.'>];'."\n";
   }
   else {
      $edges .= "\t".$num.'->'.$parent.';'."\n";
   }
}
print $labels;
print $edges;
print "}\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment