Commit 6ca1aaa7 authored by Tomaž Erjavec's avatar Tomaž Erjavec
Browse files

Fix Stanford dashes.

parent 2b86c0a3
test-msd:
grep -v '#' CLARIN/*.conllu | cut -f5 | sort | uniq > siius-msds.log
test-tei: test-tei:
bin/conllu2tei.pl CLARIN/CPZ.dep < CLARIN/CPZ.xml > CLARIN/CPZ.ana.xml bin/conllu2tei.pl CLARIN/CPZ.dep < CLARIN/CPZ.xml > CLARIN/CPZ.ana.xml
$j schema/tei_clarin.rng CLARIN/CPZ.ana.xml $j schema/tei_clarin.rng CLARIN/CPZ.ana.xml
......
...@@ -94,6 +94,7 @@ sub sent2tei { ...@@ -94,6 +94,7 @@ sub sent2tei {
next unless $line =~ /^\d+\t/; next unless $line =~ /^\d+\t/;
my ($n, $token, $lemma, $upos, $xpos, $ufeats, $link, $role, $extra, $local) my ($n, $token, $lemma, $upos, $xpos, $ufeats, $link, $role, $extra, $local)
= split /\t/, $line; = split /\t/, $line;
$xpos =~ s/-+$//; # Get rid of trailing dashes introduced by Stanford NLP (s/// substitution, not a bare match)
if ($xpos =~ /Z/) {$tag = 'pc'} else {$tag = 'w'} if ($xpos =~ /Z/) {$tag = 'pc'} else {$tag = 'w'}
#$role =~ s/:/_/; #Leave for now, although backwards incompatibility! #$role =~ s/:/_/; #Leave for now, although backwards incompatibility!
my $feats = "UposTag=$upos"; my $feats = "UposTag=$upos";
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment