Commit 6ca1aaa7 authored by Tomaž Erjavec
Browse files

Fix Stanford dashes.

parent 2b86c0a3
grep -v '#' CLARIN/*.conllu | cut -f5 | sort | uniq > siius-msds.log
bin/ CLARIN/CPZ.dep < CLARIN/CPZ.xml > CLARIN/CPZ.ana.xml
$j schema/tei_clarin.rng CLARIN/CPZ.ana.xml
......@@ -94,6 +94,7 @@ sub sent2tei {
next unless $line =~ /^\d+\t/;
my ($n, $token, $lemma, $upos, $xpos, $ufeats, $link, $role, $extra, $local)
= split /\t/, $line;
$xpos =~ s/-+$//; # Get rid of trailing dashes introduced by Stanford NLP
if ($xpos =~ /Z/) {$tag = 'pc'} else {$tag = 'w'}
#$role =~ s/:/_/; #Leave for now, although backwards incompatibility!
my $feats = "UposTag=$upos";
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment