Commit 081c74f8 authored by Tomaž Erjavec
Browse files

Fix bug with spaces

parent b2a8aa00
......@@ -94,7 +94,7 @@ sub sent2tei {
next unless $line =~ /^\d+\t/;
my ($n, $token, $lemma, $upos, $xpos, $ufeats, $link, $role, $extra, $local)
= split /\t/, $line;
$xpos =~ /-+$//; # Get rid of trailing dashes introduced by Stanford NLP
$xpos =~ s/-+$//; # Get rid of trailing dashes introduced by Stanford NLP
if ($xpos =~ /Z/) {$tag = 'pc'} else {$tag = 'w'}
#$role =~ s/:/_/; #Leave for now, although backwards incompatibility!
my $feats = "UposTag=$upos";
......@@ -117,7 +117,7 @@ sub sent2tei {
}
unless (@deps) { #No parse
$tei .= join "\n", @toks;
$tei =~ s|<c> </c>\n$|\n|;
$space = $tei =~ s|<c> </c>\n$|\n|;
}
else { # Parsed
#Give IDs to tokens as we have a parse
......@@ -126,7 +126,7 @@ sub sent2tei {
$element =~ s| | xml:id="$id" |;
$tei .= "$element\n";
}
$tei =~ s|<c> </c>\n$|\n|;
$space = $tei =~ s|<c> </c>\n$|\n|;
$tei .= "<linkGrp type=\"$ud_type\" targFunc=\"head argument\" corresp=\"#$id\">\n";
foreach $dep (@deps) {
my ($head, $arg, $role) = split /\t/, $dep;
......@@ -137,7 +137,8 @@ sub sent2tei {
}
$tei .= "</linkGrp>";
}
$tei .= "\n</s>\n<c> </c>\n";
$tei .= "\n</s>\n";
$tei .= "<c> </c>\n" if $space;
return $tei
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment