#!/bin/bash if [ -z "$2" ];then echo "preprocess.sh

" exit 0 fi hypothesisCTM=$1 tmpFile=$2 output=$3 # apply some simplifications to the recognition echo "preprocess.sh ${hypothesisCTM} ${tmpFile} ${output}" cat ${hypothesisCTM} | sed -e 's,loc-,,g' -e 's,cl-,,g' -e 's,qu-,,g' -e 's,poss-,,g' -e 's,lh-,,g' -e 's,S0NNE,SONNE,g' -e 's,HABEN2,HABEN,g'|sed -e 's,__EMOTION__,,g' -e 's,__PU__,,g' -e 's,__LEFTHAND__,,g' |sed -e 's,WIE AUSSEHEN,WIE-AUSSEHEN,g' -e 's,ZEIGEN ,ZEIGEN-BILDSCHIRM ,g' -e 's,ZEIGEN$,ZEIGEN-BILDSCHIRM,' -e 's,^$[A-Z]$ $[A-Z][+ ]$,\1+\2,g' -e 's,[ +]$[A-Z]$ $[A-Z]$ , \1+\2 ,g'| sed -e 's,$[ +][A-Z]$ $[A-Z][ +]$,\1+\2,g'|sed -e 's,$[ +][A-Z]$ $[A-Z][ +]$,\1+\2,g'| sed -e 's,$[ +][A-Z]$ $[A-Z][ +]$,\1+\2,g'|sed -e 's,$[ +]SCH$ $[A-Z][ +]$,\1+\2,g'|sed -e 's,$[ +]NN$ $[A-Z][ +]$,\1+\2,g'| sed -e 's,$[ +][A-Z]$ $NN[ +]$,\1+\2,g'| sed -e 's,$[ +][A-Z]$ $[A-Z]$$,\1+\2,g'| sed -e 's,$[A-Z][A-Z]$RAUM,\1,g'| sed -e 's,-PLUSPLUS,,g' | perl -ne 's,(? ${tmpFile} # make sure empty recognition results get filled with [EMPTY] tags - so that the alignment can work out on all data. cat ${tmpFile} | sed -e 's,\s*$,,' | awk 'BEGIN{lastID="";lastRow=""}{if (lastID!=$1 && cnt[lastID]<1 && lastRow!=""){print lastRow" [EMPTY]";}if ($5!=""){cnt[$1]+=1;print $0;}lastID=$1;lastRow=$0}' |sort -k1,1 -k3,3 > ${output} rm ${tmpFile} echo `date` echo "Preprocess Finished."