#!/bin/bash
# unknown2srt
# usage: unknown2srt file.txt > file.srt
#set -x
# required
# dos2unix normalises the line endings below; stop early with an install
# hint when it is missing.
if ! command -v dos2unix >/dev/null 2>&1; then
  echo "I need dos2unix: sudo apt install dos2unix" >&2
  exit 1
fi
# file and iconv drive the encoding step; without file every input would be
# treated as cp1250 and UTF-8 text would come out double-encoded.
for tool in file iconv; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "I need $tool" >&2
    exit 1
  fi
done
# the input file is mandatory
if [[ $# -lt 1 || ! -r "$1" ]]; then
  echo "usage: ${0##*/} file.txt > file.srt" >&2
  exit 1
fi
# tmp dir
# mktemp picks an unpredictable name and creates the directory itself with
# owner-only permissions, so the path cannot be guessed or pre-created by
# another user of the shared temp location.
tmp="$(mktemp -d "${TMPDIR:-/tmp}/unknown2srt.XXXXXXXXXX")" || { echo '!! unable to create a tmpdir' >&2; exit 1; }
# The trap is installed only once the directory exists, so it can never
# remove a path this script did not create.
trap '[ -n "$tmp" ] && rm -fr -- "$tmp"' EXIT
# dos2unix
# -n writes the converted copy to a new file and leaves the input untouched;
# -- keeps an input name starting with a dash from being parsed as an option.
# The extra -f test catches the case where dos2unix skipped the file.
if ! dos2unix -n -- "$1" "$tmp/file.txt" || [[ ! -f "$tmp/file.txt" ]]; then
  echo "!! unable to convert '$1' with dos2unix" >&2
  exit 1
fi
# check if it is already unicode, otherwise assume 1250 and convert to utf
# file -b leaves the path out of its report, so only the detected type is
# searched for "UTF" (the temp path itself may contain those letters).
if file -b "$tmp/file.txt" | grep -q UTF; then
  #echo "is utf"
  mv "$tmp/file.txt" "$tmp/file_utf.txt"
elif ! iconv -f cp1250 -t utf8 "$tmp/file.txt" > "$tmp/file_utf.txt"; then
  #echo "not utf, assuming cp1250"
  # iconv stops at the first byte it cannot map; refuse to go on with a
  # silently truncated subtitle file.
  echo "!! iconv could not convert the input from cp1250" >&2
  exit 1
fi
#######################################
# Replace '*' italics markers with <i> / </i> tags, in place.
# Every '*' found in "$tmp/file_utf.txt" is replaced alternately by an
# opening and a closing tag. The open/close state is carried across lines,
# so a span opened on one line may be closed on a later one.
# Globals:   tmp (read) - directory holding file_utf.txt
# Arguments: none
# Returns:   non-zero if awk or mv fails; the working file is only
#            overwritten after awk succeeded.
# NOTE: still not good enough for premiere pro 2022, it expects closing tags
# on the same line. The call below is therefore disabled.
#######################################
handleItalics () {
  # '[*]' is a bracket expression, so the star is a literal field separator
  # in every awk implementation. c counts the markers seen so far: an even
  # count means the next marker opens a span, an odd count closes it.
  awk -F '[*]' '{
    for (i = 1; i < NF; i++) {
      printf "%s%s", $i, (c % 2 ? "</i>" : "<i>")
      c++
    }
    print $NF
  }' "$tmp/file_utf.txt" > "$tmp/file_utf2.txt" &&
    mv "$tmp/file_utf2.txt" "$tmp/file_utf.txt"
}
#handleItalics
# cue counter, the first cue gets number 1
count=1
# bc calc
# Evaluate the arguments, joined into one expression, with bc's math library
# at five decimal places and print the result on stdout.
calc () {
  local expression="$*"
  bc -l <<< "scale=5; $expression"
}
# action
#######################################
# Turn the timecoded text into SRT.
# Reads lines from stdin and writes SRT to stdout. A line made of exactly two
# hh:mm:ss:ff timecodes separated by whitespace starts a new cue: an empty
# separator line (unless the previous line already was empty), the cue
# number and the "start --> end" timing line are written. Every other line
# is passed through as subtitle text, with surrounding whitespace trimmed.
# Arguments: none
# Outputs:   SRT on stdout; a warning on stderr when a frame number does not
#            fit the 25 fps assumption.
#######################################
convertToSrt () {
  local line prev="" count=1
  local hms_in hms_out ms_in ms_out
  # assume hh:mm:ss:ff 25 fps timecode input format
  # and convert to hh:mm:ss,mmm srt format
  # 1 frame is 40 ms
  local -r frame_ms=40
  local -r tc='([0-9]{2}:[0-9]{2}:[0-9]{2})[:;]([0-9]{2})'
  # Anchored at both ends so dialogue that merely contains a digit and a
  # colon ("Meet me at 10:30?") is not mistaken for a timing line.
  local -r tc_line="^${tc}[[:space:]]+${tc}\$"

  # The "|| [[ -n ... ]]" part keeps a last line that has no trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    if [[ "$line" =~ $tc_line ]]; then
      hms_in="${BASH_REMATCH[1]}"   # hh:mm:ss part
      hms_out="${BASH_REMATCH[3]}"
      # frames to milliseconds; 10# forces base 10 because frame numbers
      # such as 08 or 09 would otherwise be rejected as invalid octal
      ms_in=$(( 10#${BASH_REMATCH[2]} * frame_ms ))
      ms_out=$(( 10#${BASH_REMATCH[4]} * frame_ms ))
      # SRT has exactly three millisecond digits; a frame number above 24
      # would overflow them, so warn and clamp instead of writing ",1000".
      if (( ms_in > 999 || ms_out > 999 )); then
        echo "!! frame number above 24, clamping: $line" >&2
        if (( ms_in > 999 )); then ms_in=999; fi
        if (( ms_out > 999 )); then ms_out=999; fi
      fi
      # SRT cues are separated by an empty line; add one when the input
      # did not provide it.
      if (( count > 1 )) && [[ -n "$prev" ]]; then
        printf '\n'
      fi
      printf '%s\n' "$count"
      printf '%s,%03d --> %s,%03d\n' "$hms_in" "$ms_in" "$hms_out" "$ms_out"
      count=$(( count + 1 ))
    else
      # printf, not echo: a text line such as "-n" must be printed verbatim
      printf '%s\n' "$line"
    fi
    prev="$line"
  done
}
convertToSrt < "$tmp/file_utf.txt"
# Done. A bare exit reports the status of the last command run above.
# Everything after this line is only a commented sample of the input format.
exit
# example unknown input format
#00:00:00:13 00:00:04:08
#*Lorem ipsum dolor sit amet, consectetur adipiscing elit,
#sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.*
#00:00:44:06 00:00:47:03
#At vero eos et accusamus?
#00:00:47:07 00:00:52:24
#Itaque earum rerum hic,
#tenetur a sapiente delectus.
#00:00:53:04 00:00:56:08
#Ex ea commodi consequatur?
#-Velit esse.