-- ASSParser: helpers for parsing ASS subtitle files into line tables and for
-- serialising such tables back into ASS text. Registered as "myaa.ASSParser"
-- through DependencyControl.
DependencyControl = require "l0.DependencyControl"

version = DependencyControl {
    name: "ASSParser",
    version: "0.0.4",
    description: "Utility function for parsing ASS files",
    author: "Myaamori",
    url: "http://github.com/TypesettingTools/Myaamori-Aegisub-Scripts",
    moduleName: "myaa.ASSParser",
    feed: "https://raw.githubusercontent.com/TypesettingTools/Myaamori-Aegisub-Scripts/master/DependencyControl.json",
    {
        "aegisub.re", "aegisub.util",
        {"l0.Functional", version: "0.6.0", url: "https://github.com/TypesettingTools/Functional",
         feed: "https://raw.githubusercontent.com/TypesettingTools/Functional/master/DependencyControl.json"}
    }
}
re, util, F = version\requireModules!

import lshift, rshift, band, bor from bit

parser = {}

-- Default field orders, used when a section carries no "Format:" line.
STYLE_FORMAT_STRING = "Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, " ..
    "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, " ..
    "Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, " ..
    "MarginV, Encoding"
EVENT_FORMAT_STRING = "Layer, Start, End, Style, Name, MarginL, MarginR, " ..
    "MarginV, Effect, Text"
DATA_FORMAT_STRING = "Id, Key, Value"

-- Field values used for any dialogue field the caller does not supply.
DIALOGUE_DEFAULTS = {
    actor: "", class: "dialogue", comment: false, effect: "",
    start_time: 0, end_time: 0, layer: 0, margin_l: 0,
    margin_r: 0, margin_t: 0, section: "[Events]", style: "Default",
    text: "", extra: nil
}

-- Field values used for any style field the caller does not supply.
STYLE_DEFAULTS = {
    class: "style", section: "[V4+ Styles]", name: "Default",
    fontname: "Arial", fontsize: 45,
    color1: "&H00FFFFFF", color2: "&H000000FF",
    color3: "&H00000000", color4: "&H00000000",
    bold: false, italic: false, underline: false, strikeout: false,
    scale_x: 100, scale_y: 100, spacing: 0, angle: 0,
    borderstyle: 1, outline: 4.5, shadow: 4.5, align: 2,
    margin_l: 23, margin_r: 23, margin_t: 23, encoding: 1
}

-- Returns a shallow copy of `line` with every non-nil entry of `fields`
-- (optional) written over it. Neither argument is modified.
create_line_from = (line, fields)->
    line = util.copy line
    if fields
        for key, value in pairs fields
            line[key] = value
    return line

-- Builds a dialogue line table from DIALOGUE_DEFAULTS overridden by `fields`.
-- The result always has an `extra` table (a new empty one unless supplied).
parser.create_dialogue_line = (fields)->
    line = create_line_from DIALOGUE_DEFAULTS, fields
    line.extra = line.extra or {}
    line

-- Builds a style line table from STYLE_DEFAULTS overridden by `fields`.
parser.create_style_line = (fields)->
    create_line_from STYLE_DEFAULTS, fields

-- Decodes the value part of a "Data:" line. The first character selects the
-- encoding: 'e' for the inline "#XX" escape encoding, 'u' for uuencoding.
-- Returns the decoded string, or nil when `value` is not a string or does not
-- start with a known encoding marker.
parser.decode_extradata_value = (value)->
    return nil if type(value) != "string"
    enc, data = value\match "^([eu])(.*)$"
    if enc == 'e'
        return parser.inline_string_decode data
    elseif enc == 'u'
        return parser.uudecode data
    -- unknown encoding marker: report "no value" instead of decoding nil
    return nil

-- Splits a format string such as "Layer, Start, End" into a list of field names.
parse_format_line = (format_string)->
    [match for match in format_string\gmatch "([^, ]+)"]

-- Interprets a style flag field (Bold, Italic, ...). Any non-zero number is
-- true and zero is false; a missing or non-numeric field yields nil so that
-- the style default stays in effect.
parse_style_flag = (value)->
    number = tonumber value
    return nil if number == nil
    number != 0

-- Parses one raw "Type: value" line into a line table.
--   raw:       the raw text of the line
--   extradata: optional table mapping extradata ID -> parsed "data" line, used
--              to resolve a leading {=ID=ID...} block in dialogue text
--   format:    optional list of field names; defaults to the built-in format
--              for the line type
-- Returns a table whose `class` is "format", "info", "dialogue", "style" or
-- "data", or nil when the line cannot be parsed.
parser.raw_to_line = (raw, extradata=nil, format=nil)->
    line_type, value = raw\match "^([^:]+):%s*(.*)$"
    if not value
        return nil

    default_format = {
        Dialogue: EVENT_FORMAT_STRING, Comment: EVENT_FORMAT_STRING,
        Style: STYLE_FORMAT_STRING, Data: DATA_FORMAT_STRING
    }

    if line_type == "Format"
        return {class: "format", format: parse_format_line value}
    elseif not default_format[line_type]
        -- anything without a known field layout is kept as a key/value pair
        return {class: "info", key: line_type, value: value}

    format = format or parse_format_line default_format[line_type]
    -- split into at most #format pieces so commas in the last field survive
    elements = F.string.split value, ",", 1, true, #format - 1
    return nil if #elements != #format

    fields = {format[i], elements[i] for i=1,#elements}

    if line_type == "Dialogue" or line_type == "Comment"
        line = parser.create_dialogue_line {
            actor: fields.Name,
            comment: line_type == "Comment",
            effect: fields.Effect,
            start_time: F.util.assTimecode2ms(fields.Start),
            end_time: F.util.assTimecode2ms(fields.End),
            layer: tonumber(fields.Layer),
            margin_l: tonumber(fields.MarginL),
            margin_r: tonumber(fields.MarginR),
            margin_t: tonumber(fields.MarginV),
            style: fields.Style,
            text: fields.Text
        }

        -- handle extradata (e.g. '{=32=33}Line text')
        extramatch = re.match line.text, "^\\{((?:=\\d+)+)\\}(.*)$"
        if extramatch
            -- the ID block is always stripped from the text, resolved or not
            line.text = extramatch[3].str
            if extradata
                for id in extramatch[2].str\gmatch "=(%d+)"
                    id = tonumber id
                    if extradata[id]
                        eline = extradata[id]
                        line.extra[eline.key] = eline.value
            else
                aegisub.log 2, "WARNING: Found extradata ID, but no extradata mapping provided: " ..
                    "#{raw}\n"

        return line
    elseif line_type == "Style"
        line = parser.create_style_line {
            name: fields.Name,
            fontname: fields.Fontname,
            fontsize: tonumber(fields.Fontsize),
            color1: fields.PrimaryColour,
            color2: fields.SecondaryColour,
            color3: fields.OutlineColour,
            color4: fields.BackColour,
            bold: parse_style_flag(fields.Bold),
            italic: parse_style_flag(fields.Italic),
            underline: parse_style_flag(fields.Underline),
            strikeout: parse_style_flag(fields.StrikeOut),
            scale_x: tonumber(fields.ScaleX),
            scale_y: tonumber(fields.ScaleY),
            spacing: tonumber(fields.Spacing),
            angle: tonumber(fields.Angle),
            borderstyle: tonumber(fields.BorderStyle),
            outline: tonumber(fields.Outline),
            shadow: tonumber(fields.Shadow),
            align: tonumber(fields.Alignment),
            margin_l: tonumber(fields.MarginL),
            margin_r: tonumber(fields.MarginR),
            margin_t: tonumber(fields.MarginV),
            encoding: tonumber(fields.Encoding)
        }
        return line
    elseif line_type == "Data"
        id = tonumber fields.Id
        decoded = parser.decode_extradata_value fields.Value
        -- a data line without a numeric ID, a key or a decodable value is
        -- malformed; returning nil lets callers warn instead of indexing
        -- tables with nil
        return nil if id == nil or fields.Key == nil or decoded == nil
        return {class: "data", id: id, key: fields.Key, value: decoded}

-- Serialises a line table back to its raw ASS text (without a trailing
-- newline). Handles the classes "dialogue", "style" and "info"; any other
-- class yields nil.
parser.line_to_raw = (line)->
    if line.class == "dialogue"
        prefix = if line.comment then "Comment" else "Dialogue"
        "#{prefix}: #{line.layer},#{F.util.ms2AssTimecode line.start_time}," ..
            "#{F.util.ms2AssTimecode line.end_time},#{line.style},#{line.actor}," ..
            "#{line.margin_l},#{line.margin_r},#{line.margin_t},#{line.effect},#{line.text}"
    elseif line.class == "style"
        map = {[true]: "-1", [false]: "0"}
        -- normalise whatever colour notation the style holds
        clr = (color)-> util.ass_style_color util.extract_color color
        "Style: #{line.name},#{line.fontname},#{line.fontsize},#{clr line.color1}," ..
            "#{clr line.color2},#{clr line.color3},#{clr line.color4},#{map[line.bold]}," ..
            "#{map[line.italic]},#{map[line.underline]},#{map[line.strikeout]}," ..
            "#{line.scale_x},#{line.scale_y},#{line.spacing},#{line.angle}," ..
            "#{line.borderstyle},#{line.outline},#{line.shadow},#{line.align}," ..
            "#{line.margin_l},#{line.margin_r},#{line.margin_t},#{line.encoding}"
    elseif line.class == "info"
        "#{line.key}: #{line.value}"

-- Escapes a byte string for inline storage: control bytes (<= 0x1F), bytes
-- >= 0x80 and the characters '#', ',', ':' and '|' become "#XX" (uppercase
-- hex); every other byte is copied unchanged.
parser.inline_string_encode = (input)->
    output = {}
    for i=1,#input
        c = input\byte i
        if c <= 0x1F or c >= 0x80 or c == 0x23 or c == 0x2C or c == 0x3A or c == 0x7C
            table.insert output, string.format "#%02X", c
        else
            table.insert output, input\sub i,i
    return table.concat output

-- Reverses inline_string_encode: each '#' followed by hex digits is replaced
-- by the byte with that value. A '#' that is the last character, or that is
-- not followed by hex digits, is copied literally.
parser.inline_string_decode = (input)->
    output = {}
    i = 1
    while i <= #input
        char = input\sub i, i
        -- up to two characters after the current one; empty at end of input
        hex = input\sub i + 1, i + 2
        if char == "#" and hex\match "^%x%x?$"
            table.insert output, string.char tonumber(hex, 16)
            i += 2
        else
            table.insert output, char
        i += 1
    return table.concat output

-- Encodes a byte string by packing every 3 input bytes into four 6-bit
-- values, each offset by 33. A trailing partial group emits only as many
-- characters as are needed to carry its bytes.
parser.uuencode = (input)->
    ret = {}
    for pos=1,#input,3
        chunk = input\sub pos, pos+2
        src = [c\byte! for c in chunk\gmatch "."]
        -- zero-pad a short final group so the bit arithmetic below is defined
        while #src < 3
            src[#src+1] = 0

        dst = {(rshift src[1], 2),
               (bor (lshift (band src[1], 0x3), 4), (rshift (band src[2], 0xF0), 4)),
               (bor (lshift (band src[2], 0xF), 2), (rshift (band src[3], 0xC0), 6)),
               (band src[3], 0x3F)}

        -- n remaining input bytes need n + 1 output characters (at most 4)
        for i=1,math.min(#input - pos + 2, 4)
            table.insert ret, dst[i] + 33

    return table.concat [string.char i for i in *ret]

-- Reverses uuencode: reads groups of up to four characters, subtracts 33 from
-- each and reassembles the original bytes.
parser.uudecode = (input)->
    ret = {}
    pos = 1
    while pos <= #input
        chunk = input\sub pos, pos+3
        src = [(string.byte c) - 33 for c in chunk\gmatch "."]
        if #src > 1
            table.insert ret, bor (lshift src[1], 2), (rshift src[2], 4)
        if #src > 2
            table.insert ret, bor (lshift (band src[2], 0xF), 4), (rshift src[3], 2)
        if #src > 3
            table.insert ret, bor (lshift (band src[3], 0x3), 6), src[4]
        pos += #src

    return table.concat [string.char i for i in *ret]

-- In-memory representation of a parsed ASS file.
--   sections:                section name -> list of raw rows
--   script_info / aegisub_garbage / styles / events: lists of parsed lines
--   script_info_mapping / aegisub_garbage_mapping:   key -> value lookups
--   extradata:               extradata ID -> parsed "data" line
--   extradata_mapping:       key -> value -> extradata ID
class ASSFile
    -- file: an object exposing a `lines` iterator (called as file\lines!)
    new: (file)=>
        @sections = {}
        @styles = {}
        @events = {}
        @script_info = {}
        @script_info_mapping = {}
        @aegisub_garbage = {}
        @aegisub_garbage_mapping = {}
        @extradata = {}
        @extradata_mapping = {}
        @parse file

    -- Reads the file and fills every field. Extradata is parsed first because
    -- event lines refer to it by ID.
    parse: (file)=>
        @read_sections file
        @parse_extradata!
        @script_info = @parse_section "Script Info", {"info": true}
        @aegisub_garbage = @parse_section "Aegisub Project Garbage", {"info": true}
        @styles = @parse_section "V4+ Styles", {"style": true}
        @events = @parse_section "Events", {"dialogue": true}

        for info in *@script_info
            @script_info_mapping[info.key] = info.value

        for garbage in *@aegisub_garbage
            @aegisub_garbage_mapping[garbage.key] = garbage.value

    -- Sorts the raw rows of `file` into @sections, keyed by the section name
    -- without its brackets. Empty rows and rows starting with ';' are dropped.
    read_sections: (file)=>
        current_section = nil
        -- read lines from file, sort into sections
        for row in file\lines!
            -- remove BOM if present, remove newlines, and trim leading spaces
            row = F.string.trimLeft (row\gsub "^\xEF\xBB\xBF", "")\gsub "[\r\n]*$", ""

            if row == "" or row\match "^;"
                continue

            section = row\match "^%[(.*)%]$"
            if section
                current_section = section
                @sections[current_section] = {}
                continue

            -- content before the first [Section] header has nowhere to go
            if not current_section
                aegisub.log 2, "WARNING: Ignoring line outside of any section: #{row}\n"
                continue

            table.insert @sections[current_section], row

    -- Parses the "Aegisub Extradata" section (if present) into @extradata and
    -- @extradata_mapping. Rows that are not valid data lines are skipped with
    -- a warning.
    parse_extradata: =>
        if @sections["Aegisub Extradata"]
            for row in *@sections["Aegisub Extradata"]
                line = parser.raw_to_line row
                if not line or line.class != "data"
                    aegisub.log 2, "WARNING: Malformed data line: #{row}\n"
                    continue

                @extradata[line.id] = line
                @extradata_mapping[line.key] = @extradata_mapping[line.key] or {}
                @extradata_mapping[line.key][line.value] = line.id

    -- Parses every row of `section` and returns the lines whose class is a key
    -- of `expected_classes`. A "Format:" row changes the field layout for the
    -- rows that follow it. Returns an empty list when the section is absent.
    parse_section: (section, expected_classes)=>
        lines = {}
        return lines if not @sections[section]

        format = nil
        for row in *@sections[section]
            line = parser.raw_to_line row, @extradata, format
            if not line
                -- `line` is nil here, so the raw row is what gets reported
                aegisub.log 2, "WARNING: Malformed line: #{row}\n"
            elseif line.class == "format"
                format = line.format
            elseif expected_classes[line.class]
                table.insert lines, line
            else
                aegisub.log 2, "WARNING: Unexpected type #{line.class} in section #{section}\n"

        return lines

-- Parses `file` (an object exposing a `lines` iterator) into an ASSFile.
parser.parse_file = (file)->
    return ASSFile file

-- Emits the [V4+ Styles] section, one callback call per output line.
parser.generate_styles_section = (styles, callback)->
    callback "[V4+ Styles]\n"
    callback "Format: #{STYLE_FORMAT_STRING}\n"
    for line in *styles
        callback parser.line_to_raw(line) .. "\n"

-- Emits the [Events] section and, when any event carries extradata, an
-- [Aegisub Extradata] section after it.
--   events:            list of dialogue line tables (left unmodified)
--   extradata_mapping: key -> value -> ID table; extradata is only written
--                      when this is given, and newly assigned IDs are cached
--                      in it
--   callback:          called with each chunk of output text
parser.generate_events_section = (events, extradata_mapping, callback)->
    callback "[Events]\n"
    callback "Format: #{EVENT_FORMAT_STRING}\n"

    -- find the largest extradata ID seen so far
    last_eid = 0
    if extradata_mapping
        for key, v in pairs extradata_mapping
            for value, eid in pairs v
                last_eid = math.max last_eid, eid

    extradata_to_write = {}

    for line in *events
        -- handle extradata
        if line.extra and extradata_mapping
            lineindices = {}
            for key, value in pairs line.extra
                -- look for data in the original file's extradata
                cached_id = extradata_mapping[key] and extradata_mapping[key][value]
                if not cached_id
                    -- if new extradata, generate new ID and cache it
                    last_eid += 1
                    cached_id = last_eid
                    extradata_mapping[key] = extradata_mapping[key] or {}
                    extradata_mapping[key][value] = cached_id

                table.insert lineindices, cached_id
                extradata_to_write[cached_id] = {key, value}

            -- add indices to line text (e.g. {=32=33}Text)
            if #lineindices > 0
                table.sort lineindices
                indexstring = table.concat ["=#{ind}" for ind in *lineindices]
                -- write to a copy: prefixing the caller's line would pile up
                -- ID blocks in its text every time the events are generated
                line = util.copy line
                line.text = "{#{indexstring}}" .. line.text

        callback parser.line_to_raw(line) .. "\n"

    out_indices = [ind for ind, _ in pairs extradata_to_write]
    if #out_indices > 0
        callback "\n[Aegisub Extradata]\n"
        table.sort out_indices
        for ind in *out_indices
            {key, value} = extradata_to_write[ind]

            encoded_data = parser.inline_string_encode value
            -- a mystical incantation passed down from subtitle_format_ass.cpp
            -- (in effect: uuencode when the inline encoding would be longer)
            if 4*#value < 3*#encoded_data
                value = "u" .. parser.uuencode value
            else
                value = "e" .. encoded_data

            callback "Data: #{ind},#{key},#{value}\n"

-- Emits the [Script Info] section, preceded by a UTF-8 BOM unless `bom` is
-- false.
parser.generate_script_info_section = (lines, callback, bom=true)->
    if bom
        callback "\xEF\xBB\xBF"
    callback "[Script Info]\n"
    for line in *lines
        callback parser.line_to_raw(line) .. "\n"

-- Emits the [Aegisub Project Garbage] section.
parser.generate_aegisub_garbage_section = (lines, callback)->
    callback "[Aegisub Project Garbage]\n"
    for line in *lines
        callback parser.line_to_raw(line) .. "\n"

-- Emits a complete file through `callback`. Each of script_info,
-- aegisub_garbage, styles and events may be nil to leave that section out;
-- the sections that are written are separated by one blank line.
parser.generate_file = (script_info, aegisub_garbage, styles, events, extradata_mapping, callback)->
    sec_added = false
    -- emits the blank separator before every section except the first
    new_section = ->
        if sec_added
            callback "\n"
        sec_added = true

    if script_info
        new_section!
        parser.generate_script_info_section script_info, callback

    if aegisub_garbage
        new_section!
        parser.generate_aegisub_garbage_section aegisub_garbage, callback

    if styles
        new_section!
        parser.generate_styles_section styles, callback

    if events
        new_section!
        parser.generate_events_section events, extradata_mapping, callback

parser.version = version

return version\register parser