#!/usr/bin/perl
#
# Converts an IRC log to formatted minutes in HTML.
#
# See scribe2doc.html for the manual.
# This is a rewrite of David Booth's scribe.perl
#
# TODO: option --inputFormat to select the format, rather than try
# each parser in turn.
#
# TODO: Allow (and ignore) the unused options of old scribe.perl?
#
# TODO: Add a command ('oops'? 'undo'? 'ignore'? u///g?) to remove an
# incorrect s///g, because s|s/.../.../g|| doesn't remove it.
#
# TODO: Warn about unrecognized or impossible dates after "Date: ..."
#
# TODO: A streaming mode (using --inputFormat) that formats each line
# as soon as it is read? (s/// and i//// will not work. ScribeNick is
# not retroactive. Broken lines, as in Mirc logs, are not recombined.)
#
# TODO: Make "next meeting" accept a date ("7 Aug") or a period ("in 2
# weeks") and infer a URL?
#
# TODO: If trackbot assigns a number ("ISSUE-3") to an issue, use that
# number instead of the generic "Issue". Also use it in the
# #IssueSummary.
#
# TODO: An option to omit the special handling of W3C's bots
# (currently zakim, rrsagent, agendabot and trackbot).
#
# TODO: Make commands such as scribeoptions:-implicit and
# scribeoptions:-allowspace apply only until they are overridden by
# another?
#
# TODO: RRSAgent has commands to edit or drop actions (because it
# doesn't understand s///). Should we support those?
#
# TODO: An option to add rel=nofollow to links? (In case RRSAgent is
# used to create Google karma for sites.)
#
# TODO: A command to skip several lines or end the minutes before the
# end of the input. ("StopMinutesHere", "ResumeMinutesHere"?)
#
# TODO: Should s/// commands ignore lines by the W3C bots?
#
# TODO: Ivan's minutes generator distinguishes participants (present+)
# from guests (guest+). Should scribe.perl, too?
#
# TODO: Syntax highlighting of verbatim text if there is a language
# indicated after the backquotes (as in GitHub's markdown)? (```java
# ...```)
#
# TODO: Also allow three tildes (~~~) instead of three backquotes, as
# in Markdown?
#
# TODO: A way to include (phrase-level) HTML directly?
#
# TODO: When the minutes don't start with "topic:", the first
# ACTION: %3\$s <%1\$s> %3\$s <%1\$s> %3\$s %2\$s %3\$s <%1\$s> %3\$s <%1\$s> <%1\$s> %3\$s RESOLUTION: %3\$s %1\$s: %3\$s ISSUE: %3\$s Slideset: %3\$s Repository: %3\$s Repository- %3\$s
between continuation lines
# Configuration defaults. Each of the variables up to $ghurlbot is bound
# to a command-line option in %options further down, so these values
# apply only when the option is not given.
my $final = 0; # If 1, don't include "DRAFT" warning in minutes
my $scribenick; # Nick of the initial scribe (--scribenick), undef = find it in the log
my $dash_topics = 0; # If 1, a line of only dashes means the next line is a topic
my $use_zakim = 1; # If 1, treat conversations with Zakim specially
my $scribeonly = 0; # If 1, omit IRC comments by others
my $emphasis = 0; # If 1, _xxx_, *xxx* and /xxx/ highlight things
my $old_style = 0; # If 1, use the old (pre-2017) style sheets
my $url_display = 'break'; # How to display in-your-face URLs: break, shorten or full
my $logo; # undef = W3C logo; string = HTML fragment
my $collapse_limit = 30; # Longer participant lists are collapsed
my $stylesheet; # URL of style sheet, undef = use defaults
my $mathjax = # undef = no math; string is MathJax URL
'https://www.w3.org/scripts/MathJax/3/es5/mml-chtml.js';
my $islide = # String is i-slide library URL
'https://w3c.github.io/i-slide/i-slide-2.js?selector=a.islide';
my $github = 1; # If 0, don't make links for GitHub issues
my $ghurlbot = 1; # If 0, hide conversations with GHURLbot
# Global variables:
my $has_math = 0; # Set to 1 by to_mathml()
my @diagnostics; # Collected warnings and other info
my $recordingstart; # Time of recording start (secs since midnight)
my $recordingend; # Time of recording end (secs since midnight)
my @repositories = (); # List of repo URLs for expanding issue refs
# Each parser takes a reference to an array of text lines (without
# newlines) and a reference to an array of records. It returns 0
# (failed to parse) or 1 (success) and it appends successfully parsed
# lines to the array of records, with {type} set to 'i' and {speaker}
# and {text} set to the text and the nick of the person who typed that
# text. If a line includes a time stamp, the parser converts it to
# seconds since midnight and puts it in {time}.
# It should not try to parse the text further for actions, resolutions,
# etc.
#
# A parser also receives two more references: a scalar in which it
# counts the lines it has looked at and a scalar in which it stores the
# line it is currently looking at. The caller uses them to report where
# the most successful parser gave up.
#
# IRC messages ("X joined channel Y"), private messages,
# and off-the-record text ("/me waves") are omitted.
#
# The parsers are tried in turn until one succeeds, so their order is
# important. E.g., the Plain_Text_Format should probably be towards
# the end.
my @parsers = (\&RRSAgent_text_format, \&Bip_Format, \&Mirc_Text_Format,
\&Yahoo_IM_Format, \&Bert_IRSSI_Format, \&Irssi_Format,
\&Qwebirc_paste_format, \&IRCCloud_format,
\&Quassel_paste_format, \&Plain_Text_Format);
# RRSAgent_text_format -- parse an IRC log as generated by RRSAgent
sub RRSAgent_text_format($$$$)
{
my ($lines_ref, $records_ref, $nlines_ref, $err_ref) = @_;
$$nlines_ref = 0;
foreach (@$lines_ref) {
$$nlines_ref++;
$$err_ref = $_;
if (/^(?:\d\d:\d\d:\d\d )?<([^ >]+)> \1 has (?:joined|left|changed the topic to:) /) {
# Ignore lines like "';
} elsif ($anchortext ne '') {
$s .= esc($anchortext, $emphasis);
} else {
$s .= break_url($url); # Otherwise the URL itself is the anchor text
}
return "$s";
} else {
return $anchortext ne '' ? esc($anchortext) : break_url($url);
}
}
# esc -- escape HTML delimiters (<>&"), optionally handle emphasis & Ralph links
#
# Arguments:
#   $s          the text to process
#   $emph       if true, also replace emoticons and arrows and call to_emph()
#   $link       if true, find URLs in $s and turn them into links with
#               mklink(); the value itself is passed on to mklink()
#   $break_urls if true (and $link is false), escape the text and then
#               pass every URL in it through break_url()
#   $github     if true (and $link and $break_urls are false), look for
#               references of the form "#123" or "repo#123"
#
# Exactly one of the four modes is applied per call, tested in the order
# $link, $break_urls, $github, plain escaping. The first three modes call
# esc() recursively on the text between the things they recognize.
#
# Returns the processed string.
sub esc($;$$$$);
sub esc($;$$$$)
{
my ($s, $emph, $link, $break_urls, $github) = @_;
my ($replacement, $pre, $url, $post, $type, $r);
if ($link) {
# Wrap Ralph-links and bare URLs in hyperlinks, using mklink().
# 1a) A double-quoted Ralph link: ... -> URL "ANCHOR" ...
# 1b) A single-quoted Ralph link: ... -> URL 'ANCHOR' ...
# 1c) An unquoted Ralph link: ... -> URL ANCHOR
# 2) A Xueyuan link: ANCHOR -> URL
# 3) An Ivan link: ... -> ANCHOR URL ...
# 4a) A double-quoted inverted Xueyuan link: ... URL -> "ANCHOR" ...
# 4b) A single-quoted inverted Xueyuan link: ... URL -> 'ANCHOR' ...
# 4c) An unquoted inverted Xueyuan link: ... URL -> ANCHOR
# 5) A markdown link: ... [ANCHOR](URL)
# 6) A bare URL: ... URL ...
# NOTE(review): the next two lines look like a single comment whose
# markup was lost; the second line is not a comment as it stands.
# With --> instead of ->, the link is embedded as an image (
).
# If $link < 0, omit the tag and just insert the text or image.
# Loop until we found all URLs.
# Each iteration handles the first URL in $s: the text before it (and
# the link itself) is appended to $replacement and $s is set to the
# text that remains to be searched.
$replacement = '';
while (($pre, $url, $post) = $s =~ /^(.*?)($urlpat)(.*)$/i) {
# Look for "->" or "-->" before or after the URL.
if ($pre =~ /(--?>) *$/p) { # Ralph, Xueyuan or missing anchor text
$type = $1;
$pre = $`;
# Try the anchor text after the URL first: quoted, then unquoted
# (which extends up to the next tab), then a single character.
if ($post =~ /^ *"([^"\t]*)"/p || $post =~ /^ *'([^'\t]*)'/p ||
$post =~ /^ *([^'" \t][^\t]*[^ \t]) */p ||
$post =~ /^ *([^'" \t]) */p) { # Ralph link
$replacement .= esc($pre, $emph, 0, 0, $github)
. mklink($link, $type, $url, $1);
$s = $';
} elsif ($pre =~ / *([^ \t][^\t]*[^ \t]|[^ \t]) *$/p) { # Xueyuan link
$replacement .= esc($`, $emph, 0, 0, $github)
. mklink($link, $type, $url,$1);
$s = $post;
} else { # Missing anchor text
$replacement .= esc($pre, $emph, 0, 0, $github)
. mklink($link, $type, $url, '');
$s = $post;
}
} elsif ($pre =~ /(--?>) *(.+?) *$/p) { # Ivan link
$replacement .= esc($`, $emph, 0, 0, $github)
. mklink($link, $1, $url, $2);
$s = $post;
} elsif ($post =~ /^ *(--?>) *"([^"\t]*)"/p ||
$post =~ /^ *(--?>) *'([^'\t]*)'/p ||
$post =~ /^ *(--?>) *([^ \t][^\t]*[^ \t]) */p ||
$post =~ /^ *(--?>) *([^ \t]) */p ||
$post =~ /^ *(--?>) *()/p) { # Inverted Xueyuan link
$replacement .= esc($pre, $emph, 0, 0, $github)
. mklink($link, $1, $url, $2);
$s = $';
} elsif ($post =~ /^\)/ && $pre =~ /!\[([^\]]+)\]\($/p) { # Markdown image
# Note: the text before a markdown image is escaped without
# emphasis or GitHub processing, unlike in the other branches.
$r = $1;
$replacement .= esc($`) . mklink($link, "-->", $url, $r);
$s = $post =~ s/^\)//r;
} elsif ($post =~ /^\)/ && $pre =~ /\[([^\]]+)\]\($/p) { # Markdown link
# Note: the text before a markdown link gets emphasis but no
# GitHub processing.
$r = $1;
$replacement .= esc($`, $emph) . mklink($link, "->", $url, $r);
$s = $post =~ s/^\)//r;
} else { # Bare URL.
$replacement .= esc($pre, $emph, 0, 0, $github)
. mklink($link, '->', $url, '');
$s = $post;
}
}
# Whatever is left after the last URL is processed as plain text.
$s = $replacement . esc($s, $emph, 0, 0, $github);
} elsif ($break_urls) { # Shorten or break URLs
# The text is escaped first; break_url() is then applied to the URLs
# found in the escaped text.
$s = esc($s, $emph);
$s =~ s/($urlpat)/break_url($1)/gie;
} elsif ($github) {
# Find "#123" or "name#123" (with an optional "owner/" prefix), not
# preceded or followed by a word character.
$replacement = '';
while ($s =~ /(?:^|\W)\K((?:[a-z0-9._-]+\/)?[a-z0-9._-]+)?#([0-9]+)(?=\W|$)/i) {
$s = $';
my ($repo, $issue) = ($1 // '', $2);
$replacement .= esc($`, $emph);
# NOTE(review): the first alternative below looks damaged (its quotes
# do not match); presumably it produced a link based on $r.
$replacement .= ($r = repository_to_url($repo))
? '$repo#$issue"
: "$repo#$issue";
}
$s = $replacement . esc($s, $emph);
} else {
# Plain escaping of the four HTML delimiters.
# NOTE(review): the replacement texts of these four substitutions
# appear to have been lost; the second one is not even a complete
# substitution as written. Compare with the original file.
$s =~ s/&/&/g;
$s =~ s/</g;
$s =~ s/>/>/g;
$s =~ s/"/"/g;
if ($emph) {
# Emoticons. (U+FE0E is appended to some of the replacement characters.)
$s =~ s/:-\)/☺/g;
$s =~ s/;-\)/😉\x{FE0E}/g;
$s =~ s/:-\(/☹/g;
$s =~ s{:-/}{😕\x{FE0E}}g;
$s =~ s/,-\)/😜\x{FE0E}/g;
$s =~ s{\\o/}{🙌\x{FE0E}}g;
# Arrows. The longer forms are replaced before the shorter ones, and
# an arrow that is part of a longer run of "-" or "=" is left alone.
$s =~ s/(?:^|[^-])\K-->/⟶/g;
$s =~ s/(?:^|[^-])\K->/→/g;
$s =~ s/(?:^|[^=])\K==>/⟹/g;
$s =~ s/(?:^|[^=])\K=>/⇒/g;
$s =~ s/<--(?!-)/⟵/g;
$s =~ s/<-(?!-)/←/g;
$s =~ s/<==(?!=)/⟸/g;
$s =~ s/<=(?!=)/⇐/g;
$s = to_emph($s); # Italics, bold, underline, monospace, math
}
}
return $s;
}
# is_cur_scribe -- check whether a nick is one of the current scribes
#
# $nick is looked up in the hash %$curscribes_ref after removing any
# trailing underscores and case-folding it. The result is also true if
# the hash has a true entry under the wildcard key '*'.
sub is_cur_scribe($$)
{
  my ($nick, $curscribes_ref) = @_;

  my $stripped = $nick =~ s/_+$//r;     # "alice__" and "alice" are the same
  my $key = fc($stripped);              # Compare case-insensitively

  return $curscribes_ref->{$key} || $curscribes_ref->{'*'};
}
# add_scribes -- register scribes, both as current scribes and overall
#
# $names holds comma-separated entries, each expected to match
# $scribepat, whose first two captures are taken as the nick and an
# optional real name. For every entry the normalized nick (case-folded,
# trailing underscores removed) is
#  - marked as a current scribe in %$curscribes_ref,
#  - appended to @$scribes_ref, and
#  - given a display name in %$scribenames_ref: the real name if there
#    is one, otherwise the nick as typed unless a name is already known.
sub add_scribes($$$$)
{
  my ($names, $curscribes_ref, $scribes_ref, $scribenames_ref) = @_;

  for my $entry (split(/ *, */, $names)) {
    my ($nick, $real) = $entry =~ /^$scribepat$/;
    my $stripped = $nick =~ s/_+$//r;
    my $key = fc($stripped);

    $curscribes_ref->{$key} = 1;
    push @{$scribes_ref}, $key;

    if ($real) {
      $scribenames_ref->{$key} = $real;
    } elsif (!$scribenames_ref->{$key}) {
      $scribenames_ref->{$key} = $nick;
    }
  }
}
# delete_scribes -- drop scribes from the set of current scribes
#
# $names holds comma-separated entries, each expected to match
# $scribepat. Only the nick (the first capture) matters: it is
# normalized (case-folded, trailing underscores removed) and its key is
# deleted from %$curscribes_ref.
sub delete_scribes($$)
{
  my ($names, $curscribes_ref) = @_;

  for my $entry (split(/ *, */, $names)) {
    my ($nick) = $entry =~ /^$scribepat$/;
    my $stripped = $nick =~ s/_+$//r;
    delete $curscribes_ref->{fc($stripped)};
  }
}
# link_to_recording -- make a link into the recording for a given time
#
# $url is the URL of the recording and $time a time of day in seconds
# since midnight. Returns '' if either is undefined, if the start of the
# recording ($recordingstart) is unknown, or if $time does not fall
# between $recordingstart and (if it is known) $recordingend.
#
# Times are times of day, so a difference below -8 hours is taken to
# mean that midnight was crossed and 24 hours are added to it.
sub link_to_recording($$)
{
  my ($url, $time) = @_;
  my $day = 24 * 3600;

  # Difference between two times of day, corrected for midnight.
  my $elapsed = sub {
    my ($from, $to) = @_;
    my $d = $to - $from;
    return $d < -8 * 3600 ? $d + $day : $d;
  };

  return '' unless defined $url && defined $recordingstart && defined $time;

  my $offset = $elapsed->($recordingstart, $time);
  return '' if $offset < 0;             # Recording had not started yet

  if (defined $recordingend) {
    return '' if $elapsed->($time, $recordingend) <= 0; # Already ended
  }

  # NOTE(review): the format below contains no conversions, so the two
  # arguments do not end up in the result. Markup seems to be missing.
  return sprintf " 🎞\x{FE0E}", esc($url), $offset;
}
# add_repositories -- expand repository names to full URLs and remember them
#
# $repos is a list of repository names separated by spaces and/or
# commas. Each name is handed to repository_to_url(). A name that
# yields a URL is put at the front of the global @repositories (so the
# name that was mentioned last ends up first); for any other name a
# message is added to @diagnostics.
sub add_repositories($)
{
  my ($repos) = @_;

  for my $name (split /[ ,]+/, $repos) {
    my $url = repository_to_url($name);
    if (!$url) {
      push @diagnostics, "Could not interpret as a repository: $name";
      next;
    }
    unshift @repositories, $url;
  }
}
# remove_repositories -- forget one or more repositories
#
# $repos is a list of repository names separated by spaces and/or
# commas. Each name is handed to repository_to_url() and every entry of
# the global @repositories equal to the resulting URL is removed. A
# name that does not yield a URL is reported in @diagnostics.
sub remove_repositories($)
{
  my $repos = shift;

  for my $name (split /[ ,]+/, $repos) {
    my $url = repository_to_url($name);
    if ($url) {
      @repositories = grep { $_ ne $url } @repositories;
    } else {
      push @diagnostics, "Could not interpret as a repository: $name";
    }
  }
}
# Main body
#
# State of the main program: information collected from the log and
# counters for generating IDs.

# Strip the "$Revision: " and " $" of the keyword, leaving only the number.
my $revision = '$Revision: 229 $'
=~ s/\$Revision: //r
=~ s/ \$//r;
# Likewise for the date.
my $versiondate = '$Date: Thu Jul 25 08:38:54 2024 UTC $'
=~ s/\$Date: //r
=~ s/ \$//r;
my @scribes; # List of scribes
my %scribenames; # Map scribe nicknames to real names
my @records; # Array of parsed lines
my $date; # Date of the meeting
my $meeting = "(MEETING TITLE)"; # Name of the meeting (HTML-escaped)
my $prev_meeting = ''; # HTML-formatted link to previous meeting
my $next_meeting = ''; # HTML-formatted link to next meeting
my %present; # List of participants
my %regrets; # List of regrets
my $minutes_url; # URL of the minutes according to RRSAgent
my $logging_url; # URL of the log according to RRSAgent
my $agenda = ''; # HTML-formatted link to an agenda
my %chairs; # List of meeting chairs
my %lastspeaker; # Current speaker (separate for each scribe)
my $speakerid = 's00'; # Generates unique ID for each speaker
my $has_slides = 0; # Set to 1 if there is at least one slideset
my $lastslideset; # URL of the slideset being presented
my $recording; # URL of the recording of the meeting
my $recording_link; # HTML-formatted link to the recording
my $topicid = 't00'; # Generates unique ID for each topic
my $actionid = 'a00'; # Generates unique ID for each action
my $resolutionid = 'r00'; # Generates unique ID for each resolution
my $issueid = 'i00'; # Generates unique ID for each issue
my $lineid = 'x000'; # Generates unique ID for each line
my %speakers; # Unique ID for each speaker
my %namedanchors; # Set of already used IDs for NamedAnchorsHere
my %curscribes; # Indexes are the current scribenicks (case-folded, no trailing "_")
my %verbatim; # End of preformatted mode for nick: ``` or ]]
# NOTE(review): as written, each of the next five strings consists of
# just a newline. They presumably held HTML for icons and the logo.
my $agenda_icon = '
';
my $irclog_icon = '
';
my $previous_icon = '
';
my $next_icon = '
';
my $w3clogo = '
';
# The keys are case-folded, so look up nicks with fc().
my %bots = (fc('RRSAgent') => 1, # Nicks that probably aren't scribe
fc('trackbot') => 1,
fc('ghurlbot') => 1,
fc('gb') => 1,
fc('github-bot') => 1,
fc('agendabot') => 1,
fc('Zakim') => 1);
# Option specifications for Getopt::Long. The same table is used for the
# command line, for the SCRIBEOPTIONS environment variable and for
# "scribeoptions:" lines in the log itself.
#
# Most options are bound directly to a variable. Options that need
# validation or that set a variable to a fixed value use a callback,
# which Getopt::Long calls with the option name and the option value.
my %options = ("team" => sub {$styleset = 'team'},
               "member" => sub {$styleset = 'member'},
               "fancy" => sub {$styleset = 'fancy'},
               "embedDiagnostics!" => \$embed_diagnostics,
               "implicitContinuations!" => \$implicitcont,
               "allowSpaceContinuation!" => \$spacecont,
               "keepLines!" => \$keeplines,
               "urlDisplay=s" => sub {
                 # The whole value must be one of the three keywords. (The
                 # "$" must be outside the group, otherwise it only applies
                 # to "full" and values like "breakfast" are accepted.)
                 # Store the value in lowercase, so that it has a single
                 # spelling no matter how the user capitalized it.
                 if ($_[1] =~ /^(?:break|shorten|full)$/i) {
                   $url_display = lc $_[1];
                 } else {
                   die "--urlDisplay must be break, shorten or full\n";
                 }
               },
               "final!" => \$final,
               "draft!" => sub {$final = ! $_[1]}, # --draft is the inverse of --final
               "scribenick=s" => \$scribenick,
               "dashTopics!" => \$dash_topics,
               "useZakimTopics!" => \$use_zakim,
               "scribeOnly!" => \$scribeonly,
               "emphasis!" => \$emphasis,
               "mathjax=s" => \$mathjax,
               "islide=s" => \$islide,
               "oldStyle!" => \$old_style,
               "stylesheet:s" => \$stylesheet,
               "logo:s" => \$logo,
               "nologo" => sub {$logo = ''}, # Empty string = no logo at all
               "collapseLimit:i" => \$collapse_limit,
               "githubIssues!" => \$github,
               "ghurlbot!" => \$ghurlbot,
               "minutes=s" => \$minutes_url);
# Month names, indexed by month number (1 = January). Index 0 is unused.
my @month = ('', qw(January February March April May June July
                    August September October November December));

# Everything written to stdout and stderr is encoded as UTF-8. Stdin is
# deliberately not decoded automatically: the input may be an old file
# in Latin-1, which guess_encoding() below takes care of.
#
binmode($_, ':utf8') foreach (\*STDOUT, \*STDERR);

# Options in the environment come first, so that the command line can
# override them.
if ($ENV{"SCRIBEOPTIONS"}) {
  GetOptionsFromString($ENV{"SCRIBEOPTIONS"}, %options);
}
pod2usage(2) unless GetOptions(%options);
# Step 1: Slurp the input, decode it and split it into lines (tabs
# become spaces, line endings are removed). Then hand the lines to each
# parser in turn until one of them accepts the whole input. If none
# does, report the line at which the parser that got furthest stopped.
#
{
  local $/;                             # Read everything at once
  my $input = <>;

  # Try to guess the encoding: ASCII, UTF-8/16/32 or Latin-1. If the
  # guess is not an object (not known or ambiguous), assume UTF-8.
  my $decoder = guess_encoding($input, 'latin-1');
  if (ref($decoder)) {
    $input = $decoder->decode($input);
  } else {
    $input = decode('UTF-8', $input);
  }

  my @input = map { tr/\t/ /r } split(/\r?\n/, $input);
  $input[0] =~ s/^\x{FEFF}// if scalar @input; # Remove the BOM, if any

  # $n and $e are filled in by the parser: the number of lines it looked
  # at and the last of those lines. $nlines and $errline remember them
  # for the parser that got furthest.
  my ($nlines, $errline, $n, $e) = (0, '', 0, '');
  foreach my $parser (@parsers) {
    last if $parser->(\@input, \@records, \$n, \$e);
    ($nlines, $errline) = ($n, $e) if $n > $nlines;
    @records = ();                      # Discard partial results
  }

  if (!@records) {
    push(@diagnostics,
         $nlines ? "Unrecognized input at line $nlines: $errline"
                 : 'Input is empty.');
  }
}
# Step 2: Process s/old/new/ and i/where/what/ commands.
#
# First mark all s/// and i/// lines as 'c', so that they don't get
# changed by other s/// lines. Then loop over all lines again and
# apply the substitutions and insertions. Successful s/// and i///
# become of type 'o' (omit).
#
# If people try to use s/// to replace URLs and they copy-paste the
# URLs from certain old minutes, there might be zero-width non-joiner
# characters in the URLs. Remove them before matching.
#
# The delimiter of a command is "/" or "|". The third delimiter and the
# flag (g or G) after it are optional. Only records of type 'i' are
# searched and changed; every outcome is reported in @diagnostics.
#
foreach (@records) {
$_->{type} = 'c' if
$_->{text} =~ /^ *(s|i)(\/|\|)(.*?)\2(.*?)(?:\2([gG])? *)?$/;
}
for (my $i = 0; $i < @records; $i++) {
if ($records[$i]->{type} eq 'c' &&
$records[$i]->{text} =~ /^ *(s|i)(\/|\|)(.*?)\2(.*?)(?:\2([gG])? *)?$/) {
my ($cmd, $delim, $old, $new, $global) = ($1, $2, $3, $4, $5);
my $old2 = $old =~ s/\x{200C}//gr; # Version without any U+200C
# NOTE(review): if $old is empty, the patterns below are empty, and
# Perl then uses the last successfully matched pattern instead.
# Check whether a command such as "s//x/" needs to be rejected.
#
# If the delimiter occurs in the new text, the command had more
# delimiters than expected. Say how it was interpreted.
push(@diagnostics, "Warning: ‘$records[$i]->{text}’ interpreted as replacing ‘$old’ by ‘$new’")
if $cmd eq 's' && $new =~ /\Q$delim\E/;
push(@diagnostics, "Warning: ‘$records[$i]->{text}’ interpreted as inserting ‘$new’ before ‘$old’")
if $cmd eq 'i' && $new =~ /\Q$delim\E/;
if ($cmd eq 'i') { # i/where/what/
# Search backwards for the nearest line that contains the text and
# insert a new line before it, attributed to whoever typed the command.
my $j = $i - 1;
$j-- until $j < 0 || ($records[$j]->{type} eq 'i' &&
($records[$j]->{text} =~ /\Q$old\E/ ||
$records[$j]->{text} =~ /\Q$old2\E/));
if ($j >= 0) {
splice(@records, $j, 0,
{type=>'i',speaker=>$records[$i]->{speaker},text=>$new});
$i++; # All records shifted by the splice
$records[$i]->{type} = 'o';
push(@diagnostics, 'Succeeded: ' . $records[$i]->{text});
} else {
push(@diagnostics, 'Failed: ' . $records[$i]->{text});
}
} elsif (! defined $global) { # s/old/new/
# Search backwards and stop at the first line in which the
# substitution succeeds. (The substitution is itself the test.)
my $j = $i - 1;
$j-- until $j < 0 || ($records[$j]->{type} eq 'i' &&
($records[$j]->{text} =~ s/\Q$old\E/$new/ ||
$records[$j]->{text} =~ s/\Q$old2\E/$new/));
push(@diagnostics,
($j >= 0 ? 'Succeeded: ' : 'Failed: ') . $records[$i]->{text});
$records[$i]->{type} = 'o' if $j >= 0; # Omit successful command
} else { # s/old/new/g or .../G
# With "g", only the lines before the command are changed; with "G",
# all lines, including later ones. In each line, only the first
# occurrence is replaced. $n counts the lines that were changed.
my $n = 0;
for (0 .. ($global eq 'g' ? $i-1 : @records-1)) {
$n++ if $records[$_]->{type} eq 'i' &&
($records[$_]->{text} =~ s/\Q$old\E/$new/ ||
$records[$_]->{text} =~ s/\Q$old2\E/$new/);
}
push(@diagnostics,
($n ? "Succeeded $n times: " : "Failed: ") . $records[$i]->{text});
$records[$i]->{type} = 'o' if $n; # Omit successful command
}
}
}
# Step 3: Apply the options given in "scribeoptions:" lines. This is
# done before interpreting anything else, because options may affect
# the whole log. Words that are not known options are reported in the
# diagnostics. The lines themselves are omitted from the minutes.
#
foreach my $rec (@records) {
  next unless $rec->{text} =~ /^ *scribeoptions *: *(.*?) *$/i;
  my $optstring = $1;

  # With pass_through, unknown options are returned instead of rejected.
  Getopt::Long::Configure("pass_through");
  my ($ret, $args) = GetOptionsFromString($optstring, %options);
  if (scalar @$args) {
    push(@diagnostics,
         'Unknown option in scribeoptions: ' . join(' ', @$args));
  }
  $rec->{type} = 'o';                   # Omit line from output
}
# Step 4: Find the initial scribe(s).
#
# The first scribe/scribenick command is also assumed to apply to the
# lines that come before it, so search for that first command (unless
# --scribenick was given on the command line). If no command is found,
# assume the person who typed most was the scribe. And if nobody typed
# anything, set the scribe to '*'.
#
# The hash %count is also used further down to print the list of
# people who were active on IRC in the diagnostics.
#
my %count;
foreach my $rec (@records) {
  # Count the lines of each speaker, but not those of the known bots.
  $count{$rec->{speaker}}++
    if $rec->{type} eq 'i' && !$bots{fc($rec->{speaker})};
}

# Look for the first scribe command. A plain foreach is used instead of
# "each @records", because leaving an each-loop early would leave the
# iterator of @records somewhere in the middle of the array.
if (!defined $scribenick) {
  foreach my $p (@records) {
    if ($p->{text} =~ /^ *scribe(?:nick)? *\+[::]? *$/i) {
      $scribenick = $p->{speaker};      # "scribe+": the speaker is scribe
    } elsif ($p->{text} =~ /^ *scribe(?:nick)? *(?:[::]|\+[::]?) *($scribepat(?:, *$scribepat)*)$/i) {
      $scribenick = $1;                 # "scribe: name, name..."
    }
    last if defined $scribenick;
  }
}

if (!defined $scribenick) {
  # No command found. Take the most active speaker. Sorting the names
  # first makes the choice between equally active speakers repeatable.
  $scribenick = (sort {$count{$b} <=> $count{$a}} sort keys %count)[0];
  # If still undef, it means there are no lines at all...
  $scribenick = '*' if !defined $scribenick;
  push(@diagnostics, "No scribenick or scribe found. Guessed: $scribenick");
}
add_scribes($scribenick, \%curscribes, \@scribes, \%scribenames);
# Step 5: Interpret each record, collect topics, actions, etc.
#
# Interpret each line. %curscribes is the current set of scribes in lowercase.
# $lastspeaker is the current speaker, for use in continuation lines.
# $lastspeaker is set to foo whenever the scribe writes "foo: ...".
#
for (my $i = 0; $i < @records; $i++) {
my $is_scribe = is_cur_scribe($records[$i]->{speaker}, \%curscribes);
$_ = $records[$i]->{text};
if ($records[$i]->{type} eq 'o') {
# This record was already processed
} elsif (/^ *$/) {
$records[$i]->{type} = 'o'; # Omit empty line
} elsif (/^ *(```|\[\[) *$/ && # Start preformatted text
!exists $verbatim{$records[$i]->{speaker}}) {
$verbatim{$records[$i]->{speaker}} = $1 eq "```" ? "```" : "]]";
if ($is_scribe) {
$records[$i]->{text} = ""; # Next lines will be appended
$records[$i]->{type} = 'D'; # Preformatted text by scribe
} else {
$records[$i]->{type} = 'o'; # Omit this record
}
} elsif (/ *(```|\]\]) *$/ && # End of preformatted text
($verbatim{$records[$i]->{speaker}} // "") eq $1) {
$records[$i]->{type} = 'o'; # Omit this record
delete $verbatim{$records[$i]->{speaker}}; # Remove verbatim mode
} elsif (exists $verbatim{$records[$i]->{speaker}}) { # Preformatted text
if ($is_scribe) {
# Scribe's verbatim text is collected into a single record
my $j = $i - 1;
$j-- while $records[$j]->{type} eq 'o' ||
$records[$j]->{speaker} ne $records[$i]->{speaker};
$records[$j]->{text} .= $records[$i]->{text} . "\n"; # Append to 1st line
$records[$i]->{type} = 'o'; # Omit this record
} else {
$records[$i]->{type} = 'I'; # Mark as preformatted line
}
} elsif (/^ *present *[::=] *(.*?) *$/i) {
if ($records[$i]->{speaker} eq 'Zakim' && !$use_zakim) {} # Ignore Zakim?
elsif ($1 eq '(no one)') {%present = ()}
else {%present = map {fc($_) => $_} split(/ *, */, $1)}
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *present *\+[::]? *$/i) {
$present{fc $records[$i]->{speaker}} = $records[$i]->{speaker};
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *present *\+[::]? *(.*?) *$/i) {
$present{fc $_} = $_ foreach split(/ *, */, $1);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *present *-[::]? *(.*?) *$/i) {
delete $present{fc $_} foreach split(/ *, */, $1);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *regrets? *[::] *(.*?) *$/i) {
%regrets = map { fc($_) => $_ } split(/ *, */, $1);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *regrets? *\+[::]? *$/i) {
$regrets{fc $records[$i]->{speaker}} = $records[$i]->{speaker};
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *regrets? *\+[::]? *(.*?) *$/i) {
$regrets{fc $_} = $_ foreach split(/ *, */, $1);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *regrets? *-[::]? *(.*?) *$/i) {
delete $regrets{fc $_} foreach split(/ *, */, $1);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *repo(?:s|sitory|sitories)? *(?:[::]|\+[::]?) *(.*?) *$/i ||
/^ *(?:ghurlbot|gb) *, *(?:discuss(?:ing)?|use|using|take +up|taking +up|this +(?:will +be|is)) +(.*?) *$/i) {
$records[$i]->{type} = 'repo'; # Mark as repository line
$records[$i]->{text} = $1;
} elsif (/^ *repo(?:s|sitory|sitories)? *-[::]? *([^ ].*)$/i ||
/^ *(?:ghurlbot|gb) *, *(?:forget|drop|remove|don't +use|do +not +use) +([^ ].*)$/i) {
$records[$i]->{type} = 'drop'; # Mark as drop-repository line
$records[$i]->{text} = $1;
} elsif (/^ *slides(?:et)? *[::] *(.*?($urlpat).*)$/i) {
$records[$i]->{type} = 'slideset'; # Mark as slideset line
$records[$i]->{text} = $1;
$lastslideset = $2;
$has_slides = 1;
} elsif (/^ *slides(?:et)? *[::]/i) { # Forget the slideset
$records[$i]->{type} = 'd' if $is_scribe;
$lastslideset = undef;
} elsif (/^ *\[ *slide *(\d+) *\] *$/i && $lastslideset) {
$records[$i]->{type} = 'slide'; # Mark as slide line
my $slidenumber = $1;
# Put link in {id}, with fragment ID "#n" (or #page=n for PDF URLs).
$records[$i]->{id} = $lastslideset . "#" .
($lastslideset =~ /\.pdf/ ? "page=" : "") . $slidenumber;
$records[$i]->{text} = "$slidenumber";
} elsif (/^ *recording *[::] *(.*?($urlpat).*)$/i) {
$records[$i]->{type} = 'o'; # Omit line from output
$recording_link = esc($1, $emphasis, 1, 1);
$recording = $2;
} elsif (/^ *recording *[::]/i) { # Recording but without a URL. Error?
$records[$i]->{type} = 'd' if $is_scribe;
$recording = undef;
} elsif (/^ *recording +(?:is +starting|starts)[. ]*$/i) {
$records[$i]->{type} = 'o'; # Omit line from output
$recordingstart = $records[$i]->{time} if defined $records[$i]->{time};
} elsif (/^ *recording +start(?:ed|s) +at +[::]([0-9][0-9])[. ]*$/i) {
$records[$i]->{type} = 'o'; # Omit line from output
$recordingstart = 3600*floor(($records[$i]->{time} - 60*$1)/3600) + 60*$1
if defined($records[$i]->{time});
} elsif (/^ *recording +ends[. ]*$/i) {
$records[$i]->{type} = 'o'; # Omit line from output
$recordingend = $records[$i]->{time} if defined $records[$i]->{time};
} elsif (/^ *recording +end(?:ed|s) +at +[::]([0-9][0-9])[. ]*$/i) {
$records[$i]->{type} = 'o'; # Omit line from output
$recordingend = 3600*floor(($records[$i]->{time} - 60*$1)/3600) + 60*$1
if defined $records[$i]->{time};
} elsif (/^ *topic *[::] *(.*?) *$/i) {
$records[$i]->{type} = 't'; # Mark as topic line
$records[$i]->{text} = $1;
$records[$i]->{id} = ++$topicid; # Unique ID
} elsif (/^ *sub-?topic *[::] *(.*?) *$/i) {
$records[$i]->{type} = 'T'; # Mark as subtopic line
$records[$i]->{text} = $1;
$records[$i]->{id} = ++$topicid; # Unique ID
} elsif ($dash_topics && /^ *-+ *$/) {
my $topicfound = 0;
for (my $j = $i + 1; $j < @records; $j++) {
if ($records[$j]->{speaker} eq $records[$i]->{speaker}) {
$records[$i]->{type} = 't';
$records[$i]->{text} = $records[$j]->{text} =~ s/^ *(.*?) *$/$1/r;
$records[$i]->{id} = ++$topicid;
$records[$j]->{type} = 'o';
$topicfound = 1;
last;
}
}
} elsif ($records[$i]->{speaker} eq 'RRSAgent' && / to generate ([^ #]+)/) {
$minutes_url = $1;
$records[$i]->{type} = 'o'; # Ignore this line
} elsif ($records[$i]->{speaker} eq 'RRSAgent' &&
/(?:[Ll]ogging to|recorded in|See) ([^ #]+)/){
$logging_url = $1;
$records[$i]->{type} = 'o'; # Ignore this line
} elsif (/^ *rrsagent,/i) {
$records[$i]->{type} = 'o'; # Ignore this line
} elsif ($records[$i]->{speaker} eq 'RRSAgent') {
# Ignore RRSAgent's list of actions, etc.
$records[$i]->{type} = 'o'; # Ignore this line
} elsif (/^ *action *[::] *(.*?) *$/i ||
/^ *action +(.*?(?: to |[::]).*?) *$/i) {
$records[$i]->{type} = 'a'; # Mark as action line
$records[$i]->{text} = $1;
$records[$i]->{id} = ++$actionid; # Unique ID
} elsif (/^ *resol(?:ved|ution) *[::] *(.*?) *$/i) {
$records[$i]->{type} = 'r'; # Mark as resolution line
$records[$i]->{text} = $1;
$records[$i]->{id} = ++$resolutionid;
} elsif (/^ *issue *[::] *(.*?) *$/i) {
$records[$i]->{type} = 'u'; # Mark as issue line
$records[$i]->{text} = $1;
$records[$i]->{id} = ++$issueid; # Unique ID
} elsif (/^ *agenda *[::] *($urlpat) *$/i) {
$agenda = '$agenda_icon\n";
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *agenda *[::] *(.*?) *$/i) {
push(@diagnostics, "Found 'Agenda:' not followed by a URL: '$1'.");
# $records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *meeting *[::] *(.*?) *$/i) {
$meeting = esc($1);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *previous +meeting *[::] *($urlpat) *$/i) {
$prev_meeting = '$previous_icon\n";
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *next +meeting *[::] *($urlpat) *$/i) {
$next_meeting = '$next_icon\n";
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *(previous|next) +meeting *[::] *(.*?) *$/i) {
push(@diagnostics,"Found '$1 meeting:' not followed by a URL: '$2'.");
# $records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *chairs? *-[::]? *$/i) {
delete $chairs{fc $records[$i]->{speaker}}; # Remove speaker from chairs
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *chairs? *-[::]? *(.*?) *$/i) {
delete $chairs{fc $_} foreach (split(/ *, */, $1)); # Remove given chairs
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *chairs? *\+[::]? *$/i) {
my $s = $records[$i]->{speaker};
$chairs{fc $s} = $s; # Add to collected chairs
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *chairs? *[::] *$/i) {
push(@diagnostics, "Ignored empty command \"$records[$i]->{text}\"");
} elsif (/^ *chairs? *([::]|\+[::]?) *(.*?) *$/i) {
%chairs = () if $1 eq ':' || $1 eq ':'; # Reset the list of chairs
$chairs{fc $_} = $_ foreach (split(/ *, */, $2)); # Add all to chairs list
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *date *[::] *(\d+ \w+ \d+)/i) {
$date = $1;
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *scribe(?:nick)? *-[::]? *$/i) {
delete_scribes($records[$i]->{speaker}, \%curscribes);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *scribe(?:nick)? *\+[::]? *$/i) {
add_scribes($records[$i]->{speaker}, \%curscribes, \@scribes,\%scribenames);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *scribe(?:nick)? *[::] *$/i) {
push(@diagnostics, "Ignored empty command \"$records[$i]->{text}\"");
} elsif (/^ *scribe(?:nick)? *([::]|\+[::]?) *($scribepat(?:, *$scribepat)*)$/i) {
%curscribes = () if $1 eq ':' || $1 eq ':'; # Reset scribe nicks
add_scribes($2, \%curscribes, \@scribes, \%scribenames);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *scribe *[::] *([^ ].*?) *$/i) {
# Probably an old-fashioned scribe command without a nick
push @scribes, $1; # Add to collected scribe list
$records[$i]->{type} = 'o'; # Omit line from output
} elsif (/^ *scribe(?:nick)? *-[::]? *([^ ].*)? *$/i) {
delete_scribes($1, \%curscribes);
$records[$i]->{type} = 'o'; # Omit line from output
} elsif ($use_zakim && $records[$i]->{speaker} eq 'Zakim' &&
(/^agendum \d+\. "(.*)" taken up/ || # Old Zakim
/^agendum \d+ -- (.*)/)) { # New Zakim
$records[$i]->{type} = 't'; # Mark as topic line
$records[$i]->{text} = $1;
$records[$i]->{text} =~ s/ -- taken up \[from.*//;
$records[$i]->{id} = ++$topicid; # Unique ID
} elsif ($use_zakim && $records[$i]->{speaker} eq 'Zakim' &&
/the attendees (?:were|have been) (.*?),?$/){
$present{fc $_} = $_ foreach split(/, */, ($1 =~ s/\(no one\)//r));
$records[$i]->{type} = 'o'; # Omit line from output
} elsif ($use_zakim && $records[$i]->{speaker} eq 'Zakim' &&
/^\.\.\. (.*)$/) {
my $s = $1; # See what this is a continuation of
my $j = $i - 1;
$j-- while $j >= 0 && ($records[$j]->{text} =~ /^\.\.\. / ||
$records[$j]->{speaker} ne 'Zakim');
if ($j >= 0 && $records[$j]->{text} =~ /the attendees (?:were|have been) /){
$present{fc $_} = $_ foreach grep($_ ne '', split(/, */, $s));
} elsif ($j >= 0 && $records[$j]->{text} =~ /, you wanted /) {
$records[$j]->{text} .= ' ' . $s;
} elsif ($j >= 0 && $records[$j]->{type} eq 't') { # Continued agendum
$records[$j]->{text} .= ' ' . $s;
$records[$j]->{text} =~ s/ -- taken up \[from.*//;
}
$records[$i]->{type} = 'o'; # Omit line from output
} elsif ($use_zakim && $records[$i]->{speaker} eq 'Zakim' &&
/[^ ,]+, you wanted /) {
# Leave Zakim's lines of the form: "Jim, you wanted to ..."
} elsif ($use_zakim && $records[$i]->{speaker} eq 'Zakim') {
$records[$i]->{type} = 'o'; # Ignore most conversations with Zakim
} elsif ($use_zakim &&
( /^ *zakim,/i ||
/^ *(?:chair +)?(?:ack|recognize)s? \w/i ||
/^ *agg?enda *\d* *[\+\-\=\?]/i ||
/^ *(?:delete|drop|forget|remove) +agend(?:um|a) +\d+ *$/i ||
/^ *(?:take +up +|open +|move +to +)?(?:agend(?:um|a) +|next +agend(?:um|a))/i ||
/^ *next +agend(?:um|a) *$/i ||
/^ *(?:skip|(?:really +)?close) +(?:this +agend(?:um|a)|agend(?:um|a) +\d+) *$/i ||
/^ *q(?:ueue|q)? *[-+=?]/i ||
/^ *(?:ple?a?se? +)?(?:show +)?(?:the +)?(?:verbose +|full +)?q(?:ueue)?\?? *$/i ||
/^ *(?:vqueue|vq|qv)\?/i ||
/^ *[-+=?] *q(?:ueue|q)?\b/i ||
/^ *(?:ple?a?se? +)?clear +(?:the +)?agenda *$/i ||
/^ *(?:(?:list|show) +(?:all +)?(?:the +)?questions|questions *\?) *$/i ||
/^ *(?:drop|close) +question +[0-9]+ *$/i)) {
$records[$i]->{type} = 'o'; # Ignore most conversations with Zakim
} elsif (/^ *trackbot *, *(?:(?:dis)?associate|bye|start|end|status)\b/i) {
$records[$i]->{type} = 'o'; # Ignore some commands to trackbot
} elsif ($records[$i]->{speaker} eq 'trackbot' &&
/^([a-zA-Z]+-[0-9]+) -- (.*)$/) {
$records[$i]->{type} = 'B'; # A structured response from trackbot
$records[$i]->{id} = $2;
$records[$i]->{text} = $1;
} elsif ($records[$i]->{speaker} eq 'trackbot' && /^$urlpat$/i) {
my $j = $i - 1; # A URL response from trackbot
$j-- while $j >= 0 && ($records[$j]->{type} eq 'o' ||
$records[$j]->{speaker} ne 'trackbot');
if ($j < 0) { # URL belongs to nothing?
$records[$i]->{type} = 'b';
} else { # Make previous line into a link
$records[$j]->{text} = '->'.$records[$i]->{text}.' '.$records[$j]->{text};
$records[$i]->{type} = 'o';
}
} elsif ($records[$i]->{speaker} eq 'trackbot') {
$records[$i]->{type} = 'b' # A response from trackbot
} elsif ($records[$i]->{speaker} eq 'agendabot') {
$records[$i]->{type} = 'o'; # Ignore most conversations w/ agendabot
} elsif (/^ *agendabot *,/i) {
$records[$i]->{type} = 'o'; # Ignore most conversations w/ agendabot
} elsif (! $ghurlbot && /^ *(?:ghurlbot|gb) *,/i) {
$records[$i]->{type} = 'o'; # Ignore other commands to ghurlbot
} elsif (! $ghurlbot && $records[$i]->{speaker} =~ /^ghurlbot$|^gb$/) {
$records[$i]->{type} = 'o'; # Ignore if --noghurlbot was set
} elsif ($records[$i]->{speaker} =~ /^ghurlbot$|^gb$/ &&
/^($urlpat) -> ((?:Issue |Action |Pull Request |\#)[0-9]+) ?(.*)$/i) {
$records[$i]->{type} = 'B'; # A structured response from ghurlbot
$records[$i]->{id} = $3;
$records[$i]->{text} = "->$1 $2";
} elsif ($records[$i]->{speaker} =~ /^ghurlbot$|^gb$/ &&
/^($urlpat) -> (@.*)$/i) { # A link to a GitHub user
$records[$i]->{type} = 'B';
$records[$i]->{id} = '';
$records[$i]->{text} = "->$1 $2";
} elsif ($records[$i]->{speaker} =~ /^ghurlbot$|^gb$/ &&
/^(?:Cannot|Closed|Reopened|Created)/) {
$records[$i]->{type} = 'b';
} elsif ($records[$i]->{speaker} =~ /^ghurlbot$|^gb$/ &&
/^(?:[^ ,]+, )?OK\.?/) {
$records[$i]->{type} = 'o'; # Ignore "OK" responses from ghurlbot
} elsif (/^ *namedanchorhere *[::] *(.*?) *$/i) {
my $a = $1 =~ s/ /_/gr;
if ($a =~ /^$/) {
push(@diagnostics, "Empty named anchor ignored.");
} elsif ($a =~ /^x[0-9][0-9]+$/) {
push(@diagnostics, "Named anchor \"$a\" ignored. (\"xNN\" is reserved.)");
} elsif ($a =~ /^(?:(?:Action|Resolution)Summary|links|attendees|toc|meeting)$/) {
push(@diagnostics, "Named anchor \"$a\" ignored. (The name is reserved.)");
} elsif (exists $namedanchors{$a}) {
push(@diagnostics, "Duplicate named anchor \"$a\" ignored.");
} else {
$records[$i]->{type} = 'n';
$records[$i]->{id} = esc($a);
$namedanchors{$a} = 1;
}
} elsif (/^ *\Q<$records[$i]->{speaker}\E>/i) {
# Ralph's escape for a scribe's personal remarks: "
\n%3\$s
\n", 0],
i => [$scribeonly ? '' : "%3\$s
%3\$s%6\$s
\n", 1],
t => ["%3\$s%6\$s
\n", 1],
slideset => ["
\n… "|e; # First line
$line =~ s|\t|"
\n… "|ge; # Others
$line =~ s|
$logo
\n\n" if defined $logo && $logo ne ''; $logo = '' if !defined $logo && ($styleset eq 'fancy'); $logo = "$w3clogo
\n\n" if !defined $logo; my $draft = $final ? "" : "– DRAFT –" . esc($_) . "
\n"} @diagnostics) . "Recording: $recording_link\n" . "