#!/usr/bin/env perl

#
# AGI script that renders speech to text using Google's Cloud Speech API.
#
# Copyright (C) 2011 - 2016, Lefteris Zafiris
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the COPYING file
# at the top of the source tree.
#
# -----
# Usage
# -----
# agi(speech-recog.agi,[lang],[timeout],[intkey],[NOBEEP],[rtimeout],[speechContexts])
# Records from the current channel until 2 seconds of silence are detected
# (this can be set by the user with the 'timeout' argument, -1 for no timeout) or the
# interrupt key (# by default) is pressed. If NOBEEP is set, no beep sound is played
# back to the user to indicate the start of the recording. If 'rtimeout' is set,
# it overrides the absolute recording timeout. 'speechContexts' provides hints to
# favor specific words and phrases in the results. Usage: [Agamemnon,Midas]
# The recorded sound is sent over to the Google speech recognition service and the
# returned text string is assigned as the value of the channel variable 'utterance'.
# The script sets the following channel variables:
# utterance  : The generated text string.
# confidence : A value between 0 and 1 indicating how 'confident' the recognition engine
#              feels about the result. Values bigger than 0.95 usually mean that the
#              resulting text is correct.
#
# User defined parameters:
# Speech API key from Google:
#     $key
#
# Default language:
#     $language
#
# Default timeout:
#     $timeout (value in seconds of silence before recording is stopped)
#
# Default interrupt key:
#     $intkey (can be any digit from 0 to 9 or # and *, or a combination of them)
#
# Sample rate:
#     $samplerate (value in Hz. 0 for automatic detection per channel/call, 16000 for
#                 use with wideband codecs, 8000 for traditional codecs.)
#
# Profanity filter:
#     $pro_filter ('false':disable, 'true': remove profanities)
#

use warnings;
use strict;
use File::Copy qw(move);
use File::Temp qw(tempfile);
use LWP::UserAgent;
use JSON;
use Encode qw(encode);
use MIME::Base64;

# AGI commands are written to STDOUT and answered on STDIN, so output
# must not be buffered.
$| = 1;

# ----------------------------- #
#   User defined parameters:    #
# ----------------------------- #
# Speech API key                #
my $key = "";

# Default language              #
my $language = "en-US";

# Default max silence timeout   #
my $timeout = 2;

# Absolute Recording timeout    #
my $abs_timeout = -1;

# Default interrupt key         #
my $intkey = "#";

# Input audio sample rate       #
# Leave blank to auto-detect    #
my $samplerate = "";

# Profanity filter              #
my $pro_filter = "false";

# Verbose debugging messages    #
my $debug = 0;

# ----------------------------- #

my %AGI;
my $format;
my @result;
my $silence;
my $results    = 1;
my $beep       = "BEEP";
my $comp_level = -8;
my $ua_timeout = 30;
my $tmpdir     = "/tmp";
my $url        = "https://speech.googleapis.com/v1/speech";
my $phrases    = "";

# Must start out empty: initialising with [] would store one array
# reference as the first "phrase" and corrupt the request payload.
my @phrases;

# Store AGI input #
# Script arguments arrive in @ARGV; the agi_* environment arrives on STDIN
# as "agi_name: value" lines terminated by an empty line.
($AGI{arg_1}, $AGI{arg_2}, $AGI{arg_3}, $AGI{arg_4}, $AGI{arg_5}, $AGI{arg_6}) = @ARGV;
while (<STDIN>) {
    chomp;
    last if (!length);
    $AGI{$1} = $2 if (/^agi_(\w+)\:\s+(.*)$/);
}

# Prefix used by every diagnostic message written to STDERR.
my $name = " -- " . (defined $AGI{request} ? $AGI{request} : "") . ":";

# Reset variables. #
warn "$name Clearing channel variables.\n" if ($debug);
my %response = (
    utterance  => -1,
    confidence => -1,
);
set_channel_vars(%response);

# Abort if key is missing or required programs not found. #
if (!$key) {
    print "VERBOSE \"API key is missing. Aborting.\" 3\n";
    checkresponse();
    die "$name API key is missing. Aborting.\n";
}

my $flac = `/usr/bin/which flac`;
die "$name flac is missing. Aborting.\n" if (!$flac);
chomp($flac);
warn "$name Found flac in: $flac\n" if ($debug);

# Setting language, timeout, interrupt keys and BEEP indication #

# arg 1: language code, accepted only when it looks like "xx" or "xx-YYYY".
if (defined $AGI{arg_1} && $AGI{arg_1} =~ /^[a-z]{2}(-[a-zA-Z]{2,6})?$/) {
    $language = $AGI{arg_1};
}

# arg 2: seconds of silence that end the recording; -1 disables silence
# detection, anything that is not a plain number falls back to $timeout.
# The value is matched as text so that non-numeric input is never
# compared numerically.
$silence = "s=$timeout";
if (defined $AGI{arg_2} && length($AGI{arg_2})) {
    if ($AGI{arg_2} =~ /^-1$/) {
        $silence = "";
    }
    elsif ($AGI{arg_2} =~ /^\d+$/) {
        $silence = "s=$AGI{arg_2}";
    }
}

# arg 3: interrupt key(s), either the word "any" or a set of DTMF digits.
if (defined $AGI{arg_3} && length($AGI{arg_3})) {
    $intkey = "0123456789#*" if ($AGI{arg_3} eq "any");
    $intkey = $AGI{arg_3}    if ($AGI{arg_3} =~ /^[0-9*#]+$/);
}

# arg 4: the literal NOBEEP suppresses the beep before recording.
if (defined $AGI{arg_4} && $AGI{arg_4} eq "NOBEEP") {
    $beep = "";
}

# arg 5: absolute recording timeout, handed unchanged to RECORD FILE as its
# timeout argument. Only integers are accepted because the value is
# interpolated into an AGI command line.
if (defined $AGI{arg_5} && length($AGI{arg_5})) {
    if ($AGI{arg_5} =~ /^-?\d+$/) {
        $abs_timeout = $AGI{arg_5};
    }
    else {
        warn "$name Ignoring invalid recording timeout: $AGI{arg_5}\n";
    }
}

# arg 6: comma separated phrase hints, optionally wrapped in [ ].
# The brackets are removed only when they are actually present, and empty
# entries are dropped.
if (defined $AGI{arg_6} && length($AGI{arg_6})) {
    $phrases = $AGI{arg_6};
    $phrases =~ s/^\s*\[//;
    $phrases =~ s/\]\s*$//;
    @phrases = ();
    for my $phrase (split(/,/, $phrases)) {
        $phrase =~ s/^\s+|\s+$//g;
        push @phrases, $phrase if (length($phrase));
    }
}

# Answer channel if not already answered #
warn "$name Checking channel status.\n" if ($debug);
print "CHANNEL STATUS\n";
@result = checkresponse();
if ($result[0] == 4) {
    warn "$name Answering channel.\n" if ($debug);
    print "ANSWER\n";
    @result = checkresponse();
    if ($result[0] != 0) {
        die "$name Failed to answer channel.\n";
    }
}

# Setting recording file format according to sample rate.
# An empty or zero $samplerate means: detect format and rate from the channel.
# Any rate without a matching signed-linear format falls back to 8 kHz "sln".
my %format_for_rate = (
    12000 => "sln12",
    16000 => "sln16",
    32000 => "sln32",
    44100 => "sln44",
    48000 => "sln48",
);
if (!$samplerate) {
    ($format, $samplerate) = detect_format();
}
elsif (exists $format_for_rate{ $samplerate + 0 }) {
    $format = $format_for_rate{ $samplerate + 0 };
}
else {
    ($format, $samplerate) = ("sln", 8000);
}

# Initialize User agent #
my $ua = LWP::UserAgent->new(ssl_opts => { verify_hostname => 1 });
$ua->agent("Asterisk AGI speech recognition script");
$ua->env_proxy;
$ua->timeout($ua_timeout);

# Handle interrupts #
$SIG{'INT'} = \&int_handler;
$SIG{'HUP'} = \&int_handler;

# Record file #
# Asterisk appends ".$format" to the name it is given, so the temp file
# created here only reserves a unique base name.
my ($fh, $tmpname) = tempfile("stt_XXXXXX", DIR => $tmpdir, UNLINK => 1);
print "RECORD FILE $tmpname $format \"$intkey\" \"$abs_timeout\" $beep \"$silence\"\n";
@result = checkresponse();
die "$name Failed to record file, aborting...\n" if ($result[0] == -1);

if ($debug) {
    warn "$name Recording Format: $format, Rate: $samplerate Hz, ",
        "Language: $language, ", "$silence, Interrupt keys: $intkey\n";
}

# Encode audio data to flac #
# pack("s", 1) yields a native-order 16 bit integer; dumped with "h*" it reads
# "1000" on little-endian and "0010" on big-endian hosts, so /01/ matches
# only on big-endian machines.
my $endian = (unpack("h*", pack("s", 1)) =~ /01/) ? "big" : "little";

system($flac, $comp_level, "--totally-silent", "--channels=1",
    "--endian=$endian", "--sign=signed", "--bps=16", "--force-raw-format",
    "--sample-rate=$samplerate", "$tmpname.$format") == 0
    or die "$name $flac failed: $?\n";

# The encoded file is binary, so read it through a raw layer.
open(my $flac_fh, "<:raw", "$tmpname.flac") or die "Can't read file: $!";
my $audio = do { local $/; <$flac_fh> };
close($flac_fh);

# Keep only usable phrase hints: plain, non-empty strings.
my @hints = grep { defined($_) && !ref($_) && length($_) } @phrases;

# The filter setting is configured as the text 'true'/'false' but has to be
# sent as a JSON boolean.
my $filter_on = ($pro_filter && $pro_filter ne "false") ? 1 : 0;

my %config = (
    "encoding"        => "FLAC",
    "sampleRateHertz" => $samplerate + 0,    # force a JSON number
    "languageCode"    => $language,
    "profanityFilter" => $filter_on ? JSON::true : JSON::false,
);

# speechContexts is a list of context objects; omit it when there are no hints.
$config{"speechContexts"} = [ { "phrases" => \@hints } ] if (@hints);

my %audio = (
    "content" => encode_base64($audio, ""),
);
my %json = (
    "config" => \%config,
    "audio"  => \%audio,
);

# Send audio data for analysis #
my $uaresponse = $ua->post(
    "$url:recognize?key=$key",
    Content_Type => "application/json",
    Content      => encode_json(\%json),
);

warn "$name The response was:\n", $uaresponse->content if ($debug);
if (!$uaresponse->is_success) {
    print "VERBOSE \"Unable to get recognition data.\" 3\n";
    checkresponse();
    die "$name Unable to get recognition data.\n";
}

# decode_json dies on malformed input; turn that into a clean abort.
my $jdata = eval { decode_json($uaresponse->content) };
if (ref($jdata) ne 'HASH') {
    print "VERBOSE \"Unable to parse recognition data.\" 3\n";
    checkresponse();
    die "$name Unable to parse recognition data.\n";
}

# Pick the first alternative of the first result, if there is one. A reply
# without results leaves both variables empty.
my $best;
if (ref($jdata->{"results"}) eq 'ARRAY' && ref($jdata->{"results"}[0]) eq 'HASH') {
    my $alternatives = $jdata->{"results"}[0]->{"alternatives"};
    $best = $alternatives->[0] if (ref($alternatives) eq 'ARRAY');
}
$response{utterance} =
    (ref($best) eq 'HASH' && defined $best->{"transcript"})
    ? encode('utf8', $best->{"transcript"})
    : "";
$response{confidence} =
    (ref($best) eq 'HASH' && defined $best->{"confidence"})
    ? $best->{"confidence"}
    : "";

set_channel_vars(%response);
exit;

# Set one channel variable per key/value pair of the hash passed in.
# Values come from the argument itself, not from the global %response.
sub set_channel_vars {
    my %resp = @_;
    foreach my $var (sort keys %resp) {
        my $value = defined $resp{$var} ? $resp{$var} : "";
        warn "$name Setting variable: $var = $value\n" if ($debug);
        print "SET VARIABLE \"$var\" \"$value\"\n";
        checkresponse();
    }
    return;
}

# Read one AGI reply from STDIN.
# Returns (result code, trailing data) for a "200 result=..." reply and
# (-1, -1) for anything else, including end of input.
sub checkresponse {
    my $input = <STDIN>;
    if (!defined $input) {
        warn "$name Unexpected result: no response received\n";
        return (-1, -1);
    }
    chomp $input;
    if ($input =~ /^200 result=(-?\d+)\s?(.*)$/) {
        warn "$name Command returned: $input\n" if ($debug);
        return ("$1", "$2");
    }

    # A "520-Invalid" reply continues on the next line; include it in the log.
    if ($input =~ /^520-Invalid/) {
        my $more = <STDIN>;
        $input .= $more if (defined $more);
    }
    warn "$name Unexpected result: $input\n";
    return (-1, -1);
}

# Detect the sound format used #
# Queries the channel's native audio format and returns the matching
# (recording file format, sample rate in Hz) pair, defaulting to ("sln", 8000).
sub detect_format {
    print "GET FULL VARIABLE \${CHANNEL(audionativeformat)}\n";
    my @reply  = checkresponse();
    my $native = defined $reply[1] ? $reply[1] : "";

    # 12 kHz signed linear is named "slin12"; "sln12" is still accepted.
    return ("sln12", 12000) if ($native =~ /(?:silk|sli?n)12/);
    return ("sln16", 16000) if ($native =~ /(?:speex|slin|silk)16|g722|siren7/);
    return ("sln32", 32000) if ($native =~ /(?:speex|slin|celt)32|siren14/);
    return ("sln44", 44100) if ($native =~ /(?:celt|slin)44/);
    return ("sln48", 48000) if ($native =~ /(?:celt|slin)48/);
    return ("sln", 8000);
}

# Signal handler for INT and HUP: abort through die so the END block runs.
sub int_handler {
    die "$name Interrupt signal received, terminating...\n";
}

# Remove the recording and the encoded file derived from the temp name.
END {
    if ($tmpname) {
        warn "$name Cleaning temp files.\n" if ($debug);
        unlink glob "$tmpname.*";
    }
}