#!/usr/bin/env perl

#
# AGI script that renders speech to text using Google's speech recognition engine.
#
# Copyright (C) 2011 - 2012, Lefteris Zafiris
#
# Version 0.3
#
# This program is free software, distributed under the terms of
# the GNU General Public License Version 2. See the COPYING file
# at the top of the source tree.
#
# -----
# Usage
# -----
# agi(speech-recog.agi,[lang],[timeout],[speechtype])
# Records from the current channel until the pound key (#) is pressed or the
# timeout is reached. The recording is sent over to Google's speech
# recognition service and the returned text string is assigned as the value
# of the channel variable 'utterance'.
#
# Arguments:
# lang       : Language tag such as "en-US". Ignored (default is kept) when it
#              does not look like a language tag.
# timeout    : Integer passed as the timeout of the AGI RECORD FILE command
#              (milliseconds). Defaults to 15000 (15 seconds) when missing or
#              not an integer.
# speechtype : When set to "phoneNumb", all whitespace is removed from the
#              returned utterance.
#
# The script sets the following channel variables:
# status     : Return status. 0 means success, non zero values indicate different errors.
# id         : Some id string that Google's engine returns, not very useful(?).
# utterance  : The generated text string.
# confidence : A value between 0 and 1 indicating how 'confident' the recognition engine
#              feels about the result. Values bigger than 0.95 usually mean that the
#              resulted text is correct.
# All four are reset to -1 before recording starts.
#
# Parameters like default language, recording sample rate, use of sox and normalising level
# can be set up by altering the following variables:
# Default language:   $language
# Sample rate:        $samplerate (value in Hz, 8000 or 16000 if used with wideband codecs)
# Enable sox:         $use_sox (0: disable, 1: enable)
# Normalizing level:  $gain (value in db, applicable only if sox is enabled)
#

use warnings;
use strict;
use File::Temp qw(tempfile);
use LWP::UserAgent;
$| = 1;    # Unbuffered STDOUT: every AGI command must reach Asterisk immediately.

# ----------------------------- #
#   User defined parameters:    #
# ----------------------------- #
# Default language              #
my $language = "en-US";

# Input audio sample rate       #
my $samplerate = 8000;

# Use sox for sound manipulation#
my $use_sox = 0;

# Max sound power gain in dB    #
my $gain = -5;

# Verbose debugging messages    #
my $debug = 0;

# ----------------------------- #

my %AGI;
my $ua;
my $fh;
my $tmpname;
my $format;
my @result;
my $name;
my $audio;
my $uaresponse;
my %response;
my $endian;

# Recording timeout used when none (or an invalid one) is given: 15 seconds.
my $default_timeout = 15000;

# Following two vars: 1st one receives the timeout argument, 2nd one selects
# phone-number sanitizing of the result - Bruce
my $timeout    = $ARGV[1];
my $speechtype = defined $ARGV[2] ? $ARGV[2] : "";

my $tmpdir   = "/tmp";
my $filetype = "x-flac";
my $url      = "http://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium";
my $sox      = `/usr/bin/which sox`;
my $flac     = `/usr/bin/which flac`;

# Check endianness #
# A native short holding 1 is unpacked as hex, low nybble first: this gives
# "1000" on a little-endian host and "0010" on a big-endian one, so only the
# big-endian form contains "01".
if (unpack("h*", pack("s", 1)) =~ /01/) {
    $endian = "big";
}
else {
    $endian = "little";
}

# Store AGI input #
# Asterisk sends "agi_<key>: <value>" lines on STDIN, ended by an empty line.
while (my $line = <STDIN>) {
    chomp $line;
    last if (!length $line);
    $AGI{$1} = $2 if ($line =~ /^agi_(\w+)\:\s+(.*)$/);
}
($AGI{arg_1}) = @ARGV;

# Prefix used on every diagnostic message written to STDERR.
$name = " -- " . (defined $AGI{request} ? $AGI{request} : "") . ":";

# Abort if required programs not found. #
die "$name sox is missing. Aborting.\n"  if (!$sox  && $use_sox);
die "$name flac is missing. Aborting.\n" if (!$flac && !$use_sox);
chomp($sox, $flac);
print STDERR "$name Found sox in: $sox, flac in: $flac\n" if ($debug);

# The timeout is interpolated into an AGI command, so accept integers only
# and fall back to the default otherwise.
if (!defined $timeout || $timeout !~ /^-?\d+$/) {
    print STDERR "$name Invalid timeout, using $default_timeout.\n"
        if (defined $timeout && length $timeout);
    $timeout = $default_timeout;
}

# Setting recording file format according to sample rate. #
if ($samplerate == 16000) {
    $format = "sln16";
}
else {
    $format     = "sln";
    $samplerate = 8000;
}

# Setting language #
if (defined $AGI{arg_1} && length($AGI{arg_1})) {
    $language = $AGI{arg_1} if ($AGI{arg_1} =~ /^[a-z]{2}(-[a-zA-Z]{2,6})?$/);
}

# Answer channel if not already answered #
print STDERR "$name Checking channel status.\n" if ($debug);
print "CHANNEL STATUS\n";
@result = checkresponse();
if ($result[0] == 4) {
    print STDERR "$name Answering channel.\n" if ($debug);
    print "ANSWER\n";
    checkresponse();
}

# Reset variables. #
%response = (
    utterance  => -1,
    status     => -1,
    id         => -1,
    confidence => -1,
);
print STDERR "$name Clearing channel variables.\n" if ($debug);
foreach my $var (keys %response) {
    print "SET VARIABLE \"$var\" \"$response{$var}\"\n";
    checkresponse();
}

# Handle interrupts #
$SIG{'INT'} = \&int_handler;
$SIG{'HUP'} = \&int_handler;

# Record file #
# Asterisk appends ".$format" to the name it is given, so the recording ends
# up in "$tmpname.$format"; the END block removes every "$tmpname*" file.
($fh, $tmpname) = tempfile("stt_XXXXXX", DIR => $tmpdir, UNLINK => 1);
print STDERR "$name Recording file.\n" if ($debug);
print "RECORD FILE $tmpname $format \"#\" $timeout\n";
@result = checkresponse();
die "$name Failed to record file, aborting...\n" if ($result[0] == -1);

if ($debug) {
    print STDERR "$name Languge: $language, Rate: $samplerate, Format: $format\n";
    print STDERR "$name Playing back recorded file.\n";
    print "STREAM FILE $tmpname \"\"\n";
    @result = checkresponse();
    print STDERR "$name Failed to play file\n" if ($result[0] == -1);
}

# Encode file to flac. #
if ($use_sox) {
    # Convert file using sox, normalizing to $gain dB, cutting off low static
    # noise (<100Hz) and removing possible DC offset.
    system($sox, "-b", "16", "-t", "raw", "-r", $samplerate,
        "-e", "signed-integer", "--endian", $endian,
        "$tmpname.$format", "$tmpname.flac",
        "norm", $gain, "highpass", "100") == 0
        or die "$name $sox failed: $?\n";
}
else {
    # Convert sound file using flac. #
    system($flac, "-8", "--totally-silent", "--channels=1",
        "--endian=$endian", "--sign=signed", "--bps=16",
        "--force-raw-format", "--sample-rate=$samplerate",
        "$tmpname.$format") == 0
        or die "$name $flac failed: $?\n";
}

# Slurp the encoded audio as raw bytes.
open(my $flac_fh, "<", "$tmpname.flac") or die "Can't read file: $!";
binmode($flac_fh);
$audio = do { local $/; <$flac_fh> };
close($flac_fh);

# Send the audio to the recognition service.
$ua = LWP::UserAgent->new;
$ua->agent("Mozilla/5.0 (X11; Linux) AppleWebKit/535.2 (KHTML, like Gecko)");
$ua->timeout(25);
$uaresponse = $ua->post(
    "$url&lang=$language",
    Content_Type => "audio/$filetype; rate=$samplerate",
    Content      => $audio,
);
die "$name Unable to get speech data.\n" if (!$uaresponse->is_success);

# Parse the reply; channel variables keep their -1 value for anything that
# cannot be extracted.
if ($uaresponse->content =~ /^\{"status":(\d*),"id":"(.*)","hypotheses":\[(.*)\]\}$/) {
    # Copy the captures before running another match, which would replace them.
    my ($status, $id, $hypotheses) = ($1, $2, $3);
    $response{status} = $status;
    $response{id}     = $id;
    print STDERR "Error reading audio file\n" if (length($status) && $status == 5);

    if ($hypotheses =~ /^\{"utterance":"(.*)","confidence":(.*)\}/) {
        $response{utterance}  = "$1";
        $response{confidence} = "$2";

        # Filtering utterance to phone number format - Bruce
        if ($speechtype eq "phoneNumb") {
            $response{utterance} =~ s/\s//g;
        }
    }
}
print STDERR "$name The response was:\n", $uaresponse->content if ($debug);

# Publish the results as channel variables.
foreach my $var (keys %response) {
    print STDERR "$name Setting variable: $var = $response{$var}\n" if ($debug);
    print "SET VARIABLE \"$var\" \"$response{$var}\"\n";
    checkresponse();
}
exit;

# Read one AGI reply line from STDIN and decode it.
# Returns the list (result, rest-of-line) for a "200 result=N ..." reply, or
# the single-element list ("-1") when the reply is missing, is not a 200
# reply, or carries no numeric result.
sub checkresponse {
    my $input = <STDIN>;

    # End of file on STDIN: there is no reply to parse.
    if (!defined $input) {
        print STDERR "$name No response received on STDIN\n";
        return ("-1");
    }
    chomp $input;

    if ($input !~ /^200/) {
        print STDERR "$name Unexpected result: $input\n";
        return ("-1");
    }

    # Test the match itself so a stale capture can never be mistaken for a result.
    if ($input =~ /result=(-?\d+)\s?(.*)$/) {
        my @values = ("$1", "$2");
        print STDERR "$name Command returned: $input\n" if ($debug);
        return @values;
    }

    print STDERR "$name Command failed: $input\n";
    return ("-1");
}

# Signal handler for INT and HUP: die so that the END block still runs.
sub int_handler {
    die "$name Interrupt signal received, terminating...\n";
}

# Remove the temporary file and every file derived from its name.
END {
    if ($tmpname) {
        print STDERR "$name Cleaning temp files.\n" if ($debug);
        unlink glob "$tmpname*";
    }
}