From ffa69513c4729fdb2f3721de5ca07d390c984e12 Mon Sep 17 00:00:00 2001 From: Aaron Gokaslan Date: Mon, 11 Jan 2016 14:41:04 -0500 Subject: [PATCH] Updated GSpeech API for better accuracy and code cleanup. --- .gitignore | 1 + .../speech/recognizer/GSpeechDuplex.java | 104 +++++++++--------- .../speech/recognizer/Recognizer.java | 7 -- .../speech/synthesiser/Synthesiser.java | 33 ++---- .../speech/translator/GoogleTranslate.java | 15 +-- 5 files changed, 67 insertions(+), 93 deletions(-) diff --git a/.gitignore b/.gitignore index ff3459f..b6d585c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /bin .classpath .project +.settings /target diff --git a/src/main/java/com/darkprograms/speech/recognizer/GSpeechDuplex.java b/src/main/java/com/darkprograms/speech/recognizer/GSpeechDuplex.java index 8ead74a..a9dc61c 100644 --- a/src/main/java/com/darkprograms/speech/recognizer/GSpeechDuplex.java +++ b/src/main/java/com/darkprograms/speech/recognizer/GSpeechDuplex.java @@ -19,7 +19,6 @@ import javax.sound.sampled.LineUnavailableException; import javax.sound.sampled.TargetDataLine; -import com.darkprograms.speech.util.ChunkedOutputStream; import com.darkprograms.speech.util.StringUtil; //TODO Add a better logging system to GSpeechDuplex @@ -109,6 +108,8 @@ public void recognize(File flacFile, int sampleRate) throws IOException{ recognize(mapFileIn(flacFile), sampleRate); } + + /** * Send a byte[] to the URL with a specified sampleRate. * NOTE: The byte[] should contain no more than 15 seconds of audio. @@ -119,7 +120,6 @@ public void recognize(File flacFile, int sampleRate) throws IOException{ public void recognize(byte[] data, int sampleRate){ if(data.length >= MAX_SIZE){//Temporary Chunking. Does not allow for Google to gather context. - System.out.println("Chunking the audio into smaller parts..."); byte[][] dataArray = chunkAudio(data); for(byte[]array: dataArray){ recognize(array, sampleRate); @@ -162,9 +162,10 @@ public void recognize(TargetDataLine tl, AudioFormat af) throws IOException, Lin final String API_UP_URL = GOOGLE_DUPLEX_SPEECH_BASE + "up?lang=" + language + "&lm=dictation&client=chromium&pair=" + PAIR + "&key=" + API_KEY + "&continuous=true&interim=true"; //Tells Google to constantly monitor the stream; - + //Opens downChannel this.downChannel(API_DOWN_URL); + //Opens upChannel this.upChannel(API_UP_URL, tl, af); } @@ -191,15 +192,16 @@ public void run() { Scanner inStream = openHttpsConnection(url); if(inStream == null){ //ERROR HAS OCCURED + System.out.println("Error has occured"); + return; } - while(inStream.hasNextLine()){ - String response = inStream.nextLine(); + String response; + while(inStream.hasNext() && (response = inStream.nextLine()) != null){ if(response.length()>17){//Prevents blank responses from Firing GoogleResponse gr = new GoogleResponse(); parseResponse(response, gr); fireResponseEvent(gr); } - } inStream.close(); System.out.println("Finished write on down stream..."); @@ -233,7 +235,7 @@ public void run() { * @param af The AudioFormat to stream with. * @throws LineUnavailableException If cannot open or stream the TargetDataLine. */ - private void upChannel(String urlStr, TargetDataLine tl, AudioFormat af) throws LineUnavailableException{ + private void upChannel(String urlStr, TargetDataLine tl, AudioFormat af) throws IOException, LineUnavailableException{ final String murl = urlStr; final TargetDataLine mtl = tl; final AudioFormat maf = af; @@ -243,9 +245,8 @@ private void upChannel(String urlStr, TargetDataLine tl, AudioFormat af) throws } new Thread ("Upstream Thread") { public void run() { - openHttpsPostConnection(murl, mtl, maf); + openHttpsPostConnection(murl, mtl, (int)maf.getSampleRate()); } - }.start(); } @@ -258,8 +259,6 @@ public void run() { private Scanner openHttpsConnection(String urlStr) { int resCode = -1; try { - - URL url = new URL(urlStr); URLConnection urlConn = url.openConnection(); if (!(urlConn instanceof HttpsURLConnection)) { @@ -270,7 +269,6 @@ private Scanner openHttpsConnection(String urlStr) { // TIMEOUT is required httpConn.setInstanceFollowRedirects(true); httpConn.setRequestMethod("GET"); - httpConn.connect(); resCode = httpConn.getResponseCode(); if (resCode == HttpsURLConnection.HTTP_OK) { @@ -293,8 +291,7 @@ private Scanner openHttpsConnection(String urlStr) { * @param mtl The TargetDataLine you want to post data from. Note should be open * @param maf The AudioFormat of the data you want to post */ - private void openHttpsPostConnection(final String murl, - final TargetDataLine mtl, final AudioFormat maf) { + private void openHttpsPostConnection(String murl, TargetDataLine mtl, int sampleRate) { URL url; try { url = new URL(murl); @@ -302,6 +299,7 @@ private void openHttpsPostConnection(final String murl, if (!(urlConn instanceof HttpsURLConnection)) { throw new IOException ("URL is not an Https URL"); } + HttpsURLConnection httpConn = (HttpsURLConnection)urlConn; httpConn.setAllowUserInteraction(false); httpConn.setInstanceFollowRedirects(true); @@ -309,33 +307,37 @@ private void openHttpsPostConnection(final String murl, httpConn.setDoOutput(true); httpConn.setChunkedStreamingMode(0); httpConn.setRequestProperty("Transfer-Encoding", "chunked"); - httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + (int)maf.getSampleRate()); + httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate); // also worked with ("Content-Type", "audio/amr; rate=8000"); httpConn.connect(); - + // this opens a connection, then sends POST & headers. - OutputStream out = httpConn.getOutputStream(); + final OutputStream out = httpConn.getOutputStream(); //Note : if the audio is more than 15 seconds // dont write it to UrlConnInputStream all in one block as this sample does. // Rather, segment the byteArray and on intermittently, sleeping thread // supply bytes to the urlConn Stream at a rate that approaches // the bitrate ( =30K per sec. in this instance ). System.out.println("Starting to write data to output..."); - AudioInputStream ais = new AudioInputStream(mtl); - ChunkedOutputStream os = new ChunkedOutputStream(out); - AudioSystem.write(ais, FLACFileWriter.FLAC, os); - out.write(FINAL_CHUNK); - System.out.println("IO WRITE DONE"); - out.close(); + final AudioInputStream ais = new AudioInputStream(mtl);; + AudioSystem.write(ais, FLACFileWriter.FLAC, out); + //Output Stream is automatically closed // do you need the trailer? // NOW you can look at the status. - int resCode = httpConn.getResponseCode(); + + //Diagonostic Code. + /*int resCode = httpConn.getResponseCode(); if (resCode / 100 != 2) { System.out.println("ERROR"); } - }catch(Exception ex){ + Scanner scanner = new Scanner(httpConn.getInputStream()); + while(scanner.hasNextLine()){ + System.out.println("UPSTREAM READS:" + scanner.nextLine()); + } + scanner.close();*/ + System.out.println("Upstream Closed..."); + }catch(IOException ex){ ex.printStackTrace(); - } } @@ -369,34 +371,32 @@ private Scanner openHttpsPostConnection(String urlStr, byte[][] data, int sample httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate=" + sampleRate); // also worked with ("Content-Type", "audio/amr; rate=8000"); httpConn.connect(); - - // this opens a connection, then sends POST & headers. - out = httpConn.getOutputStream(); - //Note : if the audio is more than 15 seconds - // dont write it to UrlConnInputStream all in one block as this sample does. - // Rather, segment the byteArray and on intermittently, sleeping thread - // supply bytes to the urlConn Stream at a rate that approaches - // the bitrate ( =30K per sec. in this instance ). - System.out.println("Starting to write"); - for(byte[] dataArray: mextrad){ - out.write(dataArray); // one big block supplied instantly to the underlying chunker wont work for duration > 15 s. - try { - Thread.sleep(1000);//Delays the Audio so Google thinks its a mic. - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - out.write(FINAL_CHUNK); - System.out.println("IO WRITE DONE"); - // do you need the trailer? - // NOW you can look at the status. - resCode = httpConn.getResponseCode(); - if (resCode / 100 != 2) { - System.out.println("ERROR"); + // this opens a connection, then sends POST & headers. + out = httpConn.getOutputStream(); + //Note : if the audio is more than 15 seconds + // dont write it to UrlConnInputStream all in one block as this sample does. + // Rather, segment the byteArray and on intermittently, sleeping thread + // supply bytes to the urlConn Stream at a rate that approaches + // the bitrate ( =30K per sec. in this instance ). + System.out.println("Starting to write"); + for(byte[] dataArray: mextrad){ + out.write(dataArray); // one big block supplied instantly to the underlying chunker wont work for duration > 15 s. + try { + Thread.sleep(1000);//Delays the Audio so Google thinks its a mic. + } catch (InterruptedException e) { + e.printStackTrace(); } - + } + out.write(FINAL_CHUNK); + System.out.println("IO WRITE DONE"); + // do you need the trailer? + // NOW you can look at the status. + resCode = httpConn.getResponseCode(); + if (resCode / 100 != 2) { + System.out.println("ERROR"); + } if (resCode == HttpsURLConnection.HTTP_OK) { - return new Scanner(httpConn.getInputStream()); + return new Scanner(httpConn.getInputStream(), "UTF-8"); } else{ System.out.println("HELP: " + resCode); diff --git a/src/main/java/com/darkprograms/speech/recognizer/Recognizer.java b/src/main/java/com/darkprograms/speech/recognizer/Recognizer.java index af00114..56a5707 100644 --- a/src/main/java/com/darkprograms/speech/recognizer/Recognizer.java +++ b/src/main/java/com/darkprograms/speech/recognizer/Recognizer.java @@ -7,7 +7,6 @@ import java.nio.charset.Charset; import org.json.*; -import com.darkprograms.speech.util.StringUtil; /*************************************************************** * Class that submits FLAC audio and retrieves recognized text @@ -124,12 +123,6 @@ public String toString(){ private boolean profanityFilter = true; private String language = null; private String apikey = null; - - /** - * Constructor - */ - private Recognizer() { - } /** * Constructor diff --git a/src/main/java/com/darkprograms/speech/synthesiser/Synthesiser.java b/src/main/java/com/darkprograms/speech/synthesiser/Synthesiser.java index 558979f..4b4af82 100644 --- a/src/main/java/com/darkprograms/speech/synthesiser/Synthesiser.java +++ b/src/main/java/com/darkprograms/speech/synthesiser/Synthesiser.java @@ -54,7 +54,6 @@ public class Synthesiser { /** * Constructor */ - @Deprecated public Synthesiser() { languageCode = "auto"; } @@ -96,16 +95,14 @@ public InputStream getMP3Data(String synthText) throws IOException{ String languageCode = this.languageCode;//Ensures retention of language settings if set to auto if(languageCode == null || languageCode.equals("") || languageCode.equalsIgnoreCase("auto")){ - try{ - languageCode = detectLanguage(synthText);//Detects language - if(languageCode == null){ - languageCode = "en-us";//Reverts to Default Language if it can't detect it. - //Throw an error message here eventually - } - } - catch(Exception ex){ - ex.printStackTrace(); + languageCode = detectLanguage(synthText);//Detects language + /* NOTE: Detect language relies on an entirely seperate endpoint. + * If the GoogleTranslate API stops working, do not use the auto parameter + * and switch to something else or a best guess. + */ + if(languageCode == null){ languageCode = "en-us";//Reverts to Default Language if it can't detect it. + //Throw an error message here eventually } } @@ -137,7 +134,8 @@ public InputStream getMP3Data(String synthText) throws IOException{ URLConnection urlConn = url.openConnection(); //Open connection //Adding header for user agent is required - urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) Gecko/20100101 Firefox/4.0"); + urlConn.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0) " + + "Gecko/20100101 Firefox/4.0"); return urlConn.getInputStream(); } @@ -201,7 +199,8 @@ private List parseString(String input, List fragments){ fragments.add(input.substring(0,100));//In case you sent gibberish to Google. return parseString(input.substring(100), fragments); }else{ - fragments.add(input.substring(0,lastWord));//Otherwise, adds the last word to the list for recursion. + fragments.add(input.substring(0,lastWord)); + //Otherwise, adds the last word to the list for recursion. return parseString(input.substring(lastWord), fragments); } } @@ -269,15 +268,5 @@ public InputStream call() throws IOException{ return getMP3Data(synthText); } } - - public static void main(String[] args){ - Synthesiser synth = new Synthesiser("en-US"); - try { - synth.getMP3Data("Hello, this is a test"); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } } diff --git a/src/main/java/com/darkprograms/speech/translator/GoogleTranslate.java b/src/main/java/com/darkprograms/speech/translator/GoogleTranslate.java index 7a5c4b8..59bc9da 100644 --- a/src/main/java/com/darkprograms/speech/translator/GoogleTranslate.java +++ b/src/main/java/com/darkprograms/speech/translator/GoogleTranslate.java @@ -7,8 +7,6 @@ import java.net.URLConnection; import java.net.URLEncoder; import java.nio.charset.Charset; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; @@ -217,16 +215,9 @@ private static boolean containsLettersOnly(String text){ * This function generates the b parameter for translation acting as the seed for the hashing algorithm. */ private static int generateB() { - SimpleDateFormat sdf = new SimpleDateFormat("dd/MM/yyyy", Locale.US); - - Date start; - Date now; - try { - start = sdf.parse("01/01/1970"); - now = new Date(); - } catch (ParseException e) { - return 402890; - } + Date start = new Date(0L); //Unix Epoch + Date now = new Date(); + long diff = now.getTime() - start.getTime(); long hours = diff / (60 * 60 * 1000) % 24; long days = diff / (24 * 60 * 60 * 1000);