누구에게 도움이되는지 잘 모르겠지만이 문제에 대한 해결책을 찾았습니다.
먼저, 녹음기를 사용하여 MediaRecorder.AudioSource 덕분에 원하는 입력을 사용하여 사운드를 녹음하고 파일에 저장했습니다.
private void startRecording() {
recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
RECORDER_SAMPLERATE, RECORDER_CHANNELS,
RECORDER_AUDIO_ENCODING, BufferElements2Rec * BytesPerElement);
recorder.startRecording();
isRecording = true;
recordingThread = new Thread(new Runnable() {
public void run() {
writeAudioDataToFile();
}
}, "AudioRecorder Thread");
recordingThread.start();
}
그 후, 나는 .flav에서 .wav를 인코딩하기 위해 찾은 flac 인코더를 사용했습니다.
마지막으로 Google API에 직접 flac 파일을 보내고 원하는 텍스트를받을 수있는 코드를 발견했습니다!
public void getTranscription(int sampleRate) {
File myfil = new File(fileName);
if (!myfil.canRead()) {
Log.d("ParseStarter", "FATAL no read access");
System.out.println("FATAL CAN'T READ");
}
// first is a GET for the speech-api DOWNSTREAM
// then a future exec for the UPSTREAM/chunked encoding used so as not
// to limit
// the POST body sz
PAIR = MIN + (long) (Math.random() * ((MAX - MIN) + 1L));
// DOWN URL just like in curl full-duplex example plus the handler
downChannel(API_DOWN_URL + PAIR, messageHandler);
// UP chan, process the audio byteStream for interface to UrlConnection
// using 'chunked-encoding'
FileInputStream fis;
try {
fis = new FileInputStream(myfil);
FileChannel fc = fis.getChannel(); // Get the file's size and then
// map it into memory
int sz = (int) fc.size();
MappedByteBuffer bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, sz);
byte[] data2 = new byte[bb.remaining()];
Log.d("ParseStarter", "mapfil " + sz + " " + bb.remaining());
bb.get(data2);
// conform to the interface from the curl examples on full-duplex
// calls
// see curl examples full-duplex for more on 'PAIR'. Just a globally
// uniq value typ=long->String.
// API KEY value is part of value in UP_URL_p2
upChannel(root + up_p1 + PAIR + up_p2 + api_key, messageHandler2,
data2);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
private void downChannel(String urlStr, final Handler messageHandler) {
final String url = urlStr;
new Thread() {
Bundle b;
public void run() {
String response = "NAO FOI";
Message msg = Message.obtain();
msg.what = 1;
// handler for DOWN channel http response stream - httpsUrlConn
// response handler should manage the connection.... ??
// assign a TIMEOUT Value that exceeds by a safe factor
// the amount of time that it will take to write the bytes
// to the UPChannel in a fashion that mimics a liveStream
// of the audio at the applicable Bitrate. BR=sampleRate * bits
// per sample
// Note that the TLS session uses
// "* SSLv3, TLS alert, Client hello (1): "
// to wake up the listener when there are additional bytes.
// The mechanics of the TLS session should be transparent. Just
// use
// httpsUrlConn and allow it enough time to do its work.
Scanner inStream = openHttpsConnection(url);
// process the stream and store it in StringBuilder
while (inStream.hasNextLine()) {
b = new Bundle();
b.putString("text", inStream.nextLine());
msg.setData(b);
messageHandler.dispatchMessage(msg);
}
}
}.start();
}
private void upChannel(String urlStr, final Handler messageHandler,
byte[] arg3) {
final String murl = urlStr;
final byte[] mdata = arg3;
Log.d("ParseStarter", "upChan " + mdata.length);
new Thread() {
public void run() {
String response = "NAO FOI";
Message msg = Message.obtain();
msg.what = 2;
Scanner inStream = openHttpsPostConnection(murl, mdata);
inStream.hasNext();
// process the stream and store it in StringBuilder
while (inStream.hasNextLine()) {
response += (inStream.nextLine());
Log.d("ParseStarter", "POST resp " + response.length());
}
Bundle b = new Bundle();
b.putString("post", response);
msg.setData(b);
// in.close(); // mind the resources
messageHandler.sendMessage(msg);
}
}.start();
}
// GET for DOWNSTREAM
private Scanner openHttpsConnection(String urlStr) {
InputStream in = null;
int resCode = -1;
Log.d("ParseStarter", "dwnURL " + urlStr);
try {
URL url = new URL(urlStr);
URLConnection urlConn = url.openConnection();
if (!(urlConn instanceof HttpsURLConnection)) {
throw new IOException("URL is not an Https URL");
}
HttpsURLConnection httpConn = (HttpsURLConnection) urlConn;
httpConn.setAllowUserInteraction(false);
// TIMEOUT is required
httpConn.setInstanceFollowRedirects(true);
httpConn.setRequestMethod("GET");
httpConn.connect();
resCode = httpConn.getResponseCode();
if (resCode == HttpsURLConnection.HTTP_OK) {
return new Scanner(httpConn.getInputStream());
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
// GET for UPSTREAM
private Scanner openHttpsPostConnection(String urlStr, byte[] data) {
InputStream in = null;
byte[] mextrad = data;
int resCode = -1;
OutputStream out = null;
// int http_status;
try {
URL url = new URL(urlStr);
URLConnection urlConn = url.openConnection();
if (!(urlConn instanceof HttpsURLConnection)) {
throw new IOException("URL is not an Https URL");
}
HttpsURLConnection httpConn = (HttpsURLConnection) urlConn;
httpConn.setAllowUserInteraction(false);
httpConn.setInstanceFollowRedirects(true);
httpConn.setRequestMethod("POST");
httpConn.setDoOutput(true);
httpConn.setChunkedStreamingMode(0);
httpConn.setRequestProperty("Content-Type", "audio/x-flac; rate="
+ rate);
httpConn.connect();
try {
// this opens a connection, then sends POST & headers.
out = httpConn.getOutputStream();
// Note : if the audio is more than 15 seconds
// dont write it to UrlConnInputStream all in one block as this
// sample does.
// Rather, segment the byteArray and on intermittently, sleeping
// thread
// supply bytes to the urlConn Stream at a rate that approaches
// the bitrate (=30K per sec. in this instance).
Log.d("ParseStarter", "IO beg on data");
out.write(mextrad); // one big block supplied instantly to the
// underlying chunker wont work for duration
// > 15 s.
Log.d("ParseStarter", "IO fin on data");
// do you need the trailer?
// NOW you can look at the status.
resCode = httpConn.getResponseCode();
Log.d("ParseStarter", "POST OK resp "
+ httpConn.getResponseMessage().getBytes().toString());
if (resCode/100 != 2) {
Log.d("ParseStarter", "POST bad io ");
}
} catch (IOException e) {
Log.d("ParseStarter", "FATAL " + e);
}
if (resCode == HttpsURLConnection.HTTP_OK) {
Log.d("ParseStarter", "OK RESP to POST return scanner ");
return new Scanner(httpConn.getInputStream());
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
// DOWN handler
Handler messageHandler = new Handler() {
public void handleMessage(Message msg) {
super.handleMessage(msg);
switch (msg.what) {
case 1: // GET DOWNSTREAM json id="@+id/comment"
String mtxt = msg.getData().getString("text");
if (mtxt.length() > 20) {
final String f_msg = mtxt;
handler.post(new Runnable() { // This thread runs in the UI
// TREATMENT FOR GOOGLE RESPONSE
@Override
public void run() {
System.out.println(f_msg);
String message = "";
final ChatMessage chatMessage = new ChatMessage(user1, user2,
message, "" + random.nextInt(1000), true);
message = f_msg;
chatMessage.setMsgID();
chatMessage.setMsgID();
chatMessage.body = message;
chatMessage.Date = CommonMethods.getCurrentDate();
chatMessage.Time = CommonMethods.getCurrentTime();
msg_edittext.setText("");
chatAdapter.add(chatMessage);
chatAdapter.notifyDataSetChanged();
}
});
}
break;
case 2:
break;
}
}
}; // doDOWNSTRM Handler end
// UPSTREAM channel. its servicing a thread and should have its own handler
Handler messageHandler2 = new Handler() {
public void handleMessage(Message msg) {
super.handleMessage(msg);
switch (msg.what) {
case 1: // GET DOWNSTREAM json
Log.d("ParseStarter", msg.getData().getString("post"));
break;
case 2:
Log.d("ParseStarter", msg.getData().getString("post"));
break;
}
}
}; // UPstream handler end
나는 구글 API를 작품에 대한 연결되지만 파일 인코더가 오래된 것 같다
this 프로젝트에서 코드의이 부분을 얻었다.
무엇으로 변경 하시겠습니까? 사용 가능한 경우 블루투스를 사용합니다 ... 녹음 된 음성에 음성 인식을 실행하려는 경우 Google에서 API를 사용할 수 있으며 기꺼이 사용료를 부과합니다.) – 323go
사실 나는 이것을 안드로이드 전화 통화 중에받은 음성 인 VOICE_DOWNLINK는 스피커에서 전송 될 때 음성 만 받아들이는 솔루션을 찾았지만 스피커의 음성을 텍스트로 변환한다는 의미였습니다. 나는 그걸 원치 않는다. – unMaxEnRad