Basic support for whisper (via jarvis-server and whisper-asr)

This commit is contained in:
Mathieu B 2022-11-28 21:35:38 +01:00
parent c24d07bb26
commit f8796c4206
3 changed files with 42 additions and 10 deletions

View File

@ -45,6 +45,7 @@
<entry key="../../../../../layout/compose-model-1641396094843.xml" value="0.33" /> <entry key="../../../../../layout/compose-model-1641396094843.xml" value="0.33" />
<entry key="../../../../../layout/compose-model-1641399721181.xml" value="2.0" /> <entry key="../../../../../layout/compose-model-1641399721181.xml" value="2.0" />
<entry key="../../../../../layout/compose-model-1641463307897.xml" value="2.0" /> <entry key="../../../../../layout/compose-model-1641463307897.xml" value="2.0" />
<entry key="../../../../../layout/compose-model-1641566263645.xml" value="0.33" />
<entry key="app/src/main/res/drawable-v24/ic_launcher_foreground.xml" value="0.5307291666666667" /> <entry key="app/src/main/res/drawable-v24/ic_launcher_foreground.xml" value="0.5307291666666667" />
<entry key="app/src/main/res/drawable/ic_baseline_arrow_back_24.xml" value="0.38981481481481484" /> <entry key="app/src/main/res/drawable/ic_baseline_arrow_back_24.xml" value="0.38981481481481484" />
<entry key="app/src/main/res/drawable/ic_baseline_keyboard_24.xml" value="0.38981481481481484" /> <entry key="app/src/main/res/drawable/ic_baseline_keyboard_24.xml" value="0.38981481481481484" />

View File

@ -7,21 +7,25 @@ import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier import androidx.compose.ui.Modifier
import androidx.compose.ui.res.painterResource import androidx.compose.ui.res.painterResource
import androidx.compose.ui.res.stringResource import androidx.compose.ui.res.stringResource
import androidx.compose.ui.tooling.preview.Preview
import androidx.compose.ui.unit.DpOffset import androidx.compose.ui.unit.DpOffset
import androidx.compose.ui.unit.dp import androidx.compose.ui.unit.dp
import androidx.compose.ui.unit.sp import androidx.compose.ui.unit.sp
import androidx.navigation.NavController import androidx.navigation.NavController
import androidx.navigation.compose.rememberNavController
import ch.mathieubroillet.jarvis.android.R import ch.mathieubroillet.jarvis.android.R
import ch.mathieubroillet.jarvis.android.audio.AudioRecorder import ch.mathieubroillet.jarvis.android.audio.AudioRecorder
import ch.mathieubroillet.jarvis.android.chat.ConversationUiState import ch.mathieubroillet.jarvis.android.chat.ConversationUiState
import ch.mathieubroillet.jarvis.android.chat.Message import ch.mathieubroillet.jarvis.android.chat.Message
import ch.mathieubroillet.jarvis.android.chat.Messages import ch.mathieubroillet.jarvis.android.chat.Messages
import ch.mathieubroillet.jarvis.android.nav.Screen import ch.mathieubroillet.jarvis.android.nav.Screen
import ch.mathieubroillet.jarvis.android.ui.theme.JarvisComposeTheme
import ch.mathieubroillet.jarvis.android.ui.theme.productSansFont import ch.mathieubroillet.jarvis.android.ui.theme.productSansFont
import ch.mathieubroillet.jarvis.android.utils.DefaultBox import ch.mathieubroillet.jarvis.android.utils.DefaultBox
import ch.mathieubroillet.jarvis.android.utils.IconAlertDialogTextField import ch.mathieubroillet.jarvis.android.utils.IconAlertDialogTextField
import ch.mathieubroillet.jarvis.android.utils.contactServerWithFileAudioRecording import ch.mathieubroillet.jarvis.android.utils.contactServerWithFileAudioRecording
import com.github.squti.androidwaverecorder.RecorderState import com.github.squti.androidwaverecorder.RecorderState
import com.github.squti.androidwaverecorder.WaveRecorder
import org.json.JSONObject import org.json.JSONObject
import kotlin.concurrent.thread import kotlin.concurrent.thread
@ -150,13 +154,12 @@ fun DisplayMainPage(
val requestOutput = val requestOutput =
contactServerWithFileAudioRecording(audioRecorder.getOutputFile()) contactServerWithFileAudioRecording(audioRecorder.getOutputFile())
val json: JSONObject = JSONObject(requestOutput) val json = JSONObject(requestOutput)
val sent = JSONObject(requestOutput).getString("sent").replace("\"", "") val sent = json.getString("transcription")
.replace("[", "").replace("]", "").replace(",", " ")
sent.replaceFirstChar { sent.first().uppercase() }
uiState.addMessage(Message(false, sent)) uiState.addMessage(Message(false, sent))
Thread.sleep(1000) Thread.sleep(1000)
uiState.addMessage(Message(true, json.getString("response"))) uiState.addMessage(Message(true, json.getString("answer")))
audioRecorder.getOutputFile().delete() audioRecorder.getOutputFile().delete()
} }
} }
@ -172,14 +175,14 @@ fun DisplayMainPage(
} }
} }
/*@Preview(showBackground = true) @Preview(showBackground = true)
@Composable @Composable
fun MainPagePreview() { fun MainPagePreview() {
JarvisComposeTheme { JarvisComposeTheme {
DisplayMainPage( DisplayMainPage(
rememberNavController(), ConversationUiState( rememberNavController(), ConversationUiState(
listOf(Message(true, stringResource(id = R.string.demo_message_1))) listOf(Message(true, stringResource(id = R.string.demo_message_1)))
), null ), audioRecorder = AudioRecorder("", WaveRecorder(""))
) )
} }
}*/ }

View File

@ -1,9 +1,11 @@
package ch.mathieubroillet.jarvis.android.utils package ch.mathieubroillet.jarvis.android.utils
import okhttp3.Headers
import okhttp3.MediaType.Companion.toMediaType import okhttp3.MediaType.Companion.toMediaType
import okhttp3.OkHttpClient import okhttp3.OkHttpClient
import okhttp3.Request import okhttp3.Request
import okhttp3.RequestBody.Companion.asRequestBody import okhttp3.RequestBody.Companion.asRequestBody
import okhttp3.internal.http1.HeadersReader
import java.io.File import java.io.File
import java.io.IOException import java.io.IOException
@ -11,12 +13,38 @@ fun contactServerWithFileAudioRecording(file: File): String {
val client = OkHttpClient() val client = OkHttpClient()
val request = Request.Builder() val request = Request.Builder()
.url("http://192.168.1.130:5000/process_audio_request_file") .url("https://whisper.broillet.ch/process_audio_request_file")
.post(file.asRequestBody("audio/mpeg; charset=utf-8".toMediaType())) .post(file.asRequestBody("audio/x-wav; charset=utf-8".toMediaType()))
.build() .build()
client.newCall(request).execute().use { response -> client.newCall(request).execute().use { response ->
if (!response.isSuccessful) throw IOException("Unexpected code $response") if (!response.isSuccessful) throw IOException("Unexpected code $response")
return response.body!!.string() return response.body!!.string()
} }
/* # SEND STT REQUEST DIRECTLY TO WHISPER ASR WITHOUT JARVIS SERVER API
val okHttpClient = OkHttpClient()
.newBuilder()
.connectTimeout(60, TimeUnit.SECONDS)
.build()
val fileBody: RequestBody = RequestBody.create("audio/x-wav".toMediaTypeOrNull(), file.readBytes())
val multipartBody: MultipartBody = MultipartBody.Builder()
.setType(MultipartBody.FORM) // Header to show we are sending a Multipart Form Data
.addFormDataPart("audio_file", file.name, fileBody) // file param
.build()
val request = Request.Builder()
.addHeader("Accept", "application/json")
.addHeader("Content-Type", "multipart/form-data")
.url("http://192.168.1.208:9000/asr?task=transcribe&language=fr&output=json")
.post(multipartBody)
.build()
okHttpClient.newCall(request).execute().use { response ->
if (!response.isSuccessful) throw IOException("Unexpected code $response")
return response.body!!.string()
}*/
} }