// Gist by @sajjadyousefnia, created March 13, 2025
package com.sajjady.recorder.Socket
import android.annotation.SuppressLint
import android.content.Context
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaCodec
import android.media.MediaCodecInfo
import android.media.MediaFormat
import android.media.MediaMuxer
import android.media.MediaRecorder
import android.media.audiofx.AcousticEchoCanceler
import android.media.audiofx.NoiseSuppressor
import android.os.Build
import android.widget.Toast
import com.google.gson.Gson
import com.neovisionaries.ws.client.DualStackMode
import com.neovisionaries.ws.client.WebSocket
import com.neovisionaries.ws.client.WebSocketAdapter
import com.neovisionaries.ws.client.WebSocketException
import com.neovisionaries.ws.client.WebSocketFactory
import com.neovisionaries.ws.client.WebSocketFrame
import com.sajjady.recorder.CallLog.CallLogType
import com.sajjady.recorder.CallLog.RecordCallLog
import com.sajjady.recorder.Database.AppDatabase
import com.sajjady.recorder.Storage.SharedPreferencesHelper
import com.simplemobiletools.commons.extensions.getCurrentFormattedDateTime
import com.simplemobiletools.commons.extensions.hasProperStoredFirstParentUri
import com.simplemobiletools.commons.helpers.isRPlus
import com.simplemobiletools.voicerecorder.extensions.config
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.cancel
import kotlinx.coroutines.launch
import kotlinx.coroutines.withContext
import timber.log.Timber
import java.io.File
import java.nio.BufferOverflowException
import java.nio.ByteBuffer
import java.nio.ByteOrder
import kotlin.math.max
import kotlin.math.min
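/**
 * Streams microphone audio to a Vosk ASR server over a WebSocket (neovisionaries client)
 * and, optionally, encodes the same PCM stream to an AAC/M4A file via MediaCodec + MediaMuxer.
 */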
class NeovisionariesSocketManager(val context: Context) {
private var scope: CoroutineScope? = null
private var audioRecord: AudioRecord? = null
private var audioEncoder: MediaCodec? = null
private val sampleRate = 44100 // Hz; must match the sample_rate sent to the Vosk server in the config message
// private val channelConfig = AudioFormat.CHANNEL_IN_STEREO
// private val audioFormat = AudioFormat.ENCODING_PCM_FLOAT
private val channelConfig = AudioFormat.CHANNEL_IN_MONO
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
private val bufferSize = 4096
private var minimumBufferSize: Int = -1
private var lastSendTime = System.currentTimeMillis()
private val accumulatedData =
mutableListOf<Byte>() // Temporary storage for 500ms of audio data
private var socketLiveListener: SocketLiveListener? = null
private var ws: WebSocket? = null
public var noiseSuppressor: NoiseSuppressor? = null
//---------------------for file initialization--------------------------------
private var mediaCodec: MediaCodec? = null
private var mediaMuxer: MediaMuxer? = null
private var outputFile: File? = null
private var presentationTimeUs: Long = 0
var totalBytesRead: Long = 0
private var muxerTrackIndex = -1
private var isMuxerStarted = false
@Volatile
private var isStreaming = false
public fun setSocketLiveListener(socketLiveListener: SocketLiveListener) {
Timber.d("setSocketLiveListener")
this.socketLiveListener = socketLiveListener
}
public fun startTranscribe() {
scope = CoroutineScope(Dispatchers.Default)
scope!!.launch {
asr()
}
}
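// asr(): opens the WebSocket to the Vosk server, configures AudioRecord for 16-bit mono PCM,
// then loops while isStreaming: roughly 500 ms of audio is batched and sent as a binary frame,
// and, when live_text_voice is enabled, the same PCM is also fed to the AAC file encoder.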
@SuppressLint("MissingPermission")
@Throws(Exception::class)
fun asr() {
val factory = WebSocketFactory()
factory.setDualStackMode(DualStackMode.IPV4_ONLY)
val ws: WebSocket = factory.createSocket("ws://80.210.37.8:2700")
ws.addListener(CustomWebSocketAdapter(context, scope!!, socketLiveListener))
ws.connect()
this.ws = ws // keep a class-level reference so disconnect() can send the EOF message and close this socket
try {
audioRecord =
AudioRecord.Builder().setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION)
.setAudioFormat(
AudioFormat.Builder().setEncoding(audioFormat).setSampleRate(sampleRate)
.setChannelMask(channelConfig).build()
).setBufferSizeInBytes(bufferSize).build()
if (NoiseSuppressor.isAvailable()) {
noiseSuppressor = NoiseSuppressor.create(audioRecord!!.audioSessionId)
noiseSuppressor?.enabled = true
}
audioRecord?.startRecording()
audioEncoder?.start() // audioEncoder is never initialized in this class, so this call is currently a no-op
isStreaming = true
setupFileRecording()
/*
minimumBufferSize = AudioRecord.getMinBufferSize(
sampleRate,
AudioFormat.CHANNEL_IN_MONO, audioFormat
)
*/
// Tell the Vosk server the sample rate of the incoming audio
ws.sendText("{ \"config\" : { \"sample_rate\" : $sampleRate } }")
// Read from AudioRecord in chunks matching the configured buffer size (4096 bytes)
val numBytes = bufferSize
val inputBuffer = ByteArray(numBytes)
// File-recording byte count is tracked by the class-level totalBytesRead, updated in processPcmData()
try {
while (isStreaming) {
val bytesRead = audioRecord?.read(inputBuffer, 0, inputBuffer.size) ?: 0
if (bytesRead > 0) {
accumulatedData.addAll(inputBuffer.take(bytesRead))
val currentTime = System.currentTimeMillis()
if (currentTime - lastSendTime >= 500) {
val finalData = accumulatedData.toByteArray()
ws.sendBinary(finalData)
accumulatedData.clear()
lastSendTime = currentTime
}
if (SharedPreferencesHelper(context).live_text_voice) {
processPcmData(inputBuffer, bytesRead)
}
}
}
if (SharedPreferencesHelper(context).live_text_voice) {
signalEndOfStream()
processEncodedData() // drain any remaining encoded output before releasing the codec and muxer
releaseFileRecordingResources()
Timber.d("file size is ${outputFile?.length()}")
saveRecordingDatabase()
}
disconnect()
} catch (e: Exception) {
e.printStackTrace()
}
} catch (ex: Exception) {
ex.printStackTrace()
}
}
private fun saveRecordingDatabase() {
val dao = AppDatabase.getInstance(context).recordDao()
dao.insert(
RecordCallLog(
dateTime = System.currentTimeMillis(),
path = outputFile!!.path,
liveText = "",
text = "",
duration = "",
name = "",
number = "",
realId = 0L,
type = CallLogType.MISSED
)
)
}
private fun releaseFileRecordingResources() {
mediaCodec?.stop()
mediaCodec?.release()
// MediaMuxer.stop() throws IllegalStateException if start() was never called
if (isMuxerStarted) {
mediaMuxer?.stop()
}
mediaMuxer?.release()
}
private fun signalEndOfStream() {
val codec = mediaCodec ?: return
val inputBufferIndex = codec.dequeueInputBuffer(10000)
if (inputBufferIndex >= 0) {
codec.queueInputBuffer(
inputBufferIndex,
0,
0,
0,
MediaCodec.BUFFER_FLAG_END_OF_STREAM
)
}
}
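// Configures a mono AAC-LC encoder at 256 kbps plus an MPEG-4 muxer writing to
// <saveRecordingsFolder>/<timestamp>.m4a.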
private fun setupFileRecording() {
// Setup AAC encoder
mediaCodec = MediaCodec.createEncoderByType(MediaFormat.MIMETYPE_AUDIO_AAC)
val format = MediaFormat.createAudioFormat(
MediaFormat.MIMETYPE_AUDIO_AAC, sampleRate, 1 // Mono
).apply {
setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC)
setInteger(MediaFormat.KEY_BIT_RATE, 256000) // 256 kbps
// Set maximum input size to match AudioRecord buffer
setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, bufferSize * 2) // Double buffer for safety
}
mediaCodec?.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
mediaCodec?.start()
val defaultFolder = File(context.config.saveRecordingsFolder)
if (!defaultFolder.exists()) {
defaultFolder.mkdir()
}
// Both branches currently resolve to the same folder; the isRPlus() scoped-storage check is
// kept as a hook for a fallback location (e.g. cacheDir) on Android 11+
val baseFolder =
if (isRPlus() && !context.hasProperStoredFirstParentUri(defaultFolder.absolutePath)) {
defaultFolder.absolutePath
} else {
defaultFolder.absolutePath
}
if (!File(baseFolder).exists()) {
File(baseFolder).mkdir()
}
outputFile = File("$baseFolder/${context.getCurrentFormattedDateTime()}.m4a")
outputFile!!.createNewFile()
// Setup MediaMuxer
mediaMuxer =
MediaMuxer(outputFile!!.absolutePath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4)
}
fun logBufferStats() {
// Debug helper only: inputBuffers is deprecated on API 21+ and should not be mixed with getInputBuffer()
mediaCodec?.inputBuffers?.forEachIndexed { i, buffer ->
Timber.d("InputBuffer[$i] - Capacity: ${buffer.capacity()} Limit: ${buffer.limit()}")
}
}
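// Feeds one chunk of amplified 16-bit PCM into the AAC encoder and then drains encoded output
// into the muxer; presentationTimeUs is derived from totalBytesRead (2 bytes per mono sample).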
private fun processPcmData(data: ByteArray, size: Int) {
val codec = mediaCodec ?: return
logBufferStats()
val inputBufferIndex = codec.dequeueInputBuffer(10000)
if (inputBufferIndex >= 0) {
val inputBuffer = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
codec.getInputBuffer(inputBufferIndex)
} else {
codec.inputBuffers[inputBufferIndex]
} ?: return
try {
inputBuffer.clear()
// Amplify the PCM data (gain of 4 quadruples the amplitude, with clipping)
val amplifiedData = amplifyPcmData(data, size, 4f)
// Calculate presentation time BEFORE queuing
presentationTimeUs = (totalBytesRead * 1_000_000L) / (2 * sampleRate)
// Check buffer capacity and handle partial writes; queue only the bytes actually written
val bytesQueued = if (size > inputBuffer.remaining()) {
Timber.w("Input buffer overflow detected (${inputBuffer.remaining()} bytes available, $size bytes attempted). Chunking data.")
handleLargeData(inputBuffer, amplifiedData, size)
} else {
inputBuffer.put(amplifiedData, 0, size)
size
}
codec.queueInputBuffer(
inputBufferIndex,
0,
bytesQueued,
presentationTimeUs,
0
)
// Update after successful buffer submission
totalBytesRead += bytesQueued.toLong()
} catch (e: BufferOverflowException) {
Timber.e("Buffer overflow after capacity check! Size: $size, Capacity: ${inputBuffer.capacity()}")
// Return the empty buffer to the codec; flagging it as codec config would mislead the encoder
codec.queueInputBuffer(
inputBufferIndex,
0,
0,
0,
0
)
}
}
processEncodedData()
}
private fun handleLargeData(buffer: ByteBuffer, amplifiedData: ByteArray, size: Int): Int {
var bytesWritten = 0
while (bytesWritten < size) {
val chunkSize = min(buffer.remaining(), size - bytesWritten)
if (chunkSize > 0) {
buffer.put(amplifiedData, bytesWritten, chunkSize)
bytesWritten += chunkSize
} else {
Timber.w("Skipping ${size - bytesWritten} bytes due to insufficient buffer space")
break
}
}
return bytesWritten
}
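// Drains the encoder: the muxer is started on the first INFO_OUTPUT_FORMAT_CHANGED, after which
// every encoded buffer is written until the codec reports INFO_TRY_AGAIN_LATER.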
private fun processEncodedData() {
val codec = mediaCodec ?: return
val muxer = mediaMuxer ?: return
val bufferInfo = MediaCodec.BufferInfo()
while (true) {
val outputBufferIndex = codec.dequeueOutputBuffer(bufferInfo, 10000)
when (outputBufferIndex) {
MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> {
muxerTrackIndex = muxer.addTrack(codec.outputFormat)
muxer.start()
isMuxerStarted = true
}
MediaCodec.INFO_TRY_AGAIN_LATER -> break
else -> {
if (outputBufferIndex >= 0) {
val outputBuffer =
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
codec.getOutputBuffer(outputBufferIndex)
} else {
codec.outputBuffers[outputBufferIndex]
}
// Skip codec-config buffers; the track format is taken from INFO_OUTPUT_FORMAT_CHANGED
val isConfig = (bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0
if (isMuxerStarted && !isConfig && bufferInfo.size > 0) {
outputBuffer?.let {
muxer.writeSampleData(
muxerTrackIndex,
it,
bufferInfo
)
}
}
// Always release the output buffer, even when nothing was written to the muxer
codec.releaseOutputBuffer(outputBufferIndex, false)
}
}
}
}
}
private fun disconnect() {
ws?.sendText("{\"eof\" : 1}")
ws?.disconnect()
ws = null
}
public fun finishSocket() {
isStreaming = false
scope?.cancel()
}
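// WebSocket callbacks: shows connection-state toasts on the main thread and forwards final
// (non-"partial") Vosk results to the SocketLiveListener.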
class CustomWebSocketAdapter(
val context: Context,
val scope: CoroutineScope,
var socketLiveListener: SocketLiveListener? = null
) : WebSocketAdapter() {
override fun onConnected(
websocket: WebSocket?, headers: MutableMap<String, MutableList<String>>?
) {
Timber.tag("CustomWebSocketAdapter").d("onConnected")
scope.launch {
withContext(Dispatchers.Main) {
Toast.makeText(context, "صحبت کنید", Toast.LENGTH_SHORT).show() // "Speak now"
}
}
}
override fun onDisconnected(
websocket: WebSocket?,
serverCloseFrame: WebSocketFrame?,
clientCloseFrame: WebSocketFrame?,
closedByServer: Boolean
) {
Timber.tag("CustomWebSocketAdapter").e("onDisconnected")
Timber.e("closedByServer : $closedByServer")
scope.launch {
withContext(Dispatchers.Main) {
Toast.makeText(context, "اتصال قطع شد", Toast.LENGTH_SHORT).show() // "Connection lost"
}
}
}
override fun onConnectError(websocket: WebSocket?, exception: WebSocketException?) {
Timber.tag("CustomWebSocketAdapter").e("onConnectError : ${exception?.message}")
Timber.tag("CustomWebSocketAdapter")
.e("socket is connected : %s", websocket?.connectedSocket?.isConnected)
scope.launch {
withContext(Dispatchers.Main) {
Toast.makeText(context, "خطا در اتصال", Toast.LENGTH_SHORT).show() // "Connection error"
}
}
}
override fun onSendError(
websocket: WebSocket?,
cause: WebSocketException?,
frame: WebSocketFrame?
) {
Timber.tag("CustomWebSocketAdapter").e("onConnectError : ${cause?.message}")
scope.launch {
withContext(Dispatchers.Main) {
Toast.makeText(context, "خطا در هنگام اتصال", Toast.LENGTH_SHORT).show() // "Error while connecting"
}
}
}
override fun onTextMessage(websocket: WebSocket, message: String) {
if (message.contains("partial")) {/* val json = Gson().fromJson<PartialModel>(message, PartialModel::class.java)
val output = json.partial
socketLiveListener!!.onReceived(output)
Timber.tag("CustomWebSocketAdapter").d(output)*/
} else {
// Final result: concatenate the recognized words and forward them to the listener
// results.add(message)
val json = Gson().fromJson<ResponseModel>(message, ResponseModel::class.java)
var output = ""
json.result.forEach { output += " " + it.withUnescapedWord().word }
socketLiveListener?.onReceived(output)
Timber.tag("CustomWebSocketAdapter").d(output)
}
}
}
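// Applies a fixed gain to 16-bit PCM samples, clipping to the signed 16-bit range.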
private fun amplifyPcmData(data: ByteArray, size: Int, gain: Float): ByteArray {
// Wrap the ByteArray in a ByteBuffer with native byte order (matches AudioRecord)
val byteBuffer = ByteBuffer.wrap(data, 0, size).order(ByteOrder.nativeOrder())
val shortBuffer = byteBuffer.asShortBuffer()
val shortArray = ShortArray(shortBuffer.remaining())
shortBuffer.get(shortArray)
// Amplify each sample and clip to 16-bit range
val amplifiedShortArray = ShortArray(shortArray.size) { i ->
val sample = shortArray[i]
val amplified = (sample * gain).toInt()
when {
amplified > 32767 -> 32767.toShort()
amplified < -32768 -> (-32768).toShort()
else -> amplified.toShort()
}
}
// Convert back to ByteArray
val amplifiedByteBuffer = ByteBuffer.allocate(size).order(ByteOrder.nativeOrder())
amplifiedByteBuffer.asShortBuffer().put(amplifiedShortArray)
return amplifiedByteBuffer.array()
}
}
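/*
Example usage — a minimal sketch, assuming RECORD_AUDIO permission is already granted and that
SocketLiveListener is an interface exposing the onReceived(String) callback used above:

val manager = NeovisionariesSocketManager(context)
manager.setSocketLiveListener(object : SocketLiveListener {
    override fun onReceived(text: String) {
        Timber.d("transcript: $text")
    }
})
manager.startTranscribe()
// ...later, to stop streaming and close the socket:
manager.finishSocket()
*/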