Created
March 13, 2025 16:56
-
-
Save sajjadyousefnia/c8e7175977c7af67c3dd53fd51e518e6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.sajjady.recorder.Socket | |
import android.annotation.SuppressLint | |
import android.content.Context | |
import android.media.AudioFormat | |
import android.media.AudioRecord | |
import android.media.MediaCodec | |
import android.media.MediaCodecInfo | |
import android.media.MediaFormat | |
import android.media.MediaMuxer | |
import android.media.MediaRecorder | |
import android.media.audiofx.AcousticEchoCanceler | |
import android.media.audiofx.NoiseSuppressor | |
import android.os.Build | |
import android.widget.Toast | |
import com.google.gson.Gson | |
import com.neovisionaries.ws.client.DualStackMode | |
import com.neovisionaries.ws.client.WebSocket | |
import com.neovisionaries.ws.client.WebSocketAdapter | |
import com.neovisionaries.ws.client.WebSocketException | |
import com.neovisionaries.ws.client.WebSocketFactory | |
import com.neovisionaries.ws.client.WebSocketFrame | |
import com.sajjady.recorder.CallLog.CallLogType | |
import com.sajjady.recorder.CallLog.RecordCallLog | |
import com.sajjady.recorder.Database.AppDatabase | |
import com.sajjady.recorder.Storage.SharedPreferencesHelper | |
import com.simplemobiletools.commons.extensions.getCurrentFormattedDateTime | |
import com.simplemobiletools.commons.extensions.hasProperStoredFirstParentUri | |
import com.simplemobiletools.commons.helpers.isRPlus | |
import com.simplemobiletools.voicerecorder.extensions.config | |
import kotlinx.coroutines.CoroutineScope | |
import kotlinx.coroutines.Dispatchers | |
import kotlinx.coroutines.cancel | |
import kotlinx.coroutines.launch | |
import kotlinx.coroutines.withContext | |
import timber.log.Timber | |
import java.io.File | |
import java.nio.BufferOverflowException | |
import java.nio.ByteBuffer | |
import java.nio.ByteOrder | |
import kotlin.math.max | |
import kotlin.math.min | |
/**
 * Streams microphone PCM to a Vosk speech-recognition server over a WebSocket
 * (neovisionaries client) and, when the `live_text_voice` preference is on,
 * simultaneously encodes the same PCM to an AAC/.m4a file via MediaCodec + MediaMuxer.
 *
 * Usage: [setSocketLiveListener] (optional) -> [startTranscribe] -> [finishSocket].
 */
class NeovisionariesSocketManager(val context: Context) {
    private var scope: CoroutineScope? = null
    private var audioRecord: AudioRecord? = null
    private val sampleRate = 44100
    private val channelConfig = AudioFormat.CHANNEL_IN_MONO
    private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
    private val bufferSize = 4096
    private var lastSendTime = System.currentTimeMillis()

    // Temporary storage accumulating ~500 ms of PCM between websocket sends.
    private val accumulatedData = mutableListOf<Byte>()
    private var socketLiveListener: SocketLiveListener? = null

    // Held so disconnect() can send the eof frame and close the connection.
    private var ws: WebSocket? = null
    var noiseSuppressor: NoiseSuppressor? = null

    // ---------------- file (.m4a) recording state ----------------
    private var mediaCodec: MediaCodec? = null
    private var mediaMuxer: MediaMuxer? = null
    private var outputFile: File? = null
    private var presentationTimeUs: Long = 0

    // Total PCM bytes queued to the encoder; drives presentation timestamps.
    var totalBytesRead: Long = 0
    private var muxerTrackIndex = -1
    private var isMuxerStarted = false

    @Volatile
    private var isStreaming = false

    /** Registers the listener that receives recognized text from the server. */
    fun setSocketLiveListener(socketLiveListener: SocketLiveListener) {
        Timber.d("setSocketLiveListener")
        this.socketLiveListener = socketLiveListener
    }

    /** Launches the capture/stream loop on a background coroutine. */
    fun startTranscribe() {
        scope = CoroutineScope(Dispatchers.Default)
        scope!!.launch {
            asr()
        }
    }

    /**
     * Connects to the recognition server, starts microphone capture, and streams
     * PCM until [finishSocket] flips [isStreaming] off. Blocks its coroutine.
     *
     * Requires RECORD_AUDIO permission (suppressed check — caller must have granted it).
     */
    @SuppressLint("MissingPermission")
    @Throws(Exception::class)
    fun asr() {
        val factory = WebSocketFactory()
        factory.setDualStackMode(DualStackMode.IPV4_ONLY)
        val socket = factory.createSocket("ws://80.210.37.8:2700")
        // BUGFIX: keep the socket in the field, otherwise disconnect() operates on null
        // and the connection is never closed nor the eof frame sent.
        ws = socket
        socket.addListener(CustomWebSocketAdapter(context, scope!!, socketLiveListener))
        socket.connect()
        try {
            audioRecord =
                AudioRecord.Builder().setAudioSource(MediaRecorder.AudioSource.VOICE_RECOGNITION)
                    .setAudioFormat(
                        AudioFormat.Builder().setEncoding(audioFormat).setSampleRate(sampleRate)
                            .setChannelMask(channelConfig).build()
                    ).setBufferSizeInBytes(bufferSize).build()
            if (NoiseSuppressor.isAvailable()) {
                noiseSuppressor = NoiseSuppressor.create(audioRecord!!.audioSessionId)
                noiseSuppressor?.enabled = true
            }
            audioRecord?.startRecording()
            isStreaming = true
            setupFileRecording()
            // Let the Vosk server know the sample rate of the incoming PCM.
            socket.sendText("{ \"config\" : { \"sample_rate\" : $sampleRate } }")
            val inputBuffer = ByteArray(bufferSize)
            try {
                while (isStreaming) {
                    val bytesRead = audioRecord?.read(inputBuffer, 0, inputBuffer.size) ?: 0
                    if (bytesRead > 0) {
                        accumulatedData.addAll(inputBuffer.take(bytesRead))
                        val currentTime = System.currentTimeMillis()
                        // Batch sends into ~500 ms chunks to avoid flooding the server.
                        if (currentTime - lastSendTime >= 500) {
                            socket.sendBinary(accumulatedData.toByteArray())
                            accumulatedData.clear()
                            lastSendTime = currentTime
                        }
                        if (SharedPreferencesHelper(context).live_text_voice) {
                            processPcmData(inputBuffer, bytesRead)
                        }
                    }
                }
                if (SharedPreferencesHelper(context).live_text_voice) {
                    signalEndOfStream()
                    // Drain the remaining encoded frames before tearing the codec down,
                    // otherwise the tail of the recording is lost.
                    processEncodedData()
                    releaseFileRecordingResources()
                    Timber.d("file size is ${outputFile?.length()}")
                    saveRecordingDatabase()
                }
                releaseAudioResources()
                disconnect()
            } catch (e: Exception) {
                e.printStackTrace()
            }
        } catch (ex: Exception) {
            ex.printStackTrace()
        }
    }

    /** Stops and releases the microphone and the noise suppressor (leak fix). */
    private fun releaseAudioResources() {
        try {
            noiseSuppressor?.release()
        } catch (e: Exception) {
            Timber.e(e)
        }
        noiseSuppressor = null
        try {
            audioRecord?.stop()
        } catch (e: Exception) {
            Timber.e(e)
        }
        audioRecord?.release()
        audioRecord = null
    }

    /** Persists the finished recording as a MISSED-type call-log row. */
    private fun saveRecordingDatabase() {
        // Guard: if setupFileRecording() failed there is nothing to persist.
        val file = outputFile ?: return
        val dao = AppDatabase.getInstance(context).recordDao()
        dao.insert(
            RecordCallLog(
                dateTime = System.currentTimeMillis(),
                path = file.path,
                liveText = "",
                text = "",
                duration = "",
                name = "",
                number = "",
                realId = 0L,
                type = CallLogType.MISSED
            )
        )
    }

    /** Tears down the encoder and muxer; safe to call even if they never started. */
    private fun releaseFileRecordingResources() {
        try {
            mediaCodec?.stop()
        } catch (e: Exception) {
            Timber.e(e)
        }
        mediaCodec?.release()
        mediaCodec = null
        try {
            // BUGFIX: MediaMuxer.stop() throws IllegalStateException if start() was never called.
            if (isMuxerStarted) {
                mediaMuxer?.stop()
            }
        } catch (e: Exception) {
            Timber.e(e)
        }
        mediaMuxer?.release()
        mediaMuxer = null
        isMuxerStarted = false
        muxerTrackIndex = -1
    }

    /** Queues an empty end-of-stream input buffer so the encoder can flush. */
    private fun signalEndOfStream() {
        val codec = mediaCodec ?: return
        val inputBufferIndex = codec.dequeueInputBuffer(10000)
        if (inputBufferIndex >= 0) {
            codec.queueInputBuffer(
                inputBufferIndex,
                0,
                0,
                0,
                MediaCodec.BUFFER_FLAG_END_OF_STREAM
            )
        }
    }

    /** Creates the AAC encoder, the output .m4a file, and the MPEG-4 muxer. */
    private fun setupFileRecording() {
        mediaCodec = MediaCodec.createEncoderByType(MediaFormat.MIMETYPE_AUDIO_AAC)
        val format = MediaFormat.createAudioFormat(
            MediaFormat.MIMETYPE_AUDIO_AAC, sampleRate, 1 // mono
        ).apply {
            setInteger(MediaFormat.KEY_AAC_PROFILE, MediaCodecInfo.CodecProfileLevel.AACObjectLC)
            setInteger(MediaFormat.KEY_BIT_RATE, 256000) // 256 kbps
            // Match the AudioRecord buffer, doubled for safety margin.
            setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, bufferSize * 2)
        }
        mediaCodec?.configure(format, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE)
        mediaCodec?.start()
        // NOTE: both branches of the original isRPlus()/hasProperStoredFirstParentUri check
        // resolved to the same folder, so the recordings folder is used unconditionally.
        val defaultFolder = File(context.config.saveRecordingsFolder)
        if (!defaultFolder.exists()) {
            defaultFolder.mkdir()
        }
        outputFile = File("${defaultFolder.absolutePath}/${context.getCurrentFormattedDateTime()}.m4a")
        outputFile!!.createNewFile()
        mediaMuxer =
            MediaMuxer(outputFile!!.absolutePath, MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4)
    }

    /** Debug helper: dumps capacity/limit of the encoder's (legacy) input buffers. */
    fun logBufferStats() {
        mediaCodec?.inputBuffers?.forEachIndexed { i, buffer ->
            Timber.d("InputBuffer[$i] - Capacity: ${buffer.capacity()} Limit: ${buffer.limit()}")
        }
    }

    /**
     * Amplifies one PCM chunk and feeds it to the AAC encoder, then drains any
     * ready encoded output into the muxer.
     *
     * @param data raw 16-bit PCM bytes from AudioRecord
     * @param size number of valid bytes in [data]
     */
    private fun processPcmData(data: ByteArray, size: Int) {
        val codec = mediaCodec ?: return
        logBufferStats()
        val inputBufferIndex = codec.dequeueInputBuffer(10000)
        if (inputBufferIndex >= 0) {
            val inputBuffer = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
                codec.getInputBuffer(inputBufferIndex)
            } else {
                codec.inputBuffers[inputBufferIndex]
            } ?: return
            try {
                inputBuffer.clear()
                // Boost the signal; samples are clipped to the 16-bit range inside.
                val amplifiedData = amplifyPcmData(data, size, 4f)
                // 16-bit mono PCM => 2 bytes per frame; timestamp precedes queuing.
                presentationTimeUs = (totalBytesRead * 1_000_000L) / (2 * sampleRate)
                // BUGFIX: queue only the bytes actually written, not the requested size.
                val bytesQueued = if (size > inputBuffer.remaining()) {
                    Timber.w("Input buffer overflow detected (${inputBuffer.remaining()} bytes available, $size bytes attempted). Chunking data.")
                    handleLargeData(inputBuffer, amplifiedData, size)
                } else {
                    inputBuffer.put(amplifiedData, 0, size)
                    size
                }
                codec.queueInputBuffer(
                    inputBufferIndex,
                    0,
                    bytesQueued,
                    presentationTimeUs,
                    0
                )
                totalBytesRead += bytesQueued.toLong()
            } catch (e: BufferOverflowException) {
                Timber.e("Buffer overflow after capacity check! Size: $size, Capacity: ${inputBuffer.capacity()}")
                // BUGFIX: return the buffer empty with no flags; the original queued it
                // with BUFFER_FLAG_CODEC_CONFIG, which corrupts the encoded stream.
                codec.queueInputBuffer(inputBufferIndex, 0, 0, 0, 0)
            }
        }
        processEncodedData()
    }

    /**
     * Writes as much of [amplifiedData] as fits into [buffer].
     *
     * @return the number of bytes actually written (may be < [size]).
     */
    private fun handleLargeData(buffer: ByteBuffer, amplifiedData: ByteArray, size: Int): Int {
        var bytesWritten = 0
        while (bytesWritten < size) {
            val chunkSize = min(buffer.remaining(), size - bytesWritten)
            if (chunkSize > 0) {
                buffer.put(amplifiedData, bytesWritten, chunkSize)
                bytesWritten += chunkSize
            } else {
                Timber.w("Skipping ${size - bytesWritten} bytes due to insufficient buffer space")
                break
            }
        }
        return bytesWritten
    }

    /** Drains all currently available encoder output buffers into the muxer. */
    private fun processEncodedData() {
        val codec = mediaCodec ?: return
        val muxer = mediaMuxer ?: return
        val bufferInfo = MediaCodec.BufferInfo()
        while (true) {
            val outputBufferIndex = codec.dequeueOutputBuffer(bufferInfo, 10000)
            when {
                outputBufferIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> {
                    // BUGFIX: guard against starting the muxer twice.
                    if (!isMuxerStarted) {
                        muxerTrackIndex = muxer.addTrack(codec.outputFormat)
                        muxer.start()
                        isMuxerStarted = true
                    }
                }

                outputBufferIndex == MediaCodec.INFO_TRY_AGAIN_LATER -> return

                outputBufferIndex >= 0 -> {
                    val outputBuffer =
                        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
                            codec.getOutputBuffer(outputBufferIndex)
                        } else {
                            codec.outputBuffers[outputBufferIndex]
                        }
                    // Codec-config (CSD) buffers must not be written as samples.
                    val isConfig =
                        bufferInfo.flags and MediaCodec.BUFFER_FLAG_CODEC_CONFIG != 0
                    if (outputBuffer != null && isMuxerStarted && !isConfig && bufferInfo.size > 0) {
                        muxer.writeSampleData(muxerTrackIndex, outputBuffer, bufferInfo)
                    }
                    codec.releaseOutputBuffer(outputBufferIndex, false)
                    // BUGFIX: terminate once the encoder signals end of stream.
                    if (bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) return
                }
            }
        }
    }

    /** Sends the Vosk eof marker and closes the websocket. */
    private fun disconnect() {
        ws?.sendText("{\"eof\" : 1}")
        ws?.disconnect()
        ws = null
    }

    /** Stops the streaming loop and cancels the background scope. */
    fun finishSocket() {
        isStreaming = false
        // BUGFIX: null-safe — finishSocket() before startTranscribe() must not crash.
        scope?.cancel()
    }

    /** Websocket callbacks: user-facing toasts plus delivery of recognized text. */
    class CustomWebSocketAdapter(
        val context: Context,
        val scope: CoroutineScope,
        var socketLiveListener: SocketLiveListener? = null
    ) : WebSocketAdapter() {
        override fun onConnected(
            websocket: WebSocket?, headers: MutableMap<String, MutableList<String>>?
        ) {
            Timber.tag("CustomWebSocketAdapter").d("onConnected")
            scope.launch {
                withContext(Dispatchers.Main) {
                    Toast.makeText(context, "صحبت کنید", Toast.LENGTH_SHORT).show()
                }
            }
        }

        override fun onDisconnected(
            websocket: WebSocket?,
            serverCloseFrame: WebSocketFrame?,
            clientCloseFrame: WebSocketFrame?,
            closedByServer: Boolean
        ) {
            Timber.tag("CustomWebSocketAdapter").e("onDisconnected")
            Timber.e("closedByServer : $closedByServer")
            scope.launch {
                withContext(Dispatchers.Main) {
                    Toast.makeText(context, "اتصال قطع شد", Toast.LENGTH_SHORT).show()
                }
            }
        }

        override fun onConnectError(websocket: WebSocket?, exception: WebSocketException?) {
            Timber.tag("CustomWebSocketAdapter").e("onConnectError : ${exception?.message}")
            Timber.tag("CustomWebSocketAdapter")
                .e("socket is connected : %s", websocket?.connectedSocket?.isConnected)
            scope.launch {
                withContext(Dispatchers.Main) {
                    Toast.makeText(context, "خطا در اتصال", Toast.LENGTH_SHORT).show()
                }
            }
        }

        override fun onSendError(
            websocket: WebSocket?,
            cause: WebSocketException?,
            frame: WebSocketFrame?
        ) {
            // BUGFIX: the original logged this handler as "onConnectError".
            Timber.tag("CustomWebSocketAdapter").e("onSendError : ${cause?.message}")
            scope.launch {
                withContext(Dispatchers.Main) {
                    Toast.makeText(context, "خطا در هنگام اتصال", Toast.LENGTH_SHORT).show()
                }
            }
        }

        override fun onTextMessage(websocket: WebSocket, message: String) {
            // Partial hypotheses are currently ignored; only final results are delivered.
            if (!message.contains("partial")) {
                val json = Gson().fromJson<ResponseModel>(message, ResponseModel::class.java)
                var output = ""
                json.result.forEach { output += " " + it.withUnescapedWord().word }
                socketLiveListener?.onReceived(output)
                Timber.tag("CustomWebSocketAdapter").d(output)
            }
        }
    }

    /**
     * Multiplies each 16-bit PCM sample by [gain], clipping to the short range.
     * Assumes native byte order (matches AudioRecord output) and an even [size].
     */
    private fun amplifyPcmData(data: ByteArray, size: Int, gain: Float): ByteArray {
        val byteBuffer = ByteBuffer.wrap(data, 0, size).order(ByteOrder.nativeOrder())
        val shortBuffer = byteBuffer.asShortBuffer()
        val shortArray = ShortArray(shortBuffer.remaining())
        shortBuffer.get(shortArray)
        val amplifiedShortArray = ShortArray(shortArray.size) { i ->
            val amplified = (shortArray[i] * gain).toInt()
            when {
                amplified > 32767 -> 32767.toShort()
                amplified < -32768 -> (-32768).toShort()
                else -> amplified.toShort()
            }
        }
        val amplifiedByteBuffer = ByteBuffer.allocate(size).order(ByteOrder.nativeOrder())
        amplifiedByteBuffer.asShortBuffer().put(amplifiedShortArray)
        return amplifiedByteBuffer.array()
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment