Created
October 29, 2017 11:29
-
-
Save PandaWhoCodes/9f3dc05faee761149842e43b56e6ee8c to your computer and use it in GitHub Desktop.
Noise reduction using pyaudio documentation code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Measure the frequencies coming in through the microphone.
Patchwork of wire_full.py from the pyaudio tests and spectrum.py from the Chaco examples.
"""
import pyaudio | |
import numpy as np | |
import scipy.signal | |
# Stream configuration.
CHUNK = 1024 * 2      # frames read from / written to the stream per iteration
WIDTH = 2             # sample width in bytes (2 -> 16-bit samples)
DTYPE = np.int16      # NumPy dtype used to decode/encode the raw stream bytes
MAX_INT = 32768.0     # scale factor between int16 samples and floats in [-1, 1)
CHANNELS = 1
RATE = 11025 * 1      # value passed to PyAudio as the stream rate
RECORD_SECONDS = 20   # not used below: the loop runs until interrupted
MEMORY = 100          # weight of the previous value in the running spectrum average
j = complex(0, 1)     # was np.complex(0, 1); that alias was removed in NumPy 1.24


def decode_chunk(raw):
    """Convert raw stream bytes into a float array scaled by 1 / MAX_INT.

    Uses np.frombuffer instead of the deprecated binary mode of
    np.fromstring.  The division produces a new array, so the read-only
    view returned by frombuffer is never written to.
    """
    return np.frombuffer(raw, dtype=DTYPE) / MAX_INT


def encode_chunk(samples):
    """Convert float samples into raw DTYPE bytes for the output stream.

    Samples are scaled by MAX_INT, rounded, and clipped to the int16 range
    before the cast, so values outside [-1, 1) saturate instead of wrapping
    around.  Uses np.round / tobytes in place of np.round_ / tostring.
    """
    scaled = np.clip(np.round(samples * MAX_INT), -MAX_INT, MAX_INT - 1)
    return scaled.astype(DTYPE).tobytes()


def match_objective(avg_spectrum, freq_synth):
    """Score how well the synthesized spectrum matches the averaged input.

    Returns dot(avg_spectrum[1:51], |bins|) / sum(|bins|**2), where bins are
    freq_synth[1:100:2].  When the selected bins carry no energy the ratio
    is 0/0; -inf is returned instead of NaN so that the next finite score
    can still win the `obj > obj_last` comparison.

    NOTE(review): avg_spectrum comes from a CHUNK-point FFT and freq_synth
    from a 2*CHUNK-point FFT, so bin k of the former lines up with bin 2k
    of the latter.  The odd bins 1, 3, ..., 99 are kept from the original;
    confirm whether 2:102:2 was intended.
    """
    bins = freq_synth[1:100:2]
    energy = np.sum(np.abs(bins) ** 2)
    if energy == 0:
        return -np.inf
    return np.dot(avg_spectrum[1:51], np.abs(bins)) / energy


def perturb_filter(fir):
    """Return a randomly perturbed, tapered copy of *fir* (input untouched).

    One of the first 100 frequency bins is nudged by a small Gaussian
    amount, the filter is transformed back to the time domain, its first
    CHUNK taps are faded out and the remaining taps are zeroed (the
    original calls this step "click-free").
    """
    freq_fir = np.fft.fft(fir)
    t = np.random.randint(100)
    freq_fir[t] += np.random.randn() * .05
    new_fir = np.real(np.fft.ifft(freq_fir))
    new_fir[:CHUNK] *= np.linspace(1., 0., CHUNK) ** .1
    new_fir[CHUNK:] = 0
    return new_fir


def run_filter_loop(stream):
    """Read microphone chunks and play back adaptively filtered noise forever.

    Each iteration filters white noise with the current FIR filter, scores
    the result against a running average of the microphone spectrum, keeps
    the filter if the score improved on the previous iteration, and then
    tries a new random perturbation of the kept filter.
    """
    # Start from a flat averaging filter (all taps equal, summing to 1).
    fir = np.full(CHUNK * 2, 1.0 / (CHUNK * 2))
    fir_last = fir
    avg_freq_buffer = np.zeros(CHUNK)
    obj = -np.inf
    # Two-chunk noise buffer: previous chunk in the first half, new chunk
    # in the second half.
    buffer = np.zeros(CHUNK * 2)

    while True:
        # read audio
        freq_data = np.fft.fft(decode_chunk(stream.read(CHUNK)))

        # synthesize audio: filter fresh noise by multiplying spectra
        buffer[CHUNK:] = np.random.randn(CHUNK)
        freq_synth = np.fft.fft(fir) * np.fft.fft(buffer)
        synth = np.real(np.fft.ifft(freq_synth))

        # adjust fir: keep the candidate only if it scored better than the
        # previous iteration did
        avg_freq_buffer = (avg_freq_buffer * MEMORY + np.abs(freq_data)) / (MEMORY + 1)
        obj_last = obj
        obj = match_objective(avg_freq_buffer, freq_synth)
        if obj > obj_last:
            fir_last = fir
        fir = perturb_filter(fir_last)

        # move chunk to start of buffer
        buffer[:CHUNK] = buffer[CHUNK:]

        # write audio (second half of the filtered two-chunk buffer)
        stream.write(encode_chunk(synth[CHUNK:]), CHUNK)


def main():
    """Open a duplex PyAudio stream and run the loop until Ctrl-C."""
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=True,
                        frames_per_buffer=CHUNK)
        try:
            print("Recording Audio...")
            run_filter_loop(stream)
        except KeyboardInterrupt:
            # Ctrl-C is the only way out of the loop; treat it as a normal stop.
            pass
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        # Runs even when p.open() fails, so PortAudio is always released.
        p.terminate()


if __name__ == "__main__":
    main()
import pyaudio
import numpy as np
import scipy.signal
import wave
# Stream configuration.
CHUNK = 1024          # frames read from / written to the stream per iteration
WIDTH = 2             # sample width in bytes (2 -> 16-bit samples)
DTYPE = np.int16      # NumPy dtype used to decode/encode the raw stream bytes
MAX_INT = 32768.0     # scale factor between int16 samples and floats in [-1, 1)
CHANNELS = 1
RATE = 16000          # value passed to PyAudio as the stream rate
RECORD_SECONDS = 7    # length of the session; bounds the processing loop
WAVE_OUTPUT_FILENAME = "Speech.wav"
MEMORY = 100          # weight of the previous value in the running spectrum average
j = complex(0, 1)     # was np.complex(0, 1); that alias was removed in NumPy 1.24
# The original FORMAT = pyaudio.paInt16 constant was only used to look up the
# WAV sample width.  The stream itself is opened from WIDTH, so WIDTH is used
# for the WAV header as well and the two can no longer disagree.


def decode_chunk(raw):
    """Convert raw stream bytes into a float array scaled by 1 / MAX_INT.

    Uses np.frombuffer instead of the deprecated binary mode of
    np.fromstring.  The division produces a new array, so the read-only
    view returned by frombuffer is never written to.
    """
    return np.frombuffer(raw, dtype=DTYPE) / MAX_INT


def encode_chunk(samples):
    """Convert float samples into raw DTYPE bytes for the output stream.

    Samples are scaled by MAX_INT, rounded, and clipped to the int16 range
    before the cast, so values outside [-1, 1) saturate instead of wrapping
    around.  Uses np.round / tobytes in place of np.round_ / tostring.
    """
    scaled = np.clip(np.round(samples * MAX_INT), -MAX_INT, MAX_INT - 1)
    return scaled.astype(DTYPE).tobytes()


def match_objective(avg_spectrum, freq_synth):
    """Score how well the synthesized spectrum matches the averaged input.

    Returns dot(avg_spectrum[1:51], |bins|) / sum(|bins|**2), where bins are
    freq_synth[1:100:2].  When the selected bins carry no energy the ratio
    is 0/0; -inf is returned instead of NaN so that the next finite score
    can still win the `obj > obj_last` comparison.
    """
    bins = freq_synth[1:100:2]
    energy = np.sum(np.abs(bins) ** 2)
    if energy == 0:
        return -np.inf
    return np.dot(avg_spectrum[1:51], np.abs(bins)) / energy


def perturb_filter(fir):
    """Return a randomly perturbed, tapered copy of *fir* (input untouched).

    One of the first 100 frequency bins is nudged by a small Gaussian
    amount, the filter is transformed back to the time domain, its first
    CHUNK taps are faded out and the remaining taps are zeroed (the
    original calls this step "click-free").
    """
    freq_fir = np.fft.fft(fir)
    t = np.random.randint(100)
    freq_fir[t] += np.random.randn() * .05
    new_fir = np.real(np.fft.ifft(freq_fir))
    new_fir[:CHUNK] *= np.linspace(1., 0., CHUNK) ** .1
    new_fir[CHUNK:] = 0
    return new_fir


def process_stream(stream, frames):
    """Run the adaptive loop for RECORD_SECONDS, appending output to *frames*.

    *frames* is filled in place so that chunks produced before an
    interruption are still available to the caller.  In the original the
    list was re-created inside the loop, which discarded every chunk but
    the last, and the loop never terminated.
    """
    # Start from a flat averaging filter (all taps equal, summing to 1).
    fir = np.full(CHUNK * 2, 1.0 / (CHUNK * 2))
    fir_last = fir
    avg_freq_buffer = np.zeros(CHUNK)
    obj = -np.inf
    # Two-chunk noise buffer: previous chunk first, new chunk second.
    buffer = np.zeros(CHUNK * 2)

    # Same bound as the commented-out loop header in the original.
    for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
        # read audio
        freq_data = np.fft.fft(decode_chunk(stream.read(CHUNK)))

        # synthesize audio: filter fresh noise by multiplying spectra
        buffer[CHUNK:] = np.random.randn(CHUNK)
        freq_synth = np.fft.fft(fir) * np.fft.fft(buffer)
        synth = np.real(np.fft.ifft(freq_synth))

        # adjust fir: keep the candidate only if it scored better than the
        # previous iteration did
        avg_freq_buffer = (avg_freq_buffer * MEMORY + np.abs(freq_data)) / (MEMORY + 1)
        obj_last = obj
        obj = match_objective(avg_freq_buffer, freq_synth)
        if obj > obj_last:
            fir_last = fir
        fir = perturb_filter(fir_last)

        # move chunk to start of buffer
        buffer[:CHUNK] = buffer[CHUNK:]

        # write audio and keep a copy for the WAV file
        string_audio_data = encode_chunk(synth[CHUNK:])
        frames.append(string_audio_data)
        stream.write(string_audio_data, CHUNK)


def save_wav(path, frames):
    """Write the collected byte chunks to *path* as a WAV file.

    The header uses CHANNELS, WIDTH and RATE.  The `with` block closes the
    file even if writing fails.
    """
    with wave.open(path, 'wb') as wave_file:
        wave_file.setnchannels(CHANNELS)
        wave_file.setsampwidth(WIDTH)
        wave_file.setframerate(RATE)
        wave_file.writeframes(b''.join(frames))


def main():
    """Process the microphone for RECORD_SECONDS and save the output."""
    frames = []
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=p.get_format_from_width(WIDTH),
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        output=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            process_stream(stream, frames)
        except KeyboardInterrupt:
            # Stopping early is fine; whatever was produced is still saved.
            pass
        finally:
            stream.stop_stream()
            stream.close()
        print("* done")
    finally:
        # Runs even when p.open() fails, so PortAudio is always released.
        p.terminate()
    save_wav(WAVE_OUTPUT_FILENAME, frames)


if __name__ == "__main__":
    main()
^ Above are the changes I made to your code for my own use.
If this approach won't work, can you help me with noise reduction on a WAV file in Python? I need it for speech recognition: background noise makes the accuracy of my speech recognition take a dive.
Anyway, the following is the output I get when I try to run the above code:
runfile('C:/Users/hs45858/NoiseReduction.py', wdir='C:/Users/hs45858')
* recording
C:/Users/hs45858/NoiseReduction.py:58: DeprecationWarning: The binary mode of fromstring is deprecated, as it behaves surprisingly on unicode inputs. Use frombuffer instead
audio_data = np.fromstring(string_audio_data, dtype=DTYPE)
C:/Users/hs45858/NoiseReduction.py:76: RuntimeWarning: invalid value encountered in true_divide
obj = np.real(np.dot(avg_freq_buffer[1:51], np.abs(freq_synth[1:100:2])) / np.dot(freq_synth[1:100:2], np.conj(freq_synth[1:100:2])))
Traceback (most recent call last):
File "<ipython-input-5-4d0a5b75ddd0>", line 1, in <module>
runfile('C:/Users/hs45858/NoiseReduction.py', wdir='C:/Users/hs45858')
File "C:\Program Files\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "C:\Program Files\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/hs45858/NoiseReduction.py", line 101, in <module>
stream.write(string_audio_data, CHUNK)
File "C:\Users\hs45858\AppData\Local\conda\conda\envs\harsh\Lib\site-packages\pyaudio.py", line 586, in write
exception_on_underflow)
KeyboardInterrupt
Your code is trying to divide by 0. To silence the warning, add this (with numpy imported as np):
np.seterr(divide='ignore', invalid='ignore')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The code is pretty old and should work despite the warnings.
Can you give me the full stack trace so that I can help you better?