오디오 녹음 및 재생
pyaudio 모듈
- 오디오 녹음
pip install pyaudio
pydub
- 오디오 재생 및 포맷 변환
pip install pydub
라즈베리파이
sudo apt install ffmpeg
sudo pip install ffmpeg-python
sudo apt-get install python3-pyaudio
sudo pip install pydub
디바이스 정보 목록
audio_device_list.py
import pyaudio
p = pyaudio.PyAudio()
info = p.get_host_api_info_by_index(0) # 오디오 디바이스 정보 얻기
numdevices = info.get('deviceCount') # 디바이스 갯수 얻기
for i in range(0, numdevices):
if (p.get_device_info_by_host_api_device_index(0, i) \
.get('maxInputChannels')) > 0:
print("Input Device id ", i, " - ",
p.get_device_info_by_host_api_device_index(0, i).get('name'))
Input Device id 0 - Microsoft 사운드 매퍼 - Input
Input Device id 1 - Microphone(USB PnP Sound Device
Input Device id 2 - 1(High Definition Audio Device)
Input Device id 3 - 2(High Definition Audio Device)
라즈베리파이
python audio_device_list.py 2> err_log.txt
디버그 내용 제거
디폴트 오디오 디바이스 정보 추출
default_audio_device.py
import pyaudio
p = pyaudio.PyAudio()
default_input = p.get_default_input_device_info()
print(default_input)
default_output = p.get_default_output_device_info()
print(default_output)
{'index': 1, 'structVersion': 2, 'name': 'Microphone(USB PnP Sound Device', 'hostApi': 0, 'maxInputChannels': 1, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}
{'index': 5, 'structVersion': 2, 'name': 'Speakers(High Definition Audio ', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}
녹음하기
pyaudio_record.py
import pyaudio
import wave
MIC_DEVICE_ID = 1
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
# RATE = 44100
RATE = 16000 # 카카오 음성 인식에서 요구하는 RATE
# 이것을 라즈베리파이에서는 인식을 못하여 아래 16k 변환 소스 참고
SAMPLE_SIZE = 2 # FORMAT의 바이트 수
def record(record_seconds):
p = pyaudio.PyAudio()
stream = p.open(input_device_index=MIC_DEVICE_ID,
format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("Start to record the audio.")
frames = []
for i in range(0, int(RATE / CHUNK * record_seconds)):
data = stream.read(CHUNK)
frames.append(data)
print("Recording is finished.")
stream.stop_stream()
stream.close()
p.terminate()
return frames
# 녹음 데이터를 WAV 파일로 저장하기
def save_wav(target, frames):
wf = wave.open(target, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(SAMPLE_SIZE)
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
if isinstance(target, str):
wf.close()
if __name__ == '__main__':
RECORD_SECONDS = 5
frames = record(RECORD_SECONDS)
WAVE_OUTPUT_FILENAME = "output.wav"
save_wav(WAVE_OUTPUT_FILENAME, frames)
녹음하면서 재생하기
audio_rec_play.py
import pyaudio
import wave
MIC_DEVICE_ID = 1
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(input_device_index=MIC_DEVICE_ID,
format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
out = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
output=True)
print("Start to record the audio.")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
out.write(data)
frames.append(data)
print("Recording is finished.")
stream.stop_stream()
stream.close()
p.terminate()
카카오 음성 인식 + 녹음
kakao_test.py
from kakao_dictation import *
from pyaudio_record import *
import io
audio_data = record(5) # 녹음 데이터 리스트
# file_name = 'test.wav'
# save_wav(file_name, audio_data)
# f = open(file_name, 'rb')
# audio_data = f.read()
audio_stream = io.BytesIO()
save_wav(audio_stream, audio_data)
result = dictation(audio_stream.getvalue()) # wav 파일 내용 전달
print('인식결과', result)
16000 녹음하기
rec16K.py
import pyaudio
import wave
import io
from pydub import AudioSegment
from kakao_dictation import *
from pyaudio_record import *
CHUNK = 1024
FORMAT = pyaudio.paInt16
SAMPLE_WIDTH = 2
CHANNELS = 1
RATE = 44100
def record(record_seconds=5):
p = pyaudio.PyAudio()
stream = p.open(input_device_index=0,
format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("Start to record the audio.")
frames = []
for i in range(0, int(RATE / CHUNK * record_seconds)):
data = stream.read(CHUNK, exception_on_overflow=False)
frames.append(data)
print("Recording is finished.")
stream.stop_stream()
stream.close()
p.terminate()
return frames
def convertTo16K(file):
# 16000 으로 변환
sound = AudioSegment.from_file(file)
sound = sound.set_frame_rate(16000)
rec_data = io.BytesIO()
sound.export(rec_data, format="wav")
sound.export("rec.wav", format="wav")
return rec_data
# wav 파일 저장
def getWav(frames):
wavfile = io.BytesIO()
wf = wave.open(wavfile, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(SAMPLE_WIDTH)
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wavfile.seek(0)
return wavfile
if __name__ == "__main__":
audio_data = record()
wav_data = getWav(audio_data)
wav_data = convertTo16K(wav_data)
# wav_data 처리
result = dictation(wav_data.getvalue()) # wav 파일 내용 전달
print('인식결과', result)
Start to record the audio.
Recording is finished.
인식결과 헤이 카카오야
AudioSegment
https://audiosegment.readthedocs.io/en/latest/audiosegment.html
'IoT 디바이스 활용 > Raspberry Pi' 카테고리의 다른 글
Raspberry Pi - 자동차 제어 (rccar.py) (0) | 2020.10.19 |
---|---|
Raspberry Pi - 블루투스 통신 (btsocket.py) (0) | 2020.10.19 |
Raspberry Pi - 카카오 음성합성, 음성인식 (0) | 2020.10.19 |
Raspberry Pi - python-picamera - 이미지 프로세싱 (picam.py) (0) | 2020.10.19 |
Raspberry Pi - python-picamera - 동영상 촬영 (0) | 2020.10.19 |
댓글