본문 바로가기
IoT 디바이스 활용/Raspberry Pi

Raspberry Pi - 오디오 녹음 및 재생

by cooluk 2020. 10. 19.

오디오 녹음 및 재생

pyaudio 모듈

  • 오디오 녹음
  • pip install pyaudio

pydub

  • 오디오 재생 및 포맷 변환
  • pip install pydub

라즈베리파이

  • sudo apt install ffmpeg
  • sudo pip install ffmpeg-python
  • sudo apt-get install python3-pyaudio
  • sudo pip install pydub

디바이스 정보 목록

audio_device_list.py

import pyaudio

p = pyaudio.PyAudio()
info = p.get_host_api_info_by_index(0) # 오디오 디바이스 정보 얻기
numdevices = info.get('deviceCount') # 디바이스 갯수 얻기

for i in range(0, numdevices):
    if (p.get_device_info_by_host_api_device_index(0, i) \
        .get('maxInputChannels')) > 0:
        print("Input Device id ", i, " - ",
            p.get_device_info_by_host_api_device_index(0, i).get('name'))

Input Device id 0 - Microsoft 사운드 매퍼 - Input
Input Device id 1 - Microphone(USB PnP Sound Device
Input Device id 2 - 1(High Definition Audio Device)
Input Device id 3 - 2(High Definition Audio Device)


라즈베리파이
python audio_device_list.py 2> err_log.txt 디버그 내용 제거



디폴트 오디오 디바이스 정보 추출

default_audio_device.py

import pyaudio

p = pyaudio.PyAudio()

default_input = p.get_default_input_device_info()
print(default_input)

default_output = p.get_default_output_device_info()
print(default_output)

{'index': 1, 'structVersion': 2, 'name': 'Microphone(USB PnP Sound Device', 'hostApi': 0, 'maxInputChannels': 1, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}
{'index': 5, 'structVersion': 2, 'name': 'Speakers(High Definition Audio ', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}



녹음하기

pyaudio_record.py

import pyaudio
import wave

MIC_DEVICE_ID = 1

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
# RATE = 44100
RATE = 16000     # 카카오 음성 인식에서 요구하는 RATE
# 이것을 라즈베리파이에서는 인식을 못하여 아래 16k 변환 소스 참고
SAMPLE_SIZE = 2  # FORMAT의 바이트 수

def record(record_seconds):
    p = pyaudio.PyAudio()
    stream = p.open(input_device_index=MIC_DEVICE_ID,
                    format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print("Start to record the audio.")
    frames = []

    for i in range(0, int(RATE / CHUNK * record_seconds)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("Recording is finished.")

    stream.stop_stream()
    stream.close()
    p.terminate()

    return frames


# 녹음 데이터를 WAV 파일로 저장하기
def save_wav(target, frames):
    wf = wave.open(target, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(SAMPLE_SIZE)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

    if isinstance(target, str):
        wf.close()

if __name__ == '__main__':
    RECORD_SECONDS = 5
    frames = record(RECORD_SECONDS)

    WAVE_OUTPUT_FILENAME = "output.wav"
    save_wav(WAVE_OUTPUT_FILENAME, frames)

녹음하면서 재생하기

audio_rec_play.py

import pyaudio
import wave

MIC_DEVICE_ID = 1

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100

RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
stream = p.open(input_device_index=MIC_DEVICE_ID,
                format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

out = p.open(format=FORMAT,
             channels=CHANNELS,
             rate=RATE,
             output=True)

print("Start to record the audio.")
frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    out.write(data)
    frames.append(data)

print("Recording is finished.")

stream.stop_stream()
stream.close()

p.terminate()


카카오 음성 인식 + 녹음

kakao_test.py

from kakao_dictation import *
from pyaudio_record import *
import io


audio_data = record(5)  # 녹음 데이터 리스트

# file_name = 'test.wav'
# save_wav(file_name, audio_data)
# f = open(file_name, 'rb')
# audio_data = f.read()

audio_stream = io.BytesIO()
save_wav(audio_stream, audio_data)
result = dictation(audio_stream.getvalue())    # wav 파일 내용 전달
print('인식결과', result)


16000 녹음하기

rec16K.py

import pyaudio
import wave
import io
from pydub import AudioSegment
from kakao_dictation import *
from pyaudio_record import *

CHUNK = 1024
FORMAT = pyaudio.paInt16
SAMPLE_WIDTH = 2
CHANNELS = 1
RATE = 44100

def record(record_seconds=5):
    p = pyaudio.PyAudio()
    stream = p.open(input_device_index=0,
                    format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("Start to record the audio.")

    frames = []
    for i in range(0, int(RATE / CHUNK * record_seconds)):
        data = stream.read(CHUNK, exception_on_overflow=False)
        frames.append(data)

    print("Recording is finished.")
    stream.stop_stream()
    stream.close()
    p.terminate()

    return frames


def convertTo16K(file):
    # 16000 으로 변환
    sound = AudioSegment.from_file(file)
    sound = sound.set_frame_rate(16000)
    rec_data = io.BytesIO()
    sound.export(rec_data, format="wav")
    sound.export("rec.wav", format="wav")
    return rec_data

# wav 파일 저장
def getWav(frames):
    wavfile = io.BytesIO()
    wf = wave.open(wavfile, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(SAMPLE_WIDTH)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wavfile.seek(0)
    return wavfile

if __name__ == "__main__":
    audio_data = record()
    wav_data = getWav(audio_data)
    wav_data = convertTo16K(wav_data)
    # wav_data 처리
    result = dictation(wav_data.getvalue())    # wav 파일 내용 전달
    print('인식결과', result)

Start to record the audio.
Recording is finished.
인식결과 헤이 카카오야


AudioSegment

https://audiosegment.readthedocs.io/en/latest/audiosegment.html


댓글