Last Updated on: 2023-08-09 09:25:02
This topic describes how to use the audio input and output APIs in the TuyaOS Kernel Layer (TKL) and TuyaOS Abstraction Layer (TAL).
Audio input (AI): Capture sound with a microphone and encode it into audio data. Typically, the frame rate is 25 frames per second.
Audio output (AO): Play audio data on a speaker. The audio can come from prompts and two-way chat.
The captured raw audio data is PCM. To encode raw data to formats such as G.711u and G.711a, process it in the application layer.
After the audio is decoded to raw data in the application layer, call the playback API to output the audio. Before output, switch on the power amplifier pin of the speaker.
// Audio configuration passed to tal_ai_init() / tal_ao_init().
typedef struct {
UINT_T enable; // 1: enable, 0: disable
UINT_T card; // audio card number
TKL_AI_CHN_E ai_chn; // audio input channel
TKL_AUDIO_SAMPLE_E sample; // sample rate
TKL_AUDIO_DATABITS_E datebits; // data bits per sample (the field name is spelled "datebits")
TKL_AUDIO_CHANNEL_E channel; // channel number
TKL_MEDIA_CODEC_TYPE_E codectype; // codec type
INT32_T is_softcodec; // 1: software encoding, 0: hardware encoding
UINT_T fps; // frames per second, 25 is suggested
INT32_T mic_volume; // microphone volume, [0, 100]
INT32_T spk_volume; // speaker volume, [0, 100]
INT32_T spk_volume_offset; // speaker volume offset for adapting to different speakers, [0, 100]. The default value is 0
INT32_T spk_gpio; // speaker amplifier pin number; < 0 means no amplifier
INT32_T spk_gpio_polarity; // pin polarity, 0: enabled at high level, 1: enabled at low level
void* padta; // NOTE(review): undocumented; presumably a pointer to private/platform data - confirm
} TKL_AUDIO_CONFIG_T; // audio config
// The TAL layer uses the TKL config struct as is.
#define TAL_AUDIO_CONFIG_T TKL_AUDIO_CONFIG_T
// One audio frame exchanged with tal_ai_get_frame() / tal_ao_put_frame().
typedef struct {
TKL_MEDIA_FRAME_TYPE_E type; // frame type
CHAR_T* pbuf; // frame data buffer (allocated by the application for tal_ai_get_frame)
UINT_T buf_size; // total size of pbuf, in bytes
UINT_T used_size; // number of bytes of pbuf in use
UINT64_T pts; // SDK presentation timestamp
UINT64_T timestamp; // system UTC time, unit: ms
TKL_MEDIA_CODEC_TYPE_E codectype; // codec type
TKL_AUDIO_SAMPLE_E sample; // sample rate
TKL_AUDIO_DATABITS_E datebits; // data bits per sample (the field name is spelled "datebits")
TKL_AUDIO_CHANNEL_E channel; // channel number
UINT_T seq; // frame sequence number
} TKL_AUDIO_FRAME_INFO_T; // audio frame
// The TAL layer uses the TKL frame struct as is.
#define TAL_AUDIO_FRAME_INFO_T TKL_AUDIO_FRAME_INFO_T
// Commands accepted by tal_ai_set().
typedef enum {
TAL_AI_CMD_VOL, // AI volume. NOTE(review): argument type/range presumably matches TAL_AO_CMD_VOL (int, [0, 100]) - confirm
} TAL_AI_CMD_E;
// Commands accepted by tal_ao_set().
typedef enum {
TAL_AO_CMD_VOL, // AO volume; the argument is an int in the range [0, 100]
} TAL_AO_CMD_E;
Currently, the IP camera (IPC) supports only one AI channel. Initialize video before you initialize audio.
/**
* @brief Initialize audio input (AI)
*
* @param[in] pconfig: audio config
* @param[in] count: number of configs pointed to by pconfig
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ai_init(TAL_AUDIO_CONFIG_T *pconfig, INT32_T count);
/**
* @brief Start audio input (AI) on the given card and channel
*
* @param[in] card: card number
* @param[in] chn: channel number
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ai_start(INT32_T card, INT32_T chn);
/**
* @brief Stop audio input (AI) on the given card and channel
*
* @param[in] card: card number
* @param[in] chn: channel number
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ai_stop(INT32_T card, INT32_T chn);
/**
* @brief Get one audio input (AI) frame
*
* @param[in] card: card number
* @param[in] chn: channel number
* @param[out] pframe: audio frame. pframe->pbuf must be allocated by the upper-layer
*             application before the call, with pframe->buf_size set to its size.
*             NOTE(review): used_size presumably reports the number of bytes written - confirm
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ai_get_frame(INT32_T card, INT32_T chn, TAL_AUDIO_FRAME_INFO_T *pframe);
In the pframe struct, allocate memory for pbuf. A buffer size of 640 bytes is recommended.
Typically, the volume is set during initialization and remains unchanged after AI starts up.
/**
* @brief Set an audio input (AI) parameter
*
* @param[in] card: card number
* @param[in] chn: channel number
* @param[in] cmd: command, see TAL_AI_CMD_E
* @param[in] parg: command argument.
*            NOTE(review): for TAL_AI_CMD_VOL this presumably points to the volume value - confirm
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ai_set(INT32_T card, INT32_T chn, TAL_AI_CMD_E cmd, VOID *parg);
/**
* @brief AI uninit
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ai_uninit(VOID);
Currently, the IP camera (IPC) supports only one AO channel.
Certain platforms require a specific sequence for AO and AI initialization. AI is typically initialized before AO.
The VOID **handle parameter is used only if you manage multiple AO devices. Otherwise, pass NULL, as the example below does.
/**
* @brief Initialize audio output (AO)
*
* @param[in] pconfig: audio config
* @param[in] count: config count
* @param[out] handle: AO handle, used when managing multiple AO devices; the example
*             passes NULL. NOTE(review): direction inferred from the VOID ** type - confirm
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ao_init(TAL_AUDIO_CONFIG_T *pconfig, INT32_T count, VOID **handle);
/**
* @brief AO start
*
* @param[in] card: card number
* @param[in] chn: channel number
* @param[in] handle: AO handle.
*            NOTE(review): this was tagged [out] ("handle of start"), but the pointer is
*            passed by value here while tal_ao_init() takes VOID **handle - confirm
*            which call actually produces the handle
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ao_start(INT32_T card, INT32_T chn, VOID *handle);
/**
* @brief AO stop
*
* @param[in] card: card number
* @param[in] chn: channel number
* @param[in] handle: AO handle
*
* @return NOTE(review): presumably OPRT_OK on success and an error code from
*         tkl_error_code.h otherwise, like the other tal_ao_* APIs - confirm
*/
OPERATE_RET tal_ao_stop(INT32_T card, INT32_T chn, VOID *handle);
The audio to play must be raw data.
/**
* @brief Play one audio frame through AO
*
* @param[in] card: card number
* @param[in] chn: channel number
* @param[in] handle: AO handle
* @param[in] pframe: audio frame to play; the audio must be raw data
*
* @return NOTE(review): presumably OPRT_OK on success and an error code from
*         tkl_error_code.h otherwise, like the other tal_ao_* APIs - confirm
*/
OPERATE_RET tal_ao_put_frame(INT32_T card, INT32_T chn, VOID *handle, TAL_AUDIO_FRAME_INFO_T *pframe);
Set the AO volume after AO initialization.
/**
* @brief Set an audio output (AO) parameter
*
* @param[in] card: card number
* @param[in] chn: channel number
* @param[in] handle: AO handle ("the return of start" in the original text; the example passes NULL)
* @param[in] cmd: command, see TAL_AO_CMD_E
* @param[in] parg: command argument; for TAL_AO_CMD_VOL, a pointer to an int volume in [0, 100]
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ao_set(INT32_T card, INT32_T chn, VOID *handle, TAL_AO_CMD_E cmd, VOID *parg);
/**
* @brief AO uninit
*
* @param[in] handle: AO handle
*
* @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
*/
OPERATE_RET tal_ao_uninit(VOID *handle);
// Configure parameters for the single audio device (audio[0]).
pinfo->audio[0].enable = 1; // 1: enable
pinfo->audio[0].ai_chn = 0; // audio input channel 0
pinfo->audio[0].sample = 8000; // sample rate, 8 kHz
pinfo->audio[0].datebits = 16; // 16 data bits per sample
pinfo->audio[0].channel = 0;
pinfo->audio[0].codectype = 101; // NOTE(review): presumably the TKL_MEDIA_CODEC_TYPE_E value for raw PCM - confirm
pinfo->audio[0].fps = 25; // the suggested frame rate
pinfo->audio[0].mic_volume = 80; // range [0, 100]
pinfo->audio[0].spk_volume = 80; // range [0, 100]
pinfo->audio[0].spk_gpio = 15; // speaker amplifier pin number
pinfo->audio[0].spk_gpio_polarity = 0; // 0: amplifier enabled at high level
// Initialize: AI first, then AO, then the speaker amplifier GPIO.
// Each failure path releases whatever was already initialized.
ret = tal_ai_init(pinfo->audio, 1);
if (OPRT_OK != ret) {
    TYERROR("tal_ai_init failed, %d\n", ret);
    return -1;
}
// No AO handle is requested (NULL): only one AO device is used here.
ret = tal_ao_init(pinfo->audio, 1, NULL);
if (OPRT_OK != ret) {
    TYERROR("tal_ao_init failed, %d\n", ret);
    tal_ai_uninit(); // undo the AI init that already succeeded
    return -1;
}
ret = ty_sys_gpio_init(&spk_gpio);
if (OPRT_OK != ret) {
    // Log the function that actually failed.
    TYERROR("ty_sys_gpio_init failed, %d\n", ret);
    tal_ao_uninit(NULL);
    tal_ai_uninit();
    return -1;
}
// Audio input:
ret = tal_ai_start(0, 0);
if (OPRT_OK != ret) {
TYERROR("ty_dev_ai_start failed, %d\n", ret);
return -1;
}
TAL_AUDIO_FRAME_INFO_T frame = {0};
frame.pbuf = (char *)malloc(640);
frame.buf_size = 640;
ret = tal_ai_get_frame(0, 0, &frame);
if (OPRT_OK != ret) {
// TYERROR("get frame failed type:%d\n", type);
return -1;
}
ret = tuya_g711_encode(TUYA_G711_MU_LAW, (unsigned short *)buf, used_size, tmpBuf, &outLen);
...
// Audio output:
ret = tal_ao_start(0, 0, handle);
...
ret = tal_gpio_write(spk_gpio, TAL_GPIO_LEVEL_HIGH);
...
ret = tuya_g711_decode(g711Type, (unsigned short *)pbuf, used_size, buf, &outLen);
...
ret = tal_ao_put_frame(0, 0, NULL, pFrame);
...
ret = tal_gpio_write(spk_gpio, TAL_GPIO_LEVEL_HIGH);
...
ret = tal_ao_stop(spk_gpio, TAL_GPIO_LEVEL_HIGH);
...
// Adjust playback volume: for TAL_AO_CMD_VOL, the argument is an int volume in [0, 100].
// Call this after AO initialization.
ret = tal_ao_set(0, 0, NULL, TAL_AO_CMD_VOL, &spk_volume);
Video initialization is performed before audio initialization.
For devices that support video input, video initialization is performed before AI initialization. This is because the native SDK of the chip platform needs to allocate memory for the media memory zone (MMZ). MMZ memory is allocated during video initialization.
A buffer of 640 bytes is recommended for pbuf. The common encoding format for integration with third-party devices is G.711 or 8 kHz PCM. With 16-bit samples, one second of PCM audio is 16,000 bytes (8,000 samples × 16 bits ÷ 8 = 16,000 bytes). At a frame rate of 25 fps, each frame consists of 640 bytes (16,000 ÷ 25 = 640). Audio encoded with G.711 is half the length of the PCM data, so an encoded frame requires 320 bytes.
Is this page helpful?
YesFeedbackIs this page helpful?
YesFeedback