Audio Input and Output

Last Updated on: 2023-08-09 09:25:02

This topic describes how to use the audio input and output APIs in the TuyaOS Kernel Layer (TKL) and TuyaOS Abstraction Layer (TAL).

  • Audio input (AI): Capture sound with a microphone and encode it into audio data. Typically, the frame rate is 25 frames per second.

  • Audio output (AO): Play audio data on a speaker. The audio can come from prompts and two-way chat.

Implementation

Audio input

The captured raw audio data is PCM. To encode raw data to formats such as G.711u and G.711a, process it in the application layer.

Audio Input and Output

Audio output

After the audio is decoded to raw data in the application layer, call the playback API to output the audio. Before output, switch on the power amplifier pin of the speaker.

Audio Input and Output

Data structure

Struct of initial parameters

// Initial parameters for audio input/output; passed to tal_ai_init() and tal_ao_init().
typedef struct {
    UINT_T enable;                     // 1: enable, 0: disable
    UINT_T card;                       // audio card number
    TKL_AI_CHN_E ai_chn;               // audio input channel
    TKL_AUDIO_SAMPLE_E sample;         // sample rate, e.g. 8000 for 8 kHz
    TKL_AUDIO_DATABITS_E datebits;     // data bits per sample, e.g. 16 (field name spelling is part of the API)
    TKL_AUDIO_CHANNEL_E channel;       // channel num
    TKL_MEDIA_CODEC_TYPE_E codectype;  // codec type
    INT32_T is_softcodec;              // 1: software encode, 0: hardware encode
    UINT_T fps;                        // frames per second, 25 suggested
    INT32_T mic_volume;                // mic volume, [0, 100]
    INT32_T spk_volume;                // speaker volume, [0, 100]
    INT32_T spk_volume_offset;         // speaker volume offset for adapting to different speakers, [0, 100], default 0
    INT32_T spk_gpio;                  // speaker amplifier pin number; <0 means no amplifier
    INT32_T spk_gpio_polarity;         // amplifier pin polarity: 0 = enabled when high, 1 = enabled when low
    void* padta;                       // NOTE(review): undocumented; presumably a private data pointer ("pdata"?) -- confirm
} TKL_AUDIO_CONFIG_T;                  // audio config
#define TAL_AUDIO_CONFIG_T             TKL_AUDIO_CONFIG_T

Struct of audio data

// One audio frame, as filled by tal_ai_get_frame() and consumed by tal_ao_put_frame().
typedef struct {
    TKL_MEDIA_FRAME_TYPE_E type;         // frame type
    CHAR_T* pbuf;                       // frame buffer; allocated by the upper-layer application
    UINT_T buf_size;                    // total size of pbuf
    UINT_T used_size;                   // size of pbuf actually used
    UINT64_T pts;                       // SDK pts
    UINT64_T timestamp;                 // system UTC time, unit: ms
    TKL_MEDIA_CODEC_TYPE_E codectype;    // codec type
    TKL_AUDIO_SAMPLE_E sample;          // sample rate
    TKL_AUDIO_DATABITS_E datebits;      // data bits per sample (field name spelling is part of the API)
    TKL_AUDIO_CHANNEL_E channel;        // channel num
    UINT_T seq;                         // frame sequence number
} TKL_AUDIO_FRAME_INFO_T;               // audio frame
#define TAL_AUDIO_FRAME_INFO_T TKL_AUDIO_FRAME_INFO_T

Control commands

// Control commands accepted by tal_ai_set().
typedef enum {
    TAL_AI_CMD_VOL, // AI volume; NOTE(review): argument presumably an int in [0, 100] like mic_volume -- confirm
} TAL_AI_CMD_E;

// Control commands accepted by tal_ao_set().
typedef enum {
    TAL_AO_CMD_VOL, // AO volume; argument is an int in the range [0, 100]
} TAL_AO_CMD_E;

Audio input

Initialize AI

The IPC only supports 1-channel AI currently. Video initialization is performed before audio initialization.

/**
 * @brief Initialize audio input (AI)
 *
 * @param[in] pconfig: audio config
 * @param[in] count: count of pconfig
 *
 * @note The IPC currently supports only 1-channel AI. On devices with video
 *       input, video initialization is performed before AI initialization.
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ai_init(TAL_AUDIO_CONFIG_T *pconfig, INT32_T count);

Start AI

/**
 * @brief Start audio input (AI) on the given card and channel
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ai_start(INT32_T card, INT32_T chn);

Stop AI

/**
 * @brief Stop audio input (AI) on the given card and channel
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ai_stop(INT32_T card, INT32_T chn);

Get audio frames

/**
 * @brief Get one captured audio frame from AI
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 * @param[out] pframe: audio frame; pframe->pbuf must be allocated by the
 *                     upper-layer application before the call (640 bytes is
 *                     the recommended size)
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ai_get_frame(INT32_T card, INT32_T chn, TAL_AUDIO_FRAME_INFO_T *pframe);

Before calling this API, allocate memory for the pbuf member of the pframe struct. A buffer size of 640 bytes is recommended.

Set AI

Typically, the volume is set during initialization and remains unchanged after AI starts up.

/**
 * @brief Apply a control command to AI
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 * @param[in] cmd: control command, see TAL_AI_CMD_E
 * @param[in] parg: pointer to the argument of cmd
 *
 * @note Typically, the volume is set during initialization and remains
 *       unchanged after AI starts up.
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ai_set(INT32_T card, INT32_T chn, TAL_AI_CMD_E cmd, VOID *parg);

Deinitialize AI

/**
 * @brief AI uninit
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ai_uninit(VOID);

Audio output

Initialize AO

  • The IPC only supports 1-channel AO currently.

  • Certain platforms require a specific sequence for AO and AI initialization. AI is typically initialized before AO.

  • VOID **handle is used if you manage multiple AO devices.

/**
 * @brief Initialize audio output (AO)
 *
 * @param[in] pconfig: audio config
 * @param[in] count: config count
 * @param handle: used when managing multiple AO devices; the example in this
 *                topic passes NULL for a single device.
 *                NOTE(review): VOID ** suggests an output handle -- confirm.
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ao_init(TAL_AUDIO_CONFIG_T *pconfig, INT32_T count, VOID **handle);

Start playing audio

/**
 * @brief Start audio output (AO) on the given card and channel
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 * @param[out] handle: handle of start
 *                     NOTE(review): declared as VOID * passed by value, so the
 *                     caller's pointer itself cannot be updated -- confirm
 *                     whether this is really an output.
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ao_start(INT32_T card, INT32_T chn, VOID *handle);

Stop playing audio

/**
 * @brief Stop audio output (AO) on the given card and channel
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 * @param[in] handle: AO handle (NOTE(review): presumably the same handle used
 *                    with tal_ao_start -- confirm)
 *
 * @return NOTE(review): presumably OPRT_OK on success like the other TAL audio
 *         APIs -- confirm
 */
OPERATE_RET tal_ao_stop(INT32_T card, INT32_T chn, VOID *handle);

Play audio frames

The audio to play must be raw data.

/**
 * @brief Play one audio frame on AO
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 * @param[in] handle: AO handle; the example in this topic passes NULL
 * @param[in] pframe: audio frame to play; the audio must be raw data
 *
 * @return NOTE(review): presumably OPRT_OK on success like the other TAL audio
 *         APIs -- confirm
 */
OPERATE_RET tal_ao_put_frame(INT32_T card, INT32_T chn, VOID *handle, TAL_AUDIO_FRAME_INFO_T *pframe);

Set playback

Set the AO volume after AO initialization.

/**
 * @brief Apply a control command to AO
 *
 * @param[in] card: card number
 * @param[in] chn: channel number
 * @param[in] handle: the return of start
 * @param[in] cmd: control command, see TAL_AO_CMD_E
 * @param[in] parg: pointer to the argument of cmd (for TAL_AO_CMD_VOL, an int
 *                  in the range [0, 100])
 *
 * @note Set the AO volume after AO initialization.
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ao_set(INT32_T card, INT32_T chn, VOID *handle, TAL_AO_CMD_E cmd, VOID *parg);

Deinitialize AO

/**
 * @brief AO uninit
 *
 * @param[in] handle: AO handle (NOTE(review): presumably the handle associated
 *                    with tal_ao_init -- confirm)
 *
 * @return OPRT_OK on success. Others on error, please refer to tkl_error_code.h
 */
OPERATE_RET tal_ao_uninit(VOID *handle);

Example

// Configure parameters for the single audio device used by both AI and AO.
pinfo->audio[0].enable = 1;              // 1: enable
pinfo->audio[0].ai_chn = 0;              // audio input channel 0
pinfo->audio[0].sample = 8000;           // 8 kHz sample rate
pinfo->audio[0].datebits = 16;           // 16 data bits per sample
pinfo->audio[0].channel = 0;             // NOTE(review): raw TKL_AUDIO_CHANNEL_E value; meaning of 0 is not shown here -- confirm
pinfo->audio[0].codectype = 101;         // NOTE(review): raw TKL_MEDIA_CODEC_TYPE_E value; presumably PCM -- confirm
pinfo->audio[0].fps = 25;                // 25 frames per second (the suggested value)
pinfo->audio[0].mic_volume = 80;         // valid range [0, 100]
pinfo->audio[0].spk_volume = 80;         // valid range [0, 100]
pinfo->audio[0].spk_gpio = 15;           // speaker amplifier pin number
pinfo->audio[0].spk_gpio_polarity = 0;   // 0: amplifier is enabled when the pin is high

// Initialize: AI first, then AO (AI is typically initialized before AO).
ret = tal_ai_init(pinfo->audio, 1);
if (OPRT_OK != ret) {
    TYERROR("tal_ai_init failed, %d\n", ret);
    return -1;
}

// A NULL handle is passed because only one AO device is managed here.
ret = tal_ao_init(pinfo->audio, 1, NULL);
if (OPRT_OK != ret) {
    TYERROR("tal_ao_init failed, %d\n", ret);
    return -1;
}

// Initialize the GPIO described by spk_gpio (the speaker amplifier pin in this example).
ret = ty_sys_gpio_init(&spk_gpio);
if (OPRT_OK != ret) {
    // Log the name of the function that actually failed.
    TYERROR("ty_sys_gpio_init failed, %d\n", ret);
    return -1;
}

// Audio input:
ret = tal_ai_start(0, 0);
if (OPRT_OK != ret) {
    TYERROR("ty_dev_ai_start failed, %d\n", ret);
    return -1;
}

TAL_AUDIO_FRAME_INFO_T frame = {0};
frame.pbuf = (char *)malloc(640);
frame.buf_size = 640;
ret = tal_ai_get_frame(0, 0, &frame);
if (OPRT_OK != ret) {
    // TYERROR("get frame failed type:%d\n", type);
    return -1;
}
ret = tuya_g711_encode(TUYA_G711_MU_LAW, (unsigned short *)buf, used_size, tmpBuf, &outLen);
...

// Audio output:
// NOTE(review): the other AO calls in this example pass NULL as the handle -- confirm which is intended here.
ret = tal_ao_start(0, 0, handle);
...
// Switch on the speaker amplifier before output (active high, because spk_gpio_polarity is 0).
ret = tal_gpio_write(spk_gpio, TAL_GPIO_LEVEL_HIGH);
...
// Decode the received G.711 data to raw PCM; AO only plays raw data.
ret = tuya_g711_decode(g711Type, (unsigned short *)pbuf, used_size, buf, &outLen);
...
ret = tal_ao_put_frame(0, 0, NULL, pFrame);
...
// Playback finished: drive the amplifier pin to its inactive level to switch it off.
ret = tal_gpio_write(spk_gpio, TAL_GPIO_LEVEL_LOW);
...
// tal_ao_stop takes (card, chn, handle), not GPIO arguments.
ret = tal_ao_stop(0, 0, NULL);
...

// Adjust playback volume:
// For TAL_AO_CMD_VOL the argument is an int in the range [0, 100]; set it after AO initialization.
ret = tal_ao_set(0, 0, NULL, TAL_AO_CMD_VOL, &spk_volume);

Things to note

Video initialization is performed before audio initialization.

For devices that support video input, video initialization is performed before AI initialization. This is because the native SDK of the chip platform needs to allocate memory for the media memory zone (MMZ). MMZ memory is allocated during video initialization.

FAQs

How much memory should be allocated for audio frame capture?

A buffer of 640 bytes is recommended. The common encoding format for integration with third-party devices is G.711 or 8 kHz PCM. With 16-bit samples at 8 kHz, one second of PCM audio is 16,000 bytes (8,000 × 16 ÷ 8 = 16,000). At a frame rate of 25 fps, each frame is therefore 640 bytes (16,000 ÷ 25 = 640). Audio encoded with G.711 is half the length of the PCM data, so an encoded frame requires 320 bytes.