Audio Interaction

Last updated on: 2025-05-06 02:56:13

Overview

Audio interaction refers to the process where a device communicates with a large AI model via audio. The model processes the input audio and returns a response.

How it works

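Participants: Application, SDK, Cloud.

1. Application → SDK: Start audio interaction.
2. Application → SDK: Input the audio.
3. SDK → Cloud: Upload audio to the cloud.
4. Application → SDK: Complete audio interaction.
5. Cloud → SDK: Cloud returns results.
6. SDK → Application: Output the information.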

APIs

Initialization

/**
 * @brief Set of output callbacks the application hands to the AI agent.
 *
 * Each member is a function pointer returning OPERATE_RET. The structure is
 * embedded as the `output` member of AI_AGENT_CFG_T.
 *
 * NOTE(review): whether unset (NULL) members are tolerated is not stated
 * here — confirm before leaving callbacks unregistered.
 */
typedef struct {
    /** recv event: called with the type of the event being reported */
    OPERATE_RET(*event_cb)(AI_EVENT_TYPE type);
    /** recv media attr: called with a pointer to the media attribute info */
    OPERATE_RET(*media_attr_cb)(AI_BIZ_ATTR_INFO_T *attr);
    /** recv media data: called with the packet type, a data buffer and its length (UINT_T) */
    OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);
    /** recv text stream: called with the text type, a data buffer and its length (note: INT_T, unlike media_data_cb) */
    OPERATE_RET(*text_cb)(AI_TEXT_TYPE_E type, CHAR_T *data, INT_T len);
    /** recv alert: called with the type of the alert being reported */
    OPERATE_RET(*alert_cb)(AI_ALERT_TYPE_E type);
} AI_OUTPUT_CBS_T;
/**
 * @brief Configuration passed to tuya_ai_agent_init().
 */
typedef struct {
    /** business code — NOTE(review): valid values are not documented here; confirm */
    UINT32_T biz_code;
    /** base attributes of the agent */
    AI_ATTR_BASE_T attr;
    /** input-send entries, one slot per session id (AI_MAX_SESSION_ID_NUM slots) — presumably indexed by session; verify */
    AI_INPUT_SEND_T biz_get[AI_MAX_SESSION_ID_NUM];
    /** output callbacks registered with the agent */
    AI_OUTPUT_CBS_T output;
} AI_AGENT_CFG_T;
/**
 * @brief Initialize the AI agent with the given configuration.
 *
 * The output callbacks carried in cfg->output are the interface through
 * which responses are later delivered.
 *
 * @param[in] cfg agent configuration (see AI_AGENT_CFG_T)
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_agent_init(AI_AGENT_CFG_T *cfg);

Input

Start interaction

/**
 * @brief Start an AI input interaction.
 *
 * Takes no arguments and returns nothing, so no status is reported to the
 * caller.
 */
VOID tuya_ai_input_start(VOID);

Input audio

/**
 * @brief Feed audio data into the AI input.
 *
 * @param[in] timestamp audio timestamp
 * @param[in] pts presumably the presentation timestamp of this audio data —
 *                NOTE(review): units and relation to `timestamp` are not
 *                documented here; confirm
 * @param[in] data audio data
 * @param[in] len audio data length
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_audio_input(UINT64_T timestamp, UINT64_T pts, BYTE_T *data, UINT_T len);

Complete interaction

/**
 * @brief Stop (complete) the current AI input interaction.
 *
 * Takes no arguments and returns nothing, so no status is reported to the
 * caller.
 */
VOID tuya_ai_input_stop(VOID);

Output

Invoke callback

The response is delivered by invoking the output callbacks that were registered through tuya_ai_agent_init.

Return audio

The cloud streams text-to-speech (TTS) audio to the device, and the device plays it with a local player.

/** recv media data: called with the packet type, a data buffer and its length; member of AI_OUTPUT_CBS_T */
OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);

Example


/**
 * @brief Media-data output callback: forwards audio packets to the player.
 *
 * Logs the packet type, then writes the payload to the player stream when the
 * packet is AI_PT_AUDIO. Any other packet type is left untouched and reported
 * as success.
 *
 * @param[in] type packet type of the received media data
 * @param[in] data received media data
 * @param[in] len  length of the received media data
 *
 * @return result of tkl_player_write_stream() for audio packets, OPRT_OK otherwise
 */
OPERATE_RET __ai_agent_media_data_cb(AI_PACKET_PT type, CHAR_T *data, UINT_T len)
{
    PR_DEBUG("===media data type: %d", type);

    /* Only audio is played back; everything else is ignored. */
    if (type != AI_PT_AUDIO) {
        return OPRT_OK;
    }

    return tkl_player_write_stream((UINT8_T *)data, len);
}

/**
 * @brief Initialize the board and the audio input used for AI interaction.
 *
 * Enables VAD on the board for the one-shot and free trigger modes, fills in
 * the audio configuration (registering __ai_input_audio as the put callback —
 * presumably where captured audio is delivered; verify), then initializes and
 * starts the audio device.
 *
 * @param[in] cfg demo input configuration; dereferenced without a NULL check
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
STATIC OPERATE_RET _ai_input_mic_init(AI_DEMO_INPUT_CFG_T *cfg)
{
    OPERATE_RET rt = OPRT_OK;
    AI_BOARD_CFG_T board_cfg = {0};
    TKL_AUDIO_CONFIG_T config = {0};

    /* VAD is switched on for the one-shot and free trigger modes only. */
    board_cfg.vad = ((cfg->ai_trigger_mode == AI_TRIGGER_MODE_ONE_SHOT) ||
                     (cfg->ai_trigger_mode == AI_TRIGGER_MODE_FREE)) ? TRUE : FALSE;
    ai_board_init(&board_cfg);

    /* Device selection and input format. */
    config.card = TKL_AUDIO_TYPE_BOARD;
    config.ai_chn = 0;
    config.codectype = TKL_CODEC_AUDIO_PCM;
    config.sample = AI_AUDIO_INPUT_SAMPLE_RATE;
    config.datebits = AI_AUDIO_INPUT_SAMPLE_BITS;
    config.channel = AI_AUDIO_INPUT_CHANNEL;

    /* spk_* settings. */
    config.spk_sample = 16000;
    config.spk_gpio = cfg->spk_en_pin;
    config.spk_gpio_polarity = TUYA_GPIO_LEVEL_LOW;

    /* `enable` is set only in free trigger mode. */
    if (cfg->ai_trigger_mode == AI_TRIGGER_MODE_FREE) {
        config.enable = 1;
    } else {
        config.enable = 0;
    }
    config.put_cb = __ai_input_audio;

    /* TUYA_CALL_ERR_RETURN is a project macro; `rt` keeps its name in case the macro uses it. */
    TUYA_CALL_ERR_RETURN(tkl_ai_init(&config, 0));
    TUYA_CALL_ERR_RETURN(tkl_ai_start(0, 0));

    return rt;
}

/**
 * @brief Demo entry point: register the media-data output callback with the
 *        AI agent, then set up the input side.
 *
 * Only output.media_data_cb is populated; every other field of the agent
 * configuration is left zero-initialized.
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
STATIC OPERATE_RET __ai_demo_init(VOID)
{
    OPERATE_RET rt = OPRT_OK;
    AI_AGENT_CFG_T ai_agent_cfg = {
        .output = { .media_data_cb = __ai_agent_media_data_cb },
    };

    TUYA_CALL_ERR_RETURN(tuya_ai_agent_init(&ai_agent_cfg));

    /*
     * NOTE(review): this audio example calls _ai_input_video_init(NULL) and
     * ignores its result, while _ai_input_mic_init() is the input setup shown
     * alongside it — confirm which initializer (and which cfg) is intended.
     */
    _ai_input_video_init(NULL);

    return rt;
}

Support and help

If you have any problems with TuyaOS development, you can post your questions in the Tuya Developer Forum.