Last updated on: 2025-05-06 02:56:13
Audio interaction refers to the process where a device communicates with a large AI model via audio. The model processes the input audio and returns a response.
/**
 * Output callbacks of the AI agent.
 *
 * This structure is embedded in AI_AGENT_CFG_T (member `output`) and is
 * handed to the agent through tuya_ai_agent_init(). Every callback returns
 * an OPERATE_RET status code.
 *
 * NOTE(review): the demo code registers only media_data_cb and leaves the
 * other members zeroed, which suggests unused callbacks may be left NULL —
 * confirm against the agent implementation.
 */
typedef struct {
/** Called when an event is received; `type` identifies the event. */
OPERATE_RET(*event_cb)(AI_EVENT_TYPE type);
/** Called when media attributes are received; `attr` points to the attribute info. */
OPERATE_RET(*media_attr_cb)(AI_BIZ_ATTR_INFO_T *attr);
/** Called when media data is received; `type` is the packet type, `data`/`len` the payload and its length. */
OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);
/** Called when a piece of the text stream is received; `type` is the text type, `data`/`len` the text and its length. */
OPERATE_RET(*text_cb)(AI_TEXT_TYPE_E type, CHAR_T *data, INT_T len);
/** Called when an alert is received; `type` identifies the alert. */
OPERATE_RET(*alert_cb)(AI_ALERT_TYPE_E type);
} AI_OUTPUT_CBS_T;
/**
 * Configuration passed to tuya_ai_agent_init().
 */
typedef struct {
/* NOTE(review): presumably the business/scenario code of the agent — confirm. */
UINT32_T biz_code;
/* Base attributes of the agent (see AI_ATTR_BASE_T). */
AI_ATTR_BASE_T attr;
/* Input send configuration, one slot per session id (AI_MAX_SESSION_ID_NUM slots). */
AI_INPUT_SEND_T biz_get[AI_MAX_SESSION_ID_NUM];
/* Output callbacks invoked when the agent receives events, media, text or alerts. */
AI_OUTPUT_CBS_T output;
} AI_AGENT_CFG_T;
/**
* @brief Initialize the AI agent.
*
* The output callbacks carried in `cfg->output` are the interface through
* which responses are later delivered to the application.
*
* @param[in] cfg agent configuration
*
* @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
*/
OPERATE_RET tuya_ai_agent_init(AI_AGENT_CFG_T *cfg);
/**
* @brief Start AI input.
*
* Takes no arguments and returns nothing.
* NOTE(review): presumably called before feeding audio with
* tuya_ai_audio_input() — confirm the required call order.
*/
VOID tuya_ai_input_start(VOID);
/**
* @brief Feed audio data to the AI agent.
*
* @param[in] timestamp audio timestamp
* @param[in] pts NOTE(review): presumably the presentation timestamp of the
*                audio data; units are not stated here — confirm
* @param[in] data audio data
* @param[in] len audio data length
*
* @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
*/
OPERATE_RET tuya_ai_audio_input(UINT64_T timestamp, UINT64_T pts, BYTE_T *data, UINT_T len);
/**
* @brief Stop AI input.
*
* Takes no arguments and returns nothing; counterpart of tuya_ai_input_start().
*/
VOID tuya_ai_input_stop(VOID);
The response is output by invoking the output interface (callbacks) registered through tuya_ai_agent_init.
The cloud streams text-to-speech (TTS) audio to the device, and the device plays it with a local player.
/** Called when media data is received; `type` is the packet type, `data`/`len` the payload and its length. */
OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);
/**
 * @brief Output callback for received media data.
 *
 * Logs the packet type, then hands audio packets to the local player through
 * tkl_player_write_stream(). Packets of any other type are ignored.
 *
 * @param[in] type media packet type
 * @param[in] data packet payload
 * @param[in] len  payload length
 *
 * @return OPRT_OK for non-audio packets; otherwise the result of
 *         tkl_player_write_stream()
 */
OPERATE_RET __ai_agent_media_data_cb(AI_PACKET_PT type, CHAR_T *data, UINT_T len)
{
    PR_DEBUG("===media data type: %d", type);

    /* Only audio is played back; everything else is reported as success. */
    if (type != AI_PT_AUDIO) {
        return OPRT_OK;
    }

    return tkl_player_write_stream((UINT8_T *)data, len);
}
/**
 * @brief Initialize the board and the microphone audio input.
 *
 * Configures the board (VAD on or off depending on the trigger mode), fills
 * in the audio configuration with __ai_input_audio as the data callback,
 * then initializes and starts the audio channel.
 *
 * @param[in] cfg demo input configuration; its ai_trigger_mode and
 *                spk_en_pin members are read. Must not be NULL (it is
 *                dereferenced without a check).
 *
 * @return OPRT_OK on success; otherwise the error propagated by
 *         TUYA_CALL_ERR_RETURN from tkl_ai_init() / tkl_ai_start()
 */
STATIC OPERATE_RET _ai_input_mic_init(AI_DEMO_INPUT_CFG_T *cfg)
{
    /* NOTE(review): TUYA_CALL_ERR_RETURN presumably stores its result in `rt` — keep this name. */
    OPERATE_RET rt = OPRT_OK;
    AI_BOARD_CFG_T board_cfg = {0};
    TKL_AUDIO_CONFIG_T config = {0};

    /* VAD is switched on only for the one-shot and free trigger modes. */
    board_cfg.vad = ((cfg->ai_trigger_mode != AI_TRIGGER_MODE_ONE_SHOT) &&
                     (cfg->ai_trigger_mode != AI_TRIGGER_MODE_FREE)) ? FALSE : TRUE;
    ai_board_init(&board_cfg);

    /* The `enable` flag is set only in free trigger mode. */
    if (cfg->ai_trigger_mode == AI_TRIGGER_MODE_FREE) {
        config.enable = 1;
    } else {
        config.enable = 0;
    }

    /* Capture (microphone) side. */
    config.card      = TKL_AUDIO_TYPE_BOARD;
    config.ai_chn    = 0;
    config.codectype = TKL_CODEC_AUDIO_PCM;
    config.sample    = AI_AUDIO_INPUT_SAMPLE_RATE;
    config.datebits  = AI_AUDIO_INPUT_SAMPLE_BITS;
    config.channel   = AI_AUDIO_INPUT_CHANNEL;
    config.put_cb    = __ai_input_audio;

    /* Speaker side. */
    config.spk_sample        = 16000;
    config.spk_gpio          = cfg->spk_en_pin;
    config.spk_gpio_polarity = TUYA_GPIO_LEVEL_LOW;

    TUYA_CALL_ERR_RETURN(tkl_ai_init(&config, 0));
    TUYA_CALL_ERR_RETURN(tkl_ai_start(0, 0));

    return rt;
}
/**
 * @brief Demo entry: initialize the AI agent and the input path.
 *
 * Registers __ai_agent_media_data_cb as the media data output callback (all
 * other configuration members stay zero), initializes the agent, then
 * initializes the input.
 *
 * NOTE(review): this audio demo calls _ai_input_video_init(NULL) and never
 * _ai_input_mic_init() — confirm this is intended and not a copy-paste slip.
 *
 * @return OPRT_OK on success; otherwise the error propagated by
 *         TUYA_CALL_ERR_RETURN from tuya_ai_agent_init()
 */
STATIC OPERATE_RET __ai_demo_init(VOID)
{
    /* NOTE(review): TUYA_CALL_ERR_RETURN presumably stores its result in `rt` — keep this name. */
    OPERATE_RET rt = OPRT_OK;
    AI_AGENT_CFG_T ai_agent_cfg = {
        .output = { .media_data_cb = __ai_agent_media_data_cb },
    };

    TUYA_CALL_ERR_RETURN(tuya_ai_agent_init(&ai_agent_cfg));

    _ai_input_video_init(NULL);

    return rt;
}
If you have any problems with TuyaOS development, you can post your questions in the Tuya Developer Forum.
Is this page helpful? (Yes / Feedback)