语音交互

更新时间:2025-04-27 02:31:19下载pdf

概述

语音交互是指设备通过语音与大模型进行交互,大模型识别语音后返回信息。

流程

参与方:应用、SDK、云端。

1. 应用 → SDK:语音交互开始
2. 应用 → SDK:语音输入
3. SDK → 云端:语音上传到云端
4. 应用 → SDK:语音交互结束
5. 云端 → SDK:云端返回信息
6. SDK → 应用:输出

接口

初始化

/**
 * @brief Output callbacks registered with the AI agent.
 *
 * An instance of this struct is the `output` member of AI_AGENT_CFG_T, which
 * is passed to tuya_ai_agent_init. Every callback returns an OPERATE_RET.
 */
typedef struct {
    /** Receives an event; `type` identifies which event. */
    OPERATE_RET(*event_cb)(AI_EVENT_TYPE type);
    /** Receives media attributes; `attr` points to the attribute info. */
    OPERATE_RET(*media_attr_cb)(AI_BIZ_ATTR_INFO_T *attr);
    /** Receives media data: `type` is the packet type, `data`/`len` the payload buffer and its length. */
    OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);
    /** Receives a text stream: `type` is the text type, `data`/`len` the text buffer and its length. */
    OPERATE_RET(*text_cb)(AI_TEXT_TYPE_E type, CHAR_T *data, INT_T len);
    /** Receives an alert; `type` identifies which alert. */
    OPERATE_RET(*alert_cb)(AI_ALERT_TYPE_E type);
} AI_OUTPUT_CBS_T;
/**
 * @brief AI agent configuration, passed to tuya_ai_agent_init.
 */
typedef struct {
    /** Business code (the example in this document uses TY_BIZCODE_AI_CHAT). */
    UINT32_T biz_code;
    /** Base attributes. NOTE(review): fields are defined by AI_ATTR_BASE_T, which is not shown here. */
    AI_ATTR_BASE_T attr;
    /** Input send descriptors, AI_MAX_SESSION_ID_NUM entries. NOTE(review): presumably one per session id; confirm against the SDK header. */
    AI_INPUT_SEND_T biz_get[AI_MAX_SESSION_ID_NUM];
    /** Output callbacks invoked by the agent. */
    AI_OUTPUT_CBS_T output;
} AI_AGENT_CFG_T;
/**
 * @brief Initialize the AI agent.
 *
 * The output callbacks carried in cfg->output are the ones the agent uses to
 * deliver its output to the application.
 *
 * @param[in] cfg agent configuration (business code, attributes, input descriptors, output callbacks)
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_agent_init(AI_AGENT_CFG_T *cfg);

输入

交互开始

/**
 * @brief Start AI input; call this to mark the beginning of an interaction.
 *
 * Takes no parameters and returns nothing.
 */
VOID tuya_ai_input_start(VOID);

语音输入

/**
 * @brief ai audio input
 *
 * @param[in] timestamp audio timestamp
 * @param[in] pts audio pts (presumably the presentation timestamp; confirm the unit against the SDK header)
 * @param[in] data audio data
 * @param[in] len audio data length
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_audio_input(UINT64_T timestamp, UINT64_T pts, BYTE_T *data, UINT_T len);

交互结束

/**
 * @brief Stop AI input; call this to mark the end of an interaction.
 *
 * Takes no parameters and returns nothing.
 */
VOID tuya_ai_input_stop(VOID);

输出

回调执行

通过 tuya_ai_agent_init 注册的 Output 回调接口执行相应的输出。

返回语音

云端返回 TTS 语音流,设备收到后通过播放器播放。

/**
 * Receives media data returned by the cloud (for voice interaction, the TTS audio stream).
 * `type` is the packet type, `data` the payload buffer and `len` its length.
 * This is a member of AI_OUTPUT_CBS_T.
 */
OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);

示例


/**
 * @brief Output callback for media data: writes audio packets to the player stream.
 *
 * @param[in] type packet type of the received media data
 * @param[in] data media data buffer
 * @param[in] len  media data length
 *
 * @return OPRT_OK for non-audio packets; otherwise the result of tkl_player_write_stream
 */
OPERATE_RET __ai_agent_media_data_cb(AI_PACKET_PT type, CHAR_T *data, UINT_T len)
{
    PR_DEBUG("===media data type: %d", type);

    // Only audio packets are played; any other packet type is accepted and ignored.
    if (type != AI_PT_AUDIO) {
        return OPRT_OK;
    }

    return tkl_player_write_stream((UINT8_T *)data, len);
}

/**
 * @brief Initialize the board and the audio input (mic), then start audio capture.
 *
 * Registers __ai_input_audio as the audio put callback.
 *
 * @param[in] cfg demo input configuration (trigger mode, speaker enable pin); must not be NULL
 *
 * @return OPRT_OK on success, OPRT_INVALID_PARM if cfg is NULL. Others on error,
 *         please refer to tuya_error_code.h
 */
STATIC OPERATE_RET _ai_input_mic_init(AI_DEMO_INPUT_CFG_T *cfg)
{
    OPERATE_RET rt = OPRT_OK;
    TKL_AUDIO_CONFIG_T config = {0};
    AI_BOARD_CFG_T board_cfg = {0};

    // cfg is dereferenced below; reject NULL instead of crashing.
    if (NULL == cfg) {
        return OPRT_INVALID_PARM;
    }

    // VAD is enabled only for the one-shot and free trigger modes.
    if ((cfg->ai_trigger_mode == AI_TRIGGER_MODE_ONE_SHOT) ||
        (cfg->ai_trigger_mode == AI_TRIGGER_MODE_FREE)) {
        board_cfg.vad = TRUE;
    } else {
        board_cfg.vad = FALSE;
    }
    ai_board_init(&board_cfg);

    // Audio is enabled at init time only in the free trigger mode.
    config.enable = (cfg->ai_trigger_mode == AI_TRIGGER_MODE_FREE) ? 1 : 0;
    config.ai_chn = 0;
    config.sample = AI_AUDIO_INPUT_SAMPLE_RATE;
    config.spk_sample = 16000;
    config.datebits = AI_AUDIO_INPUT_SAMPLE_BITS;
    config.channel = AI_AUDIO_INPUT_CHANNEL;
    config.codectype = TKL_CODEC_AUDIO_PCM;
    config.card = TKL_AUDIO_TYPE_BOARD;
    config.spk_gpio = cfg->spk_en_pin;
    config.spk_gpio_polarity = TUYA_GPIO_LEVEL_LOW;
    config.put_cb = __ai_input_audio;

    TUYA_CALL_ERR_RETURN(tkl_ai_init(&config, 0));
    TUYA_CALL_ERR_RETURN(tkl_ai_start(0, 0));

    return rt;
}

/**
 * @brief Initialize the AI demo: register the agent output callback, then
 *        initialize the microphone input.
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
STATIC OPERATE_RET __ai_demo_init(VOID)
{
    OPERATE_RET rt = OPRT_OK;
    AI_AGENT_CFG_T ai_agent_cfg = {0};
    AI_DEMO_INPUT_CFG_T input_cfg = {0};

    ai_agent_cfg.biz_code = TY_BIZCODE_AI_CHAT;
    // __ai_agent_media_data_cb has the media_data_cb signature
    // (AI_PACKET_PT, CHAR_T *, UINT_T), not the text_cb one, so register it there.
    ai_agent_cfg.output.media_data_cb = __ai_agent_media_data_cb;
    TUYA_CALL_ERR_RETURN(tuya_ai_agent_init(&ai_agent_cfg));

    // _ai_input_mic_init dereferences its argument, so pass a real configuration.
    // NOTE(review): placeholder values; set ai_trigger_mode and spk_en_pin
    // to match the target board before use.
    input_cfg.ai_trigger_mode = AI_TRIGGER_MODE_FREE;
    TUYA_CALL_ERR_RETURN(_ai_input_mic_init(&input_cfg));

    return rt;
}

支持与帮助

在开发过程中遇到问题,可以到 TuyaOS 开发者论坛的联网单品开发版块发帖咨询。