视频交互

更新时间:2025-04-25 10:38:17下载pdf

概述

视频交互是指设备通过视频与大模型进行交互,大模型识别视频内容后返回信息。

流程

交互涉及应用、SDK 和云端三方,流程如下:

1. 应用 → SDK:视频交互开始
2. 应用 → SDK:视频输入
3. SDK → 云端:视频上传到云端
4. 应用 → SDK:视频交互结束
5. 云端 → SDK:云端返回信息
6. SDK → 应用:输出

接口

初始化

/**
 * Alert kinds reported to the application through the alert callback.
 * Values are consecutive starting at 0; AT_MAX is the number of real alerts.
 */
typedef enum {
    AT_POWER_ON = 0,        /**< Power-on boot prompt */
    AT_NOT_ACTIVE,          /**< Device not provisioned yet; network must be configured first */
    AT_NETWORK_CFG,         /**< Network configuration has started */
    AT_NETWORK_CONNECTED,   /**< Network connection succeeded */
    AT_NETWORK_FAIL,        /**< Network connection failed, a retry is in progress */
    AT_NETWORK_DISCONNECT,  /**< Network was disconnected */
    AT_BATTERY_LOW,         /**< Battery is low */
    AT_PLEASE_AGAIN,        /**< Ask the user to repeat the request */
    AT_MAX                  /**< Sentinel: count of alert types, not an alert itself */
} AI_ALERT_TYPE_E;

/* Kind of text passed as the `type` argument of the text_cb output callback. */
typedef BYTE_T AI_TEXT_TYPE_E;
/* ASR text -- presumably the speech-recognition transcript; confirm against SDK docs */
#define AI_TEXT_ASR 0x00
/* NLG text -- presumably the generated natural-language reply; confirm against SDK docs */
#define AI_TEXT_NLG 0x01
/* Skill text -- presumably a skill/command payload; confirm against SDK docs */
#define AI_TEXT_SKILL 0x02

/**
 * Output callbacks the application hands to the agent in AI_AGENT_CFG_T::output.
 * Every callback returns an OPERATE_RET status code.
 */
typedef struct {
    /** recv event: invoked with the type of the event */
    OPERATE_RET(*event_cb)(AI_EVENT_TYPE type);
    /** recv media attr: invoked with a pointer to the media attribute info */
    OPERATE_RET(*media_attr_cb)(AI_BIZ_ATTR_INFO_T *attr);
    /** recv media data: invoked with the packet type, the data buffer and its length */
    OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);
    /** recv text stream: invoked with the text kind (AI_TEXT_*), the text buffer and its length */
    OPERATE_RET(*text_cb)(AI_TEXT_TYPE_E type, CHAR_T *data, INT_T len);
    /** recv alert: invoked with the alert kind (AI_ALERT_TYPE_E) */
    OPERATE_RET(*alert_cb)(AI_ALERT_TYPE_E type);
} AI_OUTPUT_CBS_T;
/** Configuration consumed by tuya_ai_agent_init(). */
typedef struct {
    /* Business code of the agent; the sample in this document uses TY_BIZCODE_AI_CHAT */
    UINT32_T biz_code;
    /* Base attributes of the agent (contents defined by AI_ATTR_BASE_T) */
    AI_ATTR_BASE_T attr;
    /* Input senders, one slot per session id -- NOTE(review): exact semantics not shown here */
    AI_INPUT_SEND_T biz_get[AI_MAX_SESSION_ID_NUM];
    /* Output callbacks invoked by the agent */
    AI_OUTPUT_CBS_T output;
} AI_AGENT_CFG_T;
/**
 * @brief ai agent init
 *
 * Initializes the AI agent from the given configuration, which carries the
 * business code, base attributes, input senders and output callbacks.
 *
 * @param[in] cfg agent cfg
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_agent_init(AI_AGENT_CFG_T *cfg);

输入

交互开始

/**
 * @brief ai input start
 *
 * Marks the beginning of an interaction; in the documented flow it is called
 * before any video input is fed to the SDK.
 */
VOID tuya_ai_input_start(VOID);

视频输入

/**
 * @brief ai video input
 *
 * Feeds one chunk of video data to the SDK. In the documented flow this is
 * called between tuya_ai_input_start() and tuya_ai_input_stop().
 *
 * @param[in] timestamp video timestamp
 * @param[in] pts video pts
 * @param[in] data video data
 * @param[in] len video data length
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_video_input(UINT64_T timestamp, UINT64_T pts, BYTE_T *data, UINT_T len);

交互结束

/**
 * @brief ai input stop
 *
 * Marks the end of an interaction; in the documented flow it is called after
 * the last video input has been fed to the SDK.
 */
VOID tuya_ai_input_stop(VOID);

输出

回调执行

通过 tuya_ai_agent_init 注册的 Output 接口执行相应的输出。

返回语音

云端返回语音 TTS 流,设备收到后通过播放器播放。

/** recv media data: invoked with the packet type, the data buffer and its length */
OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);

示例

/**
 * Sample media-data callback: logs the packet type and writes audio packets
 * to the player stream.
 *
 * @param[in] type packet type of the received media data
 * @param[in] data received media data
 * @param[in] len  length of data
 *
 * @return OPRT_OK for non-audio packets; otherwise the result of
 *         tkl_player_write_stream().
 */
OPERATE_RET __ai_agent_media_data_cb(AI_PACKET_PT type, CHAR_T *data, UINT_T len)
{
    PR_DEBUG("===media data type: %d", type);

    /* Only audio is played; any other packet type is accepted and ignored. */
    if (type != AI_PT_AUDIO) {
        return OPRT_OK;
    }

    return tkl_player_write_stream((UINT8_T *)data, len);
}
/**
 * Encoder frame callback: forwards I-frames to the AI agent while recording
 * is in progress.
 *
 * @param[in] pframe encoded frame handed over by the video encoder
 *
 * @return 0 when the frame was forwarded or intentionally skipped (non-I
 *         frame); 1 when recording is not active, the frame is empty, or the
 *         upload failed. NOTE(review): how the encoder interprets this value
 *         is not visible here -- confirm against the TKL venc documentation.
 */
STATIC INT_T __ai_input_video(TKL_VENC_FRAME_T *pframe)
{
    OPERATE_RET rt = OPRT_OK;

    /* Frames are only uploaded while the recorder is in the processing state. */
    if (ai_input_ctx.recorder_state != RECORDER_STATUS_PROC) {
        return 1;
    }

    /* Reject missing or empty frames before touching the payload. */
    if (pframe == NULL || pframe->pbuf == NULL || pframe->buf_size == 0) {
        return 1;
    }

    /* Only I-frames are sent; other frame types are dropped without error. */
    if (pframe->frametype != TKL_VIDEO_I_FRAME) {
        return 0;
    }

    rt = tuya_ai_video_input(pframe->timestamp, pframe->pts, pframe->pbuf, pframe->buf_size);
    if (rt != OPRT_OK) {
        PR_ERR("ai video input failed %d", rt);
        return 1;
    }

    return 0;
}
static void __ai_video_uvc_init(void)
{
    OPERATE_RET rt = OPRT_OK;
    // lcd
    video_disp_info.width = 320;
    video_disp_info.height = 480;
    video_disp_info.fps = 15;
    video_disp_info.format = TKL_DISP_PIXEL_FMT_RGB565;
    video_disp_info.rotation = TKL_DISP_ROTATION_0;

    video_disp_info.ll_ctrl.enable_lcd_pipeline = 1;

    video_disp_info.ll_ctrl.bl.io              = TUYA_GPIO_NUM_9;
    video_disp_info.ll_ctrl.bl.mode            = TKL_DISP_BL_GPIO;
    video_disp_info.ll_ctrl.bl.active_level    = TUYA_GPIO_LEVEL_HIGH;

    video_disp_info.ll_ctrl.spi.clk            = TUYA_GPIO_NUM_49;
    video_disp_info.ll_ctrl.spi.csx            = TUYA_GPIO_NUM_48;
    video_disp_info.ll_ctrl.spi.sda            = TUYA_GPIO_NUM_50;
    video_disp_info.ll_ctrl.spi.rst_mode       = TKL_DISP_GPIO_RESET;
    video_disp_info.ll_ctrl.spi.rst            = TUYA_GPIO_NUM_53;

    video_disp_info.ll_ctrl.power_ctrl_pin     = TUYA_GPIO_NUM_56;     // no lcd ldo
    video_disp_info.ll_ctrl.power_active_level = TUYA_GPIO_LEVEL_HIGH;
    video_disp_info.ll_ctrl.rgb_mode           = TKL_DISP_PIXEL_FMT_RGB565;

    video_disp_info.ll_ctrl.tp.tp_i2c_clk      = TUYA_GPIO_NUM_13;
    video_disp_info.ll_ctrl.tp.tp_i2c_sda      = TUYA_GPIO_NUM_15;
    video_disp_info.ll_ctrl.tp.tp_rst          = TUYA_GPIO_NUM_54;
    video_disp_info.ll_ctrl.tp.tp_intr         = TUYA_GPIO_NUM_55;

    video_disp_info.ll_ctrl.init_param         = NULL;

    // 拉高 lcd rst 引脚
    TUYA_GPIO_BASE_CFG_T gpio_cfg = {
        .direct = TUYA_GPIO_OUTPUT,
        .mode = TUYA_GPIO_PULLUP,
        .level = TUYA_GPIO_LEVEL_HIGH,
    };
    tkl_gpio_init(TUYA_GPIO_NUM_53, &gpio_cfg);
    tkl_gpio_write(TUYA_GPIO_NUM_53, 1);

    memset(video_disp_info.ll_ctrl.ic_name, 0, IC_NAME_LENGTH);
    int len = (IC_NAME_LENGTH < sizeof("T35P128CQ"))? IC_NAME_LENGTH: strlen("T35P128CQ");
    memcpy(video_disp_info.ll_ctrl.ic_name, "T35P128CQ", len);

    video_lcd.device_info = &video_disp_info;

    tkl_disp_init(&video_lcd, NULL);

    tkl_disp_set_brightness(NULL, 100);

    // uvc
    uint8_t uvc_status = 0xff;
    TKL_VI_CONFIG_T vi_config;
    TKL_VI_EXT_CONFIG_T ext_conf;

    ext_conf.type = TKL_VI_EXT_CONF_CAMERA;
    ext_conf.camera.camera_type = TKL_VI_CAMERA_TYPE_UVC;
    ext_conf.camera.fmt = TKL_CODEC_VIDEO_MJPEG;
    ext_conf.camera.power_pin = TUYA_GPIO_NUM_28;
    ext_conf.camera.active_level = TUYA_GPIO_LEVEL_HIGH;
    vi_config.isp.width = 800;
    vi_config.isp.height = 480;
    vi_config.isp.fps = 15;
    vi_config.pdata = &ext_conf;
    tkl_vi_init(&vi_config, 0);

    TKL_VENC_CONFIG_T h264_config = {0};
    h264_config.enable_h264_pipeline = 1;
    h264_config.put_cb = __ai_input_video;
    TUYA_CALL_ERR_RETURN(tkl_venc_init(0, &h264_config, 0));
}

/**
 * Initialize the demo's video input path.
 *
 * Currently always selects the UVC camera; the DVP variant
 * (__ai_video_dvp_init) is the alternative and is not called.
 *
 * @param[in] cfg input configuration (not used by this implementation)
 *
 * @return always OPRT_OK
 */
STATIC OPERATE_RET _ai_input_video_init(AI_DEMO_INPUT_CFG_T *cfg)
{
    /* uvc camera path */
    __ai_video_uvc_init();

    return OPRT_OK;
}
/**
 * Demo entry point: registers the media-data output callback with the AI
 * agent and then brings up the video input path.
 *
 * @return OPRT_OK on success, otherwise the error code of the failing step.
 *         NOTE(review): relies on TUYA_CALL_ERR_RETURN assigning to `rt` and
 *         returning it on failure -- confirm against tuya_error_code.h.
 */
STATIC OPERATE_RET __ai_demo_init(VOID)
{
    OPERATE_RET rt = OPRT_OK;
    AI_AGENT_CFG_T ai_agent_cfg = {0};

    ai_agent_cfg.biz_code = TY_BIZCODE_AI_CHAT;
    /* The callback has the media_data_cb signature (AI_PACKET_PT, CHAR_T *,
     * UINT_T), so it belongs in media_data_cb, not text_cb. */
    ai_agent_cfg.output.media_data_cb = __ai_agent_media_data_cb;

    TUYA_CALL_ERR_RETURN(tuya_ai_agent_init(&ai_agent_cfg));
    TUYA_CALL_ERR_RETURN(_ai_input_video_init(NULL));

    return rt;
}

支持与帮助

在开发过程中遇到问题,可以到 TuyaOS 开发者论坛的联网单品开发版块发帖咨询。