Video Interaction

Last Updated on: 2025-05-06 09:34:03

Overview

Video interaction refers to the process where a device communicates with a large AI model via video. The model analyzes the video content and returns voice responses.

How it works

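Sequence between the three participants (Application, SDK, Cloud):

1. Application -> SDK: Start video interaction
2. Application -> SDK: Input video
3. SDK -> Cloud: Upload video to the cloud
4. Application -> SDK: Complete video interaction
5. Cloud -> SDK: Cloud returns results
6. SDK -> Application: Output the information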

APIs

Initialization

/**
 * Alert (prompt) types. A value of this type is the argument of the
 * alert_cb output callback.
 */
typedef enum {
    /* Power-on boot prompt */
    AT_POWER_ON,
    /* Not yet paired, please pair the device first */
    AT_NOT_ACTIVE,
    /* Entering pairing state, starting pairing */
    AT_NETWORK_CFG,
    /* Network connection successful */
    AT_NETWORK_CONNECTED,
    /* Network connection failed, retrying */
    AT_NETWORK_FAIL,
    /* Network disconnected */
    AT_NETWORK_DISCONNECT,
    /* Low battery */
    AT_BATTERY_LOW,
    /* Please say again */
    AT_PLEASE_AGAIN,
    /* Equals the number of alert types above (enumerators start at 0);
     * presumably a bound rather than a real alert -- confirm with the SDK */
    AT_MAX
} AI_ALERT_TYPE_E;

/* Kind of text passed to the text_cb output callback (an alias of BYTE_T). */
typedef BYTE_T AI_TEXT_TYPE_E;
/* presumably speech-recognition (ASR) text -- confirm with the SDK */
#define AI_TEXT_ASR 0x00
/* presumably generated-response (NLG) text -- confirm with the SDK */
#define AI_TEXT_NLG 0x01
/* presumably skill-related text -- confirm with the SDK */
#define AI_TEXT_SKILL 0x02

/**
 * Output callbacks registered through AI_AGENT_CFG_T.output.
 *
 * Every callback returns an OPERATE_RET. The example in this document
 * registers only media_data_cb and leaves the other members zeroed.
 */
typedef struct {
    /** recv event: called with the event type */
    OPERATE_RET(*event_cb)(AI_EVENT_TYPE type);
    /** recv media attr: called with the attribute info of the media */
    OPERATE_RET(*media_attr_cb)(AI_BIZ_ATTR_INFO_T *attr);
    /** recv media data: called with the packet type, the payload and its length */
    OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);
    /** recv text stream: called with the text type, the text and its length */
    OPERATE_RET(*text_cb)(AI_TEXT_TYPE_E type, CHAR_T *data, INT_T len);
    /** recv alert: called with the alert type */
    OPERATE_RET(*alert_cb)(AI_ALERT_TYPE_E type);
} AI_OUTPUT_CBS_T;
/**
 * Configuration passed to tuya_ai_agent_init().
 */
typedef struct {
    /* business code -- meaning not shown in this document */
    UINT32_T biz_code;
    /* base attributes -- contents not shown in this document */
    AI_ATTR_BASE_T attr;
    /* AI_MAX_SESSION_ID_NUM entries; presumably one per session -- confirm */
    AI_INPUT_SEND_T biz_get[AI_MAX_SESSION_ID_NUM];
    /* output callbacks invoked to deliver responses */
    AI_OUTPUT_CBS_T output;
} AI_AGENT_CFG_T;
/**
 * @brief Initialize the AI agent.
 *
 * The output callbacks in cfg->output are the interface through which
 * responses are delivered to the application.
 *
 * @param[in] cfg agent configuration
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_agent_init(AI_AGENT_CFG_T *cfg);

Input

Start interaction

/**
 * @brief Start AI input, i.e. start an interaction.
 *
 * In the documented flow this is called before video is fed in with
 * tuya_ai_video_input(). Takes no arguments and returns nothing.
 */
VOID tuya_ai_input_start(VOID);

Input video

/**
 * @brief Feed one chunk of video data into the AI input.
 *
 * @param[in] timestamp video timestamp
 * @param[in] pts video pts (presentation timestamp)
 * @param[in] data video data buffer
 * @param[in] len length of the video data
 *
 * @return OPRT_OK on success. Others on error, please refer to tuya_error_code.h
 */
OPERATE_RET tuya_ai_video_input(UINT64_T timestamp, UINT64_T pts, BYTE_T *data, UINT_T len);

Complete interaction

/**
 * @brief Stop AI input, i.e. complete the interaction.
 *
 * In the documented flow this is called after the video has been fed in;
 * the cloud then returns its results. Takes no arguments and returns nothing.
 */
VOID tuya_ai_input_stop(VOID);

Output

Invoke callback

Responses are delivered by invoking the output callbacks that were registered through tuya_ai_agent_init.

Return audio

The cloud streams text-to-speech (TTS) audio to the device, and the device plays it with a local player.

/**
 * recv media data: member of AI_OUTPUT_CBS_T, called with the packet type,
 * the payload and its length. Audio arrives here as AI_PT_AUDIO packets.
 */
OPERATE_RET(*media_data_cb)(AI_PACKET_PT type, CHAR_T *data, UINT_T len);

Example

/**
 * @brief Media-data output callback: plays received audio with the local player.
 *
 * @param[in] type packet type of the received media
 * @param[in] data payload of the packet
 * @param[in] len  length of the payload
 *
 * @return OPRT_OK for non-audio packets (they are ignored); otherwise the
 *         result of tkl_player_write_stream().
 */
OPERATE_RET __ai_agent_media_data_cb(AI_PACKET_PT type, CHAR_T *data, UINT_T len)
{
    PR_DEBUG("===media data type: %d", type);

    /* Anything that is not audio is accepted and dropped. */
    if (type != AI_PT_AUDIO) {
        return OPRT_OK;
    }

    /* Hand the audio payload straight to the player stream. */
    return tkl_player_write_stream((UINT8_T *)data, len);
}
/**
 * @brief Encoder frame callback: forwards encoded I-frames to the AI video input.
 *
 * Installed as the put_cb of the H.264 encoder configuration. Frames are
 * forwarded only while the recorder is in RECORDER_STATUS_PROC, and only
 * I-frames are uploaded; other frame types are skipped.
 *
 * @param[in] pframe encoded frame handed over by the encoder
 *
 * @return 0 if the frame was forwarded or deliberately skipped (not an I-frame);
 *         1 if the recorder is not processing, the frame is missing or empty,
 *         or tuya_ai_video_input() failed.
 */
STATIC INT_T __ai_input_video(TKL_VENC_FRAME_T *pframe)
{
    OPERATE_RET rt = OPRT_OK;

    /* Guard against a missing frame before touching any of its fields. */
    if (pframe == NULL) {
        return 1;
    }

    if (ai_input_ctx.recorder_state != RECORDER_STATUS_PROC) {
        return 1;
    }

    if (pframe->pbuf == NULL || pframe->buf_size == 0) {
        return 1;
    }

    /* Only I-frames are sent to the cloud; everything else is dropped quietly. */
    if (pframe->frametype != TKL_VIDEO_I_FRAME) {
        return 0;
    }

    rt = tuya_ai_video_input(pframe->timestamp, pframe->pts, pframe->pbuf, pframe->buf_size);
    if (rt != OPRT_OK) {
        PR_ERR("ai video input failed %d", rt);
        return 1;
    }

    return 0;
}
static void __ai_video_uvc_init(void)
{
    OPERATE_RET rt = OPRT_OK;
    // lcd
    video_disp_info.width = 320;
    video_disp_info.height = 480;
    video_disp_info.fps = 15;
    video_disp_info.format = TKL_DISP_PIXEL_FMT_RGB565;
    video_disp_info.rotation = TKL_DISP_ROTATION_0;

    video_disp_info.ll_ctrl.enable_lcd_pipeline = 1;

    video_disp_info.ll_ctrl.bl.io              = TUYA_GPIO_NUM_9;
    video_disp_info.ll_ctrl.bl.mode            = TKL_DISP_BL_GPIO;
    video_disp_info.ll_ctrl.bl.active_level    = TUYA_GPIO_LEVEL_HIGH;

    video_disp_info.ll_ctrl.spi.clk            = TUYA_GPIO_NUM_49;
    video_disp_info.ll_ctrl.spi.csx            = TUYA_GPIO_NUM_48;
    video_disp_info.ll_ctrl.spi.sda            = TUYA_GPIO_NUM_50;
    video_disp_info.ll_ctrl.spi.rst_mode       = TKL_DISP_GPIO_RESET;
    video_disp_info.ll_ctrl.spi.rst            = TUYA_GPIO_NUM_53;

    video_disp_info.ll_ctrl.power_ctrl_pin     = TUYA_GPIO_NUM_56;     // no lcd ldo
    video_disp_info.ll_ctrl.power_active_level = TUYA_GPIO_LEVEL_HIGH;
    video_disp_info.ll_ctrl.rgb_mode           = TKL_DISP_PIXEL_FMT_RGB565;

    video_disp_info.ll_ctrl.tp.tp_i2c_clk      = TUYA_GPIO_NUM_13;
    video_disp_info.ll_ctrl.tp.tp_i2c_sda      = TUYA_GPIO_NUM_15;
    video_disp_info.ll_ctrl.tp.tp_rst          = TUYA_GPIO_NUM_54;
    video_disp_info.ll_ctrl.tp.tp_intr         = TUYA_GPIO_NUM_55;

    video_disp_info.ll_ctrl.init_param         = NULL;

    // Pull up the lcd rst pin
    TUYA_GPIO_BASE_CFG_T gpio_cfg = {
        .direct = TUYA_GPIO_OUTPUT,
        .mode = TUYA_GPIO_PULLUP,
        .level = TUYA_GPIO_LEVEL_HIGH,
    };
    tkl_gpio_init(TUYA_GPIO_NUM_53, &gpio_cfg);
    tkl_gpio_write(TUYA_GPIO_NUM_53, 1);

    memset(video_disp_info.ll_ctrl.ic_name, 0, IC_NAME_LENGTH);
    int len = (IC_NAME_LENGTH < sizeof("T35P128CQ"))? IC_NAME_LENGTH: strlen("T35P128CQ");
    memcpy(video_disp_info.ll_ctrl.ic_name, "T35P128CQ", len);

    video_lcd.device_info = &video_disp_info;

    tkl_disp_init(&video_lcd, NULL);

    tkl_disp_set_brightness(NULL, 100);

    // uvc
    uint8_t uvc_status = 0xff;
    TKL_VI_CONFIG_T vi_config;
    TKL_VI_EXT_CONFIG_T ext_conf;

    ext_conf.type = TKL_VI_EXT_CONF_CAMERA;
    ext_conf.camera.camera_type = TKL_VI_CAMERA_TYPE_UVC;
    ext_conf.camera.fmt = TKL_CODEC_VIDEO_MJPEG;
    ext_conf.camera.power_pin = TUYA_GPIO_NUM_28;
    ext_conf.camera.active_level = TUYA_GPIO_LEVEL_HIGH;
    vi_config.isp.width = 800;
    vi_config.isp.height = 480;
    vi_config.isp.fps = 15;
    vi_config.pdata = &ext_conf;
    tkl_vi_init(&vi_config, 0);

    TKL_VENC_CONFIG_T h264_config = {0};
    h264_config.enable_h264_pipeline = 1;
    h264_config.put_cb = __ai_input_video;
    TUYA_CALL_ERR_RETURN(tkl_venc_init(0, &h264_config, 0));
}

/**
 * @brief Initialize the video input path of the demo.
 *
 * Uses the UVC camera path. The sample also mentions a DVP alternative,
 * __ai_video_dvp_init(), which is not enabled here.
 *
 * @param[in] cfg input configuration; not used by this function
 *
 * @return always OPRT_OK
 */
STATIC OPERATE_RET _ai_input_video_init(AI_DEMO_INPUT_CFG_T *cfg)
{
    /* uvc */
    __ai_video_uvc_init();

    return OPRT_OK;
}
/**
 * @brief Demo entry point: initialize the AI agent, then the video input.
 *
 * Only the media-data output callback is registered; every other field of
 * the agent configuration is left zeroed.
 *
 * @return OPRT_OK on success; otherwise the error from tuya_ai_agent_init()
 *         or _ai_input_video_init().
 */
STATIC OPERATE_RET __ai_demo_init(VOID)
{
    OPERATE_RET rt = OPRT_OK;
    AI_AGENT_CFG_T ai_agent_cfg = {0};

    ai_agent_cfg.output.media_data_cb = __ai_agent_media_data_cb;
    TUYA_CALL_ERR_RETURN(tuya_ai_agent_init(&ai_agent_cfg));

    /* Propagate a video-input setup failure instead of discarding it. */
    TUYA_CALL_ERR_RETURN(_ai_input_video_init(NULL));

    return rt;
}

Support and help

If you have any problems with TuyaOS development, you can post your questions in the Tuya Developer Forum.