实时手势识别(C++与python都可实现)

奋斗吧

擅长领域：未填写

标签：实时手势识别

2023-03-22 12:56:58 459浏览

实时手势识别，label[0,1,2,3,4,5]，python，C++均可实现

一、前提配置：

Windows、Visual Studio 2019、OpenCV、Python 3.10、opencv-python、numpy、tensorflow、mediapipe、math（Python 标准库，无需单独安装）

1.安装python环境

这里我个人安装使用的是 Python 3.10（Google 官方使用的是 Python 3.8）。
安装相应的包，并把 Python 的安装路径添加到系统环境变量 PATH 中，方便使用 pip。

pip install mediapipe opencv-python
pip install numpy==1.22.4 
pip install tensorflow-cpu

二、基于python手势识别

打开运行infer.py

如果能正常运行使用，说明python环境没有问题。
infer.py下载在我gitee上，下面是链接地址
https://gitee.com/cnlycs/hand_static-lib/tree/master
如果只需要用 Python 进行手势识别，那么到这里就已经完成了，按自己的需求修改 infer.py 即可。
看看效果
0 在这里插入图片描述

在这里插入图片描述
label一共为[0,1,2,3,4,5]

三、C++实现

前面python的配置都得安装

本质上是通过 Python 提供的 C/C++ 接口（Python C API），在 C++ 程序中嵌入 Python 解释器并调用 Python 脚本来实现的。
（我也尝试过编译 mediapipe 源码来生成 DLL 文件，只能说目前各方面条件还不成熟：不是网络问题，就是编译问题，能否成功全看运气。最后虽然编译成功了，但卡在了部署阶段）

1.创建新项目

通过visual studio 2019创建一个新项目，我选择的是控制台应用，毕竟展示出来给大家看，也可自己打包成静态库
在这里插入图片描述

2.将以下代码复制到 pose_demo.cpp 中

#include<iostream>
#include<Python.h>
#include <numpy/arrayobject.h>//numpy的头文件
#include<opencv/cv.hpp>//opencv的头文件
using namespace cv;
using namespace std;


/**
 * Imports the Python module `infer` from ./script, calls its `load_model`
 * function once, and returns its `infer_image` callable.
 *
 * The Python interpreter must already be initialized (Py_Initialize) before
 * this function is called.
 *
 * @return A new (owned) reference to `infer.infer_image`; the caller is
 *         responsible for Py_DECREF-ing it. Returns NULL if NumPy or the
 *         module cannot be imported, if either attribute is missing or not
 *         callable, or if `load_model` raises.
 */
PyObject* Init_Hand_Model() {
	// Load the NumPy C API. If the import fails, this macro returns from the
	// enclosing function (with NULL under Python 3).
	import_array();

	// Make ./script importable so that `import infer` can find infer.py.
	PyRun_SimpleString("import sys");
	PyRun_SimpleString("sys.path.append('./script')");

	// PyImport_ImportModule is the C equivalent of `import infer`.
	PyObject* pModule = PyImport_ImportModule("infer");
	if (pModule == nullptr) {
		if (PyErr_Occurred()) PyErr_Print();
		std::cout << "pModule not found" << std::endl;
		// Continuing with a NULL module would crash in PyObject_GetAttrString.
		return nullptr;
	}

	// Load the model once up front.
	PyObject* pFunc_load = PyObject_GetAttrString(pModule, "load_model");
	if (pFunc_load == nullptr || !PyCallable_Check(pFunc_load)) {
		if (PyErr_Occurred()) PyErr_Print();
		std::cout << "pFunc_load not found!" << std::endl;
		Py_XDECREF(pFunc_load);
		Py_DECREF(pModule);
		return nullptr;
	}
	PyObject* pLoadResult = PyObject_CallObject(pFunc_load, nullptr);
	Py_DECREF(pFunc_load);
	if (pLoadResult == nullptr) {
		// load_model raised; print the traceback instead of silently continuing.
		if (PyErr_Occurred()) PyErr_Print();
		std::cout << "load_model failed!" << std::endl;
		Py_DECREF(pModule);
		return nullptr;
	}
	Py_DECREF(pLoadResult);	// the return value itself is not needed

	// Fetch the per-frame inference function.
	PyObject* pInfer = PyObject_GetAttrString(pModule, "infer_image");
	Py_DECREF(pModule);	// the module stays alive in sys.modules
	if (pInfer == nullptr || !PyCallable_Check(pInfer)) {
		if (PyErr_Occurred()) PyErr_Print();
		std::cout << "pInfer not found!" << std::endl;
		Py_XDECREF(pInfer);
		return nullptr;
	}
	return pInfer;
}

/**
 * Runs the Python `infer_image` callable on one frame.
 *
 * The frame is exposed to Python as a (rows, cols, channels) uint8 NumPy
 * array that wraps `img`'s pixel buffer without copying. The callable is
 * expected to return a list whose item 0 is an H x W x 3 uint8 image and whose
 * item 1 is an integer label.
 *
 * @param img    Input frame; must have 8-bit unsigned depth.
 * @param pInfer Borrowed reference to the Python callable (not released here).
 * @param res    Receives the label, or -1 if no label could be read.
 *               May be NULL if the label is not needed.
 * @return A deep copy of the image returned by Python, so it stays valid
 *         after the Python objects are released. Empty on any failure.
 */
cv::Mat Hand_Infer(cv::Mat img, PyObject* pInfer,int *res)
{
	int label = -1;
	if (res != nullptr) *res = label;

	cv::Mat com;
	if (pInfer == nullptr || img.empty() || img.depth() != CV_8U) {
		return com;
	}
	// The NumPy wrapper below describes a dense row-major buffer, so make
	// sure the pixel data really is laid out that way.
	if (!img.isContinuous()) {
		img = img.clone();
	}

	// PyArray_SimpleNewFromData arguments: number of dimensions (3), the
	// dimension array, the element type, and the data pointer. The resulting
	// array does not own the pixel memory.
	npy_intp dims[] = { img.rows, img.cols, img.channels() };
	PyObject* pValue = PyArray_SimpleNewFromData(3, dims, NPY_UINT8, img.data);
	if (pValue == nullptr) {
		if (PyErr_Occurred()) PyErr_Print();
		return com;
	}
	PyObject* pArgs = PyTuple_New(1);
	if (pArgs == nullptr) {
		Py_DECREF(pValue);
		if (PyErr_Occurred()) PyErr_Print();
		return com;
	}
	// PyTuple_SetItem steals the reference to pValue: from here on the tuple
	// owns it, so pValue must NOT be Py_DECREF-ed separately.
	PyTuple_SetItem(pArgs, 0, pValue);

	PyObject* pRet = PyObject_CallObject(pInfer, pArgs);
	Py_DECREF(pArgs);
	if (pRet == nullptr) {
		// The Python function raised; show the traceback and report failure.
		if (PyErr_Occurred()) PyErr_Print();
		return com;
	}

	// Parse the result: [image, label].
	if (PyList_Check(pRet) && PyList_Size(pRet) >= 2) {
		PyObject* pImage = PyList_GetItem(pRet, 0);	// borrowed reference
		PyObject* pLabel = PyList_GetItem(pRet, 1);	// borrowed reference

		if (pImage != nullptr && PyArray_Check(pImage)) {
			// Obtain a C-contiguous, aligned uint8 array (new reference); this
			// is the same object when it already satisfies the requirements.
			PyArrayObject* array_com = reinterpret_cast<PyArrayObject*>(
				PyArray_FROM_OTF(pImage, NPY_UINT8, NPY_ARRAY_IN_ARRAY));
			if (array_com != nullptr
				&& PyArray_NDIM(array_com) == 3
				&& PyArray_DIM(array_com, 2) == 3) {
				npy_intp* shape = PyArray_SHAPE(array_com);
				cv::Mat view(static_cast<int>(shape[0]), static_cast<int>(shape[1]),
					CV_8UC3, PyArray_DATA(array_com));
				// Copy the pixels: `view` only aliases memory that is released
				// together with the Python objects below.
				com = view.clone();
			}
			Py_XDECREF(array_com);
		}

		if (pLabel == nullptr || !PyArg_Parse(pLabel, "i", &label)) {
			label = -1;
		}
	}
	if (PyErr_Occurred()) PyErr_Print();	// also clears the error indicator

	Py_DECREF(pRet);	// release the per-call result so it does not leak every frame
	if (res != nullptr) *res = label;
	return com;
}


/**
 * Captures frames from camera 0, runs gesture inference on each mirrored
 * frame, prints the predicted label and shows the image returned by the
 * model in a window named "com".
 *
 * The loop ends when the camera stops delivering frames or when ESC is
 * pressed in the window.
 *
 * @return 0 on normal termination, -1 if the model could not be initialized
 *         or the camera could not be opened.
 */
int hand_infer_by_camera() {
	// Start the embedded Python interpreter.
	Py_Initialize();
	PyObject* pInfer = Init_Hand_Model();
	if (pInfer == nullptr) {
		// Without the inference callable there is nothing to run.
		printf("Can not initialize the hand model\n");
		Py_Finalize();
		return -1;
	}

	cv::VideoCapture cap(0);
	if (!cap.isOpened())
	{
		printf("Can not open a camera\n");
		// Release the Python resources on this path as well.
		Py_DECREF(pInfer);
		Py_Finalize();
		return -1;
	}

	const int kEscKey = 27;
	while (true)
	{
		cv::Mat img;
		cap >> img;
		if (img.empty())
			break;
		cv::flip(img, img, 1);	// mirror horizontally

		// A stack variable replaces the per-frame heap allocation that was
		// never freed.
		int res = -1;
		cv::Mat com = Hand_Infer(img, pInfer, &res);
		std::cout << res;

		// cv::imshow rejects empty images, so skip frames whose inference
		// produced no output image.
		if (!com.empty())
			cv::imshow("com", com);
		if (cv::waitKey(10) == kEscKey)
			break;	// gives the cleanup below a chance to run
	}

	Py_DECREF(pInfer);
	Py_Finalize();
	return 0;
}


/**
 * Program entry point: runs the camera-based gesture recognition loop and
 * uses its status as the process exit code, so that a failure is visible to
 * the caller instead of always reporting success.
 */
int main()
{
	return hand_infer_by_camera();
}