【乘风新基建】【百度大脑新品体验】智能结构化识别
让天涯 发布于2020-09 浏览:2439 回复:0
0
收藏

百度新推出的智能结构化识别技术,可结构化识别各类卡证、票据,无需配置结构化对应关系、无需提取关键词、无需定制开发,直接上传图片即可获得结构化识别信息,大大简化了结构化识别过程,有效降低识别模板定制成本,快速实现各种新卡证/票据的信息录入。

一、使用攻略

说明:本文采用C# 语言,开发环境为.Net Core 3.1,采用在线API接口方式实现。

(1)平台接入
登录 百度智能云-管理中心 创建 “文字识别”应用,获取 “API Key ”和 “Secret Key”(目前邀测,需要人工或填写工单申请):https://console.bce.baidu.com/ai/?_=1600438006044&fromai=1#/ai/ocr/overview/index

(2)接口文档

文档地址:https://ai.baidu.com/ai-doc/OCR/Qke3nkykj

接口描述:结构化识别各类卡证、票据,无需配置结构化对应关系、无需提取关键词、无需定制开发,直接上传图片即可获得结构化识别信息。

请求说明

HTTP方法:POST
请求URL:https://aip.baidubce.com/rest/2.0/ocr/v1/intelligent_ocr
URL参数:

Header如下:

Body中放置请求参数,参数详情如下:
请求参数

返回说明
返回参数

返回示例:

{
	"words_result": [
		{
			"value": {
				"probability": {
					"average": 0.99977076053619,
					"min": 0.99970018863678,
					"variance": 4.9803929869086e-9
				},
				"location": {
					"top": 146,
					"left": 365,
					"width": 50,
					"height": 25
				},
				"word": "丛齐"
			},
			"key": {
				"probability": {
					"average": 0.99997997283936,
					"min": 0.99997985363007,
					"variance": 1.4210854715202e-14
				},
				"location": {
					"top": 149,
					"left": 311,
					"width": 40,
					"height": 20
				},
				"word": "姓名"
			}
		},
		{
			"value": {
				"probability": {
					"average": 0.9999588727951,
					"min": 0.9999588727951,
					"variance": 0
				},
				"location": {
					"top": 190,
					"left": 366,
					"width": 20,
					"height": 21
				},
				"word": "男"
			},
			"key": {
				"probability": {
					"average": 0.99983507394791,
					"min": 0.99969410896301,
					"variance": 1.9871126966109e-8
				},
				"location": {
					"top": 192,
					"left": 312,
					"width": 39,
					"height": 19
				},
				"word": "性别"
			}
		},
		{
			"value": {
				"probability": {
					"average": 0.99964165687561,
					"min": 0.99964165687561,
					"variance": 0
				},
				"location": {
					"top": 228,
					"left": 366,
					"width": 22,
					"height": 20
				},
				"word": "汉"
			},
			"key": {
				"probability": {
					"average": 0.99994975328445,
					"min": 0.99993216991425,
					"variance": 3.0917490789761e-10
				},
				"location": {
					"top": 228,
					"left": 310,
					"width": 41,
					"height": 20
				},
				"word": "民族"
			}
		},
		{
			"value": {
				"probability": {
					"average": 0.99989211559296,
					"min": 0.9996235370636,
					"variance": 1.0301564046244e-8
				},
				"location": {
					"top": 262,
					"left": 356,
					"width": 158,
					"height": 21
				},
				"word": "1989年7月28日"
			},
			"key": {
				"probability": {
					"average": 0.99828881025314,
					"min": 0.99683433771133,
					"variance": 0.0000021154903606657
				},
				"location": {
					"top": 264,
					"left": 312,
					"width": 40,
					"height": 19
				},
				"word": "出生"
			}
		},
		{
			"value": {
				"probability": {
					"average": 0.99985313415527,
					"min": 0.99945932626724,
					"variance": 1.7040544975089e-8
				},
				"location": {
					"top": 315,
					"left": 343,
					"width": 297,
					"height": 25
				},
				"word": "370441198907287001"
			},
			"key": {
				"probability": {
					"average": 0.99995613098145,
					"min": 0.99990141391754,
					"variance": 1.089595969006e-9
				},
				"location": {
					"top": 320,
					"left": 191,
					"width": 140,
					"height": 21
				},
				"word": "社会保障号码"
			}
		}
	],
	"log_id": "8733452781125821952",
	"words_result_num": 5,
	"direction": 0
}

 

(3)源码共享

(3-1)根据 API Key 和 Secret Key 获取 AccessToken

/// <summary>
/// Requests a Baidu access_token using the OAuth client-credentials grant.
/// </summary>
/// <param name="clientId">API Key of the Baidu application.</param>
/// <param name="clientSecret">Secret Key of the Baidu application.</param>
/// <returns>The value of the <c>access_token</c> field in the token response.</returns>
/// <exception cref="InvalidOperationException">
/// The response is not a JSON object or contains no <c>access_token</c> field.
/// </exception>
public static string GetAccessToken(string clientId, string clientSecret)
{
    const string authHost = "https://aip.baidubce.com/oauth/2.0/token";

    var paraList = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("grant_type", "client_credentials"),
        new KeyValuePair<string, string>("client_id", clientId),
        new KeyValuePair<string, string>("client_secret", clientSecret)
    };

    // Dispose the client, the form content and the response so sockets and
    // buffers are released even when the request fails.
    using (HttpClient client = new HttpClient())
    using (FormUrlEncodedContent content = new FormUrlEncodedContent(paraList))
    // GetAwaiter().GetResult() keeps the method synchronous but surfaces the
    // original exception instead of an AggregateException wrapper.
    using (HttpResponseMessage response = client.PostAsync(authHost, content).GetAwaiter().GetResult())
    {
        string result = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();

        JObject jo = JsonConvert.DeserializeObject(result) as JObject;
        JToken token = jo?["access_token"];
        if (token == null)
        {
            // Include the raw response so the caller can see the service's error payload.
            throw new InvalidOperationException("Token response does not contain access_token: " + result);
        }

        return token.ToString();
    }
}

(3-2)调用API接口获取识别结果

(3-2-1)在Startup.cs 文件 的 Configure(IApplicationBuilder app, IHostingEnvironment env) 方法中开启虚拟目录映射功能:

// Absolute path of the wwwroot directory under the current working directory.
string webRootPath = Path.Combine(Directory.GetCurrentDirectory(), "wwwroot");

// Physical directory that receives the uploaded images.
string baiduAIsRoot = Path.Combine(webRootPath, "Uploads", "BaiduAIs");

// PhysicalFileProvider requires an existing directory; create it up front so the
// application can start before the first upload (no-op when it already exists).
Directory.CreateDirectory(baiduAIsRoot);

// Serve the upload directory under the "/BaiduAIs" request path.
app.UseStaticFiles(new StaticFileOptions
{
    FileProvider = new PhysicalFileProvider(baiduAIsRoot),
    RequestPath = "/BaiduAIs"
});

(3-2-2) 建立Index.cshtml文件

(3-2-2-1)前台代码:

    由于html代码无法原生显示,只能简单说明一下:

    主要是一个form表单,需要设置属性enctype="multipart/form-data",否则无法上传图片;

    form表单里面有几个控件:

一个Input:type="file",asp-for="FileUpload" ,上传图片;

一个Input:type="submit",asp-page-handler="Intelligent" ,提交请求。

一个img:src="@Model.curPath",显示需要识别的图片。

最后显示后台 msg 字符串列表信息,如果需要输出原始Html代码,则需要使用@Html.Raw()函数。 

(3-2-2-2) 后台代码: 

主程序代码:

// Image file posted by the form; bound from the request.
[BindProperty]
public IFormFile FileUpload { get; set; }
// Bound from the request; not used by the handlers shown here.
[BindProperty]
public string ImageUrl { get; set; }
// Result/diagnostic lines produced by the last recognition request, for display on the page.
public List<string> msg = new List<string>();
// URL of the uploaded image, for display on the page.
public string curPath { get; set; }
// Absolute path of the wwwroot directory under the current working directory.
string webRootPath = Path.Combine(Directory.GetCurrentDirectory(), "wwwroot");

// Upload directory, relative to wwwroot.
string BaiduAI_OCRPath="Uploads//BaiduAIs//";
// URL prefix under which the upload directory is served.
string BaiduAI_OCRUrl="/BaiduAIs/";
// Placeholder credentials: replace with your own values.
// NOTE(review): prefer loading these from configuration instead of source code.
string OCR_API_KEY="你的API KEY";
string OCR_SECRET_KEY="你的SECRET KEY";

/// <summary>
/// Creates the page model. No initialisation is performed here.
/// </summary>
public OCRSearchModel() { }

/// <summary>
/// Handles GET requests for the page. Intentionally does nothing.
/// </summary>
public void OnGet() { }

/// <summary>
/// Handles the "Intelligent" form post: saves the uploaded image, sends it to the
/// intelligent structured OCR API and stores the recognised key/value pairs in <c>msg</c>.
/// </summary>
/// <returns>The current page, re-rendered with <c>msg</c> and <c>curPath</c> populated.</returns>
public async Task<IActionResult> OnPostIntelligentAsync()
{
    if (FileUpload is null)
    {
        ModelState.AddModelError(string.Empty, "请先选择需要识别的图片!");
    }
    if (!ModelState.IsValid)
    {
        return Page();
    }
    msg = new List<string>();

    // UploadFile generates the unique file name itself, so no separate name is needed here.
    string fileDir = Path.Combine(webRootPath, BaiduAI_OCRPath);
    string imgName = await UploadFile(FileUpload, fileDir);

    string fileName = Path.Combine(fileDir, imgName);
    string imgBase64 = GetFileBase64(fileName);
    // curPath is a URL, not a file-system path, so build it by concatenation.
    curPath = BaiduAI_OCRUrl + imgName;

    string result = string.Empty;
    try
    {
        // Stopwatch measures elapsed time independently of wall-clock adjustments.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        result = GetOCRJson(imgBase64, OCR_API_KEY, OCR_SECRET_KEY);
        stopwatch.Stop();
        TimeSpan ts = stopwatch.Elapsed;

        // Parsing happens inside the try so a malformed response is reported on the page.
        JObject jo = (JObject)JsonStringToObj(result);

        if (jo["error_code"] != null)
        {
            msg.Add("调用失败:" + jo["error_code"].ToString() + "-" + jo["error_msg"]?.ToString());
        }
        else
        {
            List<JToken> msgList = jo["words_result"].ToList();
            int number = msgList.Count;
            msg.Add("智能结构化识别结果(耗时" + ts.TotalSeconds + "秒):\n");
            msg.Add("识别结构数:" + number + "");
            foreach (JToken ms in msgList)
            {
                msg.Add(ms["key"]["word"].ToString() + ":" + ms["value"]["word"].ToString());
            }
        }
    }
    catch (Exception e)
    {
        msg.Add("发生异常:");
        // The raw response is only available when the request itself succeeded.
        if (!string.IsNullOrEmpty(result))
        {
            msg.Add(result);
        }
        msg.Add(e.Message);
    }
    return Page();
}

其他相关函数:

/// <summary>
/// Sends a base64-encoded image to the intelligent structured OCR endpoint and
/// returns the raw JSON response.
/// </summary>
/// <param name="strbaser64">Base64 encoding of the image.</param>
/// <param name="clientId">API Key of the Baidu application.</param>
/// <param name="clientSecret">Secret Key of the Baidu application.</param>
/// <returns>The response body as a string.</returns>
public static string GetOCRJson( string strbaser64, string clientId, string clientSecret)
{
    string token = GetAccessToken(clientId, clientSecret);
    string host = "https://aip.baidubce.com/rest/2.0/ocr/v1/intelligent_ocr?access_token=" + token;
    // Use UTF-8 explicitly instead of the platform-dependent default encoding.
    Encoding encoding = Encoding.UTF8;

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
    // HTTP method names are case-sensitive; send the canonical upper-case form.
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.KeepAlive = true;

    // Base64 contains '+', '/' and '=', which must be URL-encoded in a form body.
    string str = "image=" + HttpUtility.UrlEncode(strbaser64);
    byte[] buffer = encoding.GetBytes(str);
    request.ContentLength = buffer.Length;

    // Dispose the request stream so the body is flushed and the connection is released.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseStream, encoding))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Requests a Baidu access_token using the OAuth client-credentials grant.
/// </summary>
/// <param name="clientId">API Key of the Baidu application.</param>
/// <param name="clientSecret">Secret Key of the Baidu application.</param>
/// <returns>The value of the <c>access_token</c> field in the token response.</returns>
/// <exception cref="InvalidOperationException">
/// The response is not a JSON object or contains no <c>access_token</c> field.
/// </exception>
public static string GetAccessToken(string clientId, string clientSecret)
{
    const string authHost = "https://aip.baidubce.com/oauth/2.0/token";

    var paraList = new List<KeyValuePair<string, string>>
    {
        new KeyValuePair<string, string>("grant_type", "client_credentials"),
        new KeyValuePair<string, string>("client_id", clientId),
        new KeyValuePair<string, string>("client_secret", clientSecret)
    };

    // Dispose the client, the form content and the response so sockets and
    // buffers are released even when the request fails.
    using (HttpClient client = new HttpClient())
    using (FormUrlEncodedContent content = new FormUrlEncodedContent(paraList))
    // GetAwaiter().GetResult() keeps the method synchronous but surfaces the
    // original exception instead of an AggregateException wrapper.
    using (HttpResponseMessage response = client.PostAsync(authHost, content).GetAwaiter().GetResult())
    {
        string result = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();

        JObject jo = JsonConvert.DeserializeObject(result) as JObject;
        JToken token = jo?["access_token"];
        if (token == null)
        {
            // Include the raw response so the caller can see the service's error payload.
            throw new InvalidOperationException("Token response does not contain access_token: " + result);
        }

        return token.ToString();
    }
}

/// <summary>
/// Generates a unique random file name from a new GUID.
/// </summary>
/// <returns>The GUID formatted with the "N" specifier (32 hex digits, no hyphens).</returns>
public static string GetRandomName()
{
    Guid uniqueId = Guid.NewGuid();
    string name = uniqueId.ToString("N");
    return name;
}

/// <summary>
/// Returns the base64 encoding of a file's contents.
/// </summary>
/// <param name="fileName">Absolute path of the file to read.</param>
/// <returns>The file's bytes encoded as a base64 string (empty for an empty file).</returns>
public static String GetFileBase64(string fileName)
{
    // File.ReadAllBytes opens the file read-only, reads until the end of the file
    // (a single Stream.Read call may return fewer bytes than requested) and always
    // closes the handle, even when reading fails.
    byte[] arr = File.ReadAllBytes(fileName);
    return Convert.ToBase64String(arr);
}

/// <summary>
/// Deserializes a JSON string into an object.
/// </summary>
/// <param name="jsonString">The JSON text to deserialize.</param>
/// <returns>The object produced by <c>JsonConvert.DeserializeObject</c>.</returns>
public static Object JsonStringToObj(string jsonString) => JsonConvert.DeserializeObject(jsonString);

/// <summary>
/// Saves an uploaded file into <paramref name="fileDir"/> under a newly generated
/// GUID-based name and returns that name.
/// </summary>
/// <param name="formFile">The uploaded file.</param>
/// <param name="fileDir">Absolute path of the target directory; created if missing.</param>
/// <returns>The generated file name (GUID plus the original extension), without directory.</returns>
/// <exception cref="ArgumentNullException"><paramref name="formFile"/> is null.</exception>
public static async Task<string> UploadFile(IFormFile formFile, string fileDir)
{
    if (formFile == null)
    {
        throw new ArgumentNullException(nameof(formFile));
    }

    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(fileDir);

    // Only the extension of the client-supplied name is kept; the name itself is replaced
    // by a GUID so the client cannot choose the stored file name.
    // NOTE(review): the extension is still client-controlled - consider restricting it
    // to known image extensions before saving into a publicly served directory.
    string extension = Path.GetExtension(formFile.FileName);
    string imgName = Guid.NewGuid().ToString("N") + extension;
    var filePath = Path.Combine(fileDir, imgName);

    using (var fileStream = new FileStream(filePath, FileMode.Create, FileAccess.Write))
    {
        await formFile.CopyToAsync(fileStream);
    }

    return imgName;
}

二、效果测试

1、页面:

2、识别结果:

2.1

2.2

2.3

2.4

2.5

三、测试结果及建议

从测试结果来看,百度新推出的智能结构化识别技术总体上还是比较准确的,它可以自动识别各类卡证、票据,不用针对特定类别进行定制开发,大大降低开发难度,节约开发成本,并且识别准确率也是比较好的,基本上只要提供识别的原图片比较清晰,就能够准确识别出来相应的内容。

但是,毕竟还处于邀测阶段,所以还存在一些小问题:比如如果要识别的值有多行(如图2的单位名称),就有可能只保留第一行而忽略了第二行;如果识别的键值不在同一行,可能就被忽略了(如图2的执行标准);

另外,目前好像只能识别横向排列的键值对,无法识别纵向排列的键值对,如果以后能够增加对纵向排列键值对的识别就更好了。

收藏
点赞
0
个赞
TOP
切换版块