Windows下Java调用OCR进行图片识别
使用Java语言,通过Tesseract-OCR对图片进行识别。
1.Tesseract-OCR
下载Windows版本并安装,并记下安装目录(下文OCR类中的tessPath需要与该目录一致)。
2.程序如下:
a.ImageIOHelper类
packageOCR;
importjava.awt.image.BufferedImage;
importjava.io.File;
importjava.io.IOException;
importjava.util.Iterator;
importjava.util.Locale;
importjavax.imageio.IIOImage;
importjavax.imageio.ImageIO;
importjavax.imageio.ImageReader;
importjavax.imageio.ImageWriteParam;
importjavax.imageio.ImageWriter;
importjavax.imageio.metadata.IIOMetadata;
importjavax.imageio.stream.ImageInputStream;
importjavax.imageio.stream.ImageOutputStream;
importcom.sun.media.imageio.plugins.tiff.TIFFImageWriteParam;
publicclassImageIOHelper{
/**
*图片文件转换为tif格式
*@paramimageFile文件路径
*@paramimageFormat文件扩展名
*@return
*/
publicstaticFilecreateImage(FileimageFile,StringimageFormat){
FiletempFile=null;
try{
Iteratorreaders=ImageIO.getImageReadersByFormatName(imageFormat);
ImageReaderreader=readers.next();
ImageInputStreamiis=ImageIO.createImageInputStream(imageFile);
reader.setInput(iis);
//Readthestreammetadata
IIOMetadatastreamMetadata=reader.getStreamMetadata();
//SetupthewriteParam
TIFFImageWriteParamtiffWriteParam=newTIFFImageWriteParam(Locale.CHINESE);
tiffWriteParam.setCompressionMode(ImageWriteParam.MODE_DISABLED);
//Gettifwriterandsetoutputtofile
Iteratorwriters=ImageIO.getImageWritersByFormatName("tiff");
ImageWriterwriter=writers.next();
BufferedImagebi=reader.read(0);
IIOImageimage=newIIOImage(bi,null,reader.getImageMetadata(0));
tempFile=tempImageFile(imageFile);
ImageOutputStreamios=ImageIO.createImageOutputStream(tempFile);
writer.setOutput(ios);
writer.write(streamMetadata,image,tiffWriteParam);
ios.close();
writer.dispose();
reader.dispose();
}catch(IOExceptione){
e.printStackTrace();
}
returntempFile;
}
privatestaticFiletempImageFile(FileimageFile){
Stringpath=imageFile.getPath();
StringBufferstrB=newStringBuffer(path);
strB.insert(path.lastIndexOf('.'),0);
returnnewFile(strB.toString().replaceFirst("(?<=//.)(//w+)$","tif"));
}
}
b.OCR核心类
packageOCR;
importjava.io.BufferedReader;
importjava.io.File;
importjava.io.FileInputStream;
importjava.io.InputStreamReader;
importjava.util.ArrayList;
importjava.util.List;
importorg.jdesktop.swingx.util.OS;
publicclassOCR{
privatefinalStringLANG_OPTION="-l";//英文字母小写l,并非数字1
privatefinalStringEOL=System.getProperty("line.separator");
privateStringtessPath="C://ProgramFiles//Tesseract-OCR";
//privateStringtessPath=newFile("tesseract").getAbsolutePath();
publicStringrecognizeText(FileimageFile,StringimageFormat)throwsException{
FiletempImage=ImageIOHelper.createImage(imageFile,imageFormat);
FileoutputFile=newFile(imageFile.getParentFile(),"output");
StringBufferstrB=newStringBuffer();
Listcmd=newArrayList();
if(OS.isWindowsXP()){
cmd.add(tessPath+"//tesseract");
}elseif(OS.isLinux()){
cmd.add("tesseract");
}else{
cmd.add(tessPath+"//tesseract");
}
cmd.add("");
cmd.add(outputFile.getName());
//cmd.add(LANG_OPTION);
//cmd.add("chi_sim");
//cmd.add("eng");
ProcessBuilderpb=newProcessBuilder();
pb.directory(imageFile.getParentFile());
cmd.set(1,tempImage.getName());
pb.command(cmd);
pb.redirectErrorStream(true);
Processprocess=pb.start();
//tesseract.exe1.jpg1-lchi_sim
intw=process.waitFor();
//删除临时正在工作文件
tempImage.delete();
if(w==0){
BufferedReaderin=newBufferedReader(newInputStreamReader(newFileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8"));
Stringstr;
while((str=in.readLine())!=null){
strB.append(str).append(EOL);
}
in.close();
}else{
Stringmsg;
switch(w){
case1:
msg="Errorsaccessingfiles.Theremaybespacesinyourimage'sfilename.";
break;
case29:
msg="Cannotrecongnizetheimageoritsselectedregion.";
break;
case31:
msg="Unsupportedimageformat.";
break;
default:
msg="Errorsoccurred.";
}
tempImage.delete();
//thrownewRuntimeException(msg);
}
newFile(outputFile.getAbsolutePath()+".txt").delete();
returnstrB.toString();
}
}
c.main方法(测试类TestOcr)
packageOCR;
importjava.io.File;
importjava.io.IOException;
publicclassTestOcr{
/**
*@paramargs
*/
publicstaticvoidmain(String[]args){
//输入图片地址
Stringpath="d://test//test.bmp";
try{
StringvalCode=newOCR().recognizeText(newFile(path),"bmp");
System.out.println(valCode);
}catch(IOExceptione){
e.printStackTrace();
}catch(Exceptione){
e.printStackTrace();
}
}
}
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。