BAEL-3874 - OCR with Tesseract

This commit is contained in:
Anshul BANSAL
2020-03-08 17:46:11 +02:00
parent ceaac9022c
commit 475087a1ab
11 changed files with 641 additions and 80 deletions
@@ -0,0 +1,28 @@
package com.baeldung.tesseract;
import java.awt.Rectangle;
import java.io.File;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
/**
 * Minimal Tess4J demonstration: configures a {@link Tesseract} instance, runs OCR over a
 * region of a sample image and prints the recognised output to standard output.
 */
public class Tess4JExample {

    /**
     * Runs OCR on {@code src/main/resources/images/baeldung.png} and prints the result.
     *
     * <p>Both the image path and the tessdata path are relative, so they are resolved against
     * the process working directory.
     *
     * <p>If recognition fails, the stack trace is written to standard error and nothing is
     * written to standard output.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        File image = new File("src/main/resources/images/baeldung.png");

        Tesseract tesseract = new Tesseract();
        // "spa" selects the Spanish trained data; presumably a matching spa.traineddata file
        // must be present under the datapath set below - verify against the resources folder.
        tesseract.setLanguage("spa");
        // NOTE(review): 1 is a raw Tesseract page-segmentation mode value (per the Tesseract
        // docs, automatic segmentation with orientation/script detection) - confirm.
        tesseract.setPageSegMode(1);
        // NOTE(review): 1 is a raw Tesseract OCR engine mode value (per the Tesseract docs,
        // the LSTM engine) - confirm against the installed Tesseract version.
        tesseract.setOcrEngineMode(1);
        // Requests hOCR output, so the printed result is expected to be markup, not plain text.
        tesseract.setHocr(true);
        tesseract.setDatapath("src/main/resources/tessdata");

        try {
            // Rectangle(width, height) is anchored at (0, 0): only the top-left
            // 1200x200 region of the image is passed to the OCR engine.
            String result = tesseract.doOCR(image, new Rectangle(1200, 200));
            // Print only on success; previously a failure fell through and printed "null".
            System.out.println(result);
        } catch (TesseractException e) {
            // No logging framework is in scope in this example, so report on stderr.
            e.printStackTrace();
        }
    }
}