Skip to content

Commit

Permalink
add fast mode, which may be useful for non-English text, such as Germ…
Browse files Browse the repository at this point in the history
…an text
  • Loading branch information
srirangav committed Apr 25, 2022
1 parent 7c440af commit a0fc8e0
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 45 deletions.
21 changes: 12 additions & 9 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,22 @@ and derives its names from, the Vision framework (v for [V]ision).

Usage:

vocr [-i [no|tab]] [-p] [-v] [-l [lang]] [files]
vocr [-v] [-f] [-p] [-i [no|tab]] [-l [lang]] [files]

If -i is specified with the 'no' option, vocr will not attempt
to indent any text that is OCR'ed. If -i is specified with the
'tab' option, vocr will indent using tabs instead of spaces (by
default vocr indents using spaces).
If -v is specified, vocr runs in [v]erbose mode and outputs
errors and informational messages.

If -f is specified, vocr uses the fast algorithm. This may be
useful when recognizing text in non-English languages, such as
German.

If -p is specified, when OCR'ing a PDF, a page break (^L) will
be inserted at the end of each page.

If -v is specified, vocr runs in [v]erbose mode and outputs
errors and informational messages.
If -i is specified with the 'no' option, vocr will not attempt
to indent any text that is OCR'ed. If -i is specified with the
'tab' option, vocr will indent using tabs instead of spaces (by
default vocr indents using spaces).

If -l is specified, on macOS 11.x (Big Sur) and newer, vocr
will ask the Vision framework to recognize the text in the
Expand All @@ -34,8 +38,7 @@ Usage:
'it' - Italian
'pt' - Portuguese
'es' - Spanish
'zh' - Simplified Chinese
'zt' - Traditional Chinese
'zh' - Chinese

Build:

Expand Down
8 changes: 6 additions & 2 deletions vocr.1
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
vocr - a simple utility for performing optical character recognition
on images and PDFs
.SH SYNOPSIS
vocr [-p] [-v] [-i [no|tab]] [-l [lang]] [files]
vocr [-p] [-v] [-f] [-i [no|tab]] [-l [lang]] [files]
.SH DESCRIPTION
vocr is a simple utility for performing optical character recognition
(OCR) on images and PDFs. It prints any text found in the specified
Expand All @@ -21,12 +21,16 @@ spaces).
When OCR'ing a PDF, tells vocr to insert a page break (^L) at the end
of each page.
.TP
.B \-f
Use the [f]ast algorithm for OCR. This may be useful when recognizing
text in non-English languages, such as German.
.TP
.B \-l [lang]
If -l is specified, on macOS 11.x (Big Sur) and newer, vocr will ask the
Vision framework to recognize the text in the specified language. The
supported language options are: 'de' (German), 'en' (English), 'fr'
(French), 'it' (Italian), 'pt' (Portuguese), 'es' (Spanish), 'zh'
(Simplified Chinese), and 'zt' (Traditional Chinese).
(Chinese).
.TP
.B \-v
Enables verbose mode - vocr will print out informational and/or error
Expand Down
138 changes: 104 additions & 34 deletions vocr.m
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,19 @@ a copy of this software and associated documentation files (the

/*
command line options:
-f - use the [f]ast recognition algorithm
-h - print usage / [h]elp
-i [mode] - set the [i]ndent mode:
'no' disables indenting
'tab' indents with tabs (default is to use 4 spaces)
-l - specify the [l]anguage that the input is in (TODO)
'no' - disables indenting
'tab' - indents with tabs (default is to use 4 spaces)
-l - specify the [l]anguage to use for recognition
-p - add a [p]age break (form feed, ^L) between pages
-v - be [v]erbose
*/

enum
{
gPgmOptFast = 'f',
gPgmOptHelp = 'h',
gPgmOptIndent = 'i',
gPgmOptLang = 'l',
Expand All @@ -75,35 +77,34 @@ a copy of this software and associated documentation files (the

enum
{
gLangChineseSimplified = 'c', /* zh-Hans */
gLangGerman = 'd', /* de-DE */
gLangEnglish = 'e', /* en-US */
gLangFrench = 'f', /* fr-FR */
gLangItalian = 'i', /* it-IT */
gLangPortuguese = 'p', /* pt-BR */
gLangSpanish = 's', /* es-ES */
gLangChineseTraditional = 't' /* zh-Hant */
gLangGerman = 'd', /* de-DE */
gLangEnglish = 'e', /* en-US */
gLangFrench = 'f', /* fr-FR */
gLangItalian = 'i', /* it-IT */
gLangPortuguese = 'p', /* pt-BR */
gLangSpanish = 's', /* es-ES */
gLangChinese = 'z', /* zh-Hans and zh-Hant */
};

static const char *gPgmOpts = "hpvi:l:";
static const char *gPgmOpts = "fhpvi:l:";
static const char *gPgmIndentNo = "no";
static const char *gPgmIndentTab = "tab";
static BOOL gQuiet = YES;

static const char *gPgmLangGerman = "de";
static const char *gPgmLangEnglish = "en";
static const char *gPgmLangFrench = "fr";
static const char *gPgmLangItalian = "it";
static const char *gPgmLangPortuguese = "pt";
static const char *gPgmLangSpanish = "es";
static const char *gPgmLangChineseSimplified = "zh";
static const char *gPgmLangChineseTraditional = "zt";
static const char *gPgmLangGerman = "de";
static const char *gPgmLangEnglish = "en";
static const char *gPgmLangFrench = "fr";
static const char *gPgmLangItalian = "it";
static const char *gPgmLangPortuguese = "pt";
static const char *gPgmLangSpanish = "es";
static const char *gPgmLangChinese = "zh";

/* ocr options */

typedef struct
{
BOOL addPageBreak;
BOOL fast;
BOOL indent;
BOOL indentWithTabs;
int lang;
Expand Down Expand Up @@ -135,14 +136,16 @@ static BOOL ocrImage(CGImageRef cgImage,
static void printUsage(void)
{
fprintf(stderr,
"Usage: %s [-%c] | [-%c] [-%c] [-%c [%s|%s]] [files]\n",
"Usage: %s [-%c] | [-%c] [-%c] [-%c] [-%c [%s|%s]] [-%c [lang]] [files]\n",
gPgmName,
gPgmOptHelp,
gPgmOptVerbose,
gPgmOptFast,
gPgmOptPageBreak,
gPgmOptIndent,
gPgmIndentNo,
gPgmIndentTab);
gPgmIndentTab,
gPgmOptLang);
}

/* printError - print an error message */
Expand Down Expand Up @@ -339,8 +342,9 @@ static BOOL ocrImage(CGImageRef cgImage,
unsigned int indentLevel = 0, k = 0;
double prevStart = 0.0, prevEnd = 0.0;
double curStart = 0.0, curEnd = 0.0;
BOOL indent = YES;
BOOL indent = YES, fast = NO, langCorrect = YES;
NSString *indentStr = gIndentStr;
NSArray<NSString *> *langs = nil;

#ifdef VOCR_IMG2TXT
if (text == nil)
Expand All @@ -352,11 +356,63 @@ static BOOL ocrImage(CGImageRef cgImage,

if (opts != NULL)
{

/* is fast mode requested? */

fast = opts->fast;

/* desired indent */

indent = opts->indent;
if (opts->indentWithTabs)
{
indentStr = @"\t";
}

/*
on BigSur (11.x) and newer, try to set the
recognition language
*/

if (@available(macos 11, *))
{
switch(opts->lang)
{
case gLangGerman:
langs = [NSArray arrayWithObjects: @"de-DE", nil];
break;
case gLangEnglish:
break;
case gLangFrench:
langs = [NSArray arrayWithObjects: @"fr-FR", nil];
break;
case gLangItalian:
langs = [NSArray arrayWithObjects: @"it-IT", nil];
break;
case gLangPortuguese:
langs = [NSArray arrayWithObjects: @"pt-BR", nil];
break;
case gLangSpanish:
langs = [NSArray arrayWithObjects: @"es-ES", nil];
break;
case gLangChinese:
langs = [NSArray arrayWithObjects: @"zh-Hans",
@"zh-Hant",
@"en-US",
nil];

/*
disable language correction for Chinese, see:
https://developer.apple.com/documentation/vision/recognizing_text_in_images
*/

langCorrect = NO;
break;
default:
langs = nil;
break;
}
}
}

#ifdef VOCR_IMG2TXT
Expand Down Expand Up @@ -402,18 +458,28 @@ static BOOL ocrImage(CGImageRef cgImage,
}

/*
enable accurate recognition and language correction
enable fast/accurate recognition and language correction
https://developer.apple.com/documentation/vision/vnrequesttextrecognitionlevel?language=objc
https://developer.apple.com/documentation/vision/vnrecognizetextrequest/3166773-useslanguagecorrection?language=objc
*/

[request setRecognitionLevel:
VNRequestTextRecognitionLevelAccurate];
[request setUsesLanguageCorrection: YES];
if (fast)
{
[request setRecognitionLevel:
VNRequestTextRecognitionLevelFast];
}
else
{
[request setRecognitionLevel:
VNRequestTextRecognitionLevelAccurate];
}

[request setUsesLanguageCorrection: langCorrect];

/*
use the version 2 algorithm on MacOSX 11+, which supports
multiple languages:
multiple languages, and, if an alternate language is requested
set that as well:
https://developer.apple.com/documentation/vision/vnrecognizetextrequestrevision2?language=objc
https://stackoverflow.com/questions/63813709
Expand All @@ -422,6 +488,10 @@ static BOOL ocrImage(CGImageRef cgImage,
if (@available(macos 11, *))
{
[request setRevision: VNRecognizeTextRequestRevision2];
if (langs != nil)
{
[request setRecognitionLanguages: langs];
}
}
else
{
Expand Down Expand Up @@ -893,6 +963,7 @@ int main(int argc, char * const argv[])
return 1;
}

options.fast = NO;
options.addPageBreak = NO;
options.indent = YES;
options.indentWithTabs = NO;
Expand All @@ -905,6 +976,9 @@ int main(int argc, char * const argv[])
case gPgmOptHelp:
optHelp = YES;
break;
case gPgmOptFast:
options.fast = YES;
break;
case gPgmOptPageBreak:
options.addPageBreak = YES;
break;
Expand Down Expand Up @@ -952,13 +1026,9 @@ int main(int argc, char * const argv[])
{
options.lang = gLangSpanish;
}
else if (strcmp(optarg, gPgmLangChineseSimplified) == 0)
{
options.lang = gLangChineseSimplified;
}
else if (strcmp(optarg, gPgmLangChineseTraditional) == 0)
else if (strcmp(optarg, gPgmLangChinese) == 0)
{
options.lang = gLangChineseTraditional;
options.lang = gLangChinese;
}
else
{
Expand Down

0 comments on commit a0fc8e0

Please sign in to comment.