diff --git a/README.txt b/README.txt index 5f29691..e71f820 100644 --- a/README.txt +++ b/README.txt @@ -1,7 +1,7 @@ README ------ -vocr v0.3.0 +vocr v0.3.1 Homepage: @@ -9,10 +9,10 @@ Homepage: About: -vocr is a MacOSX command line program that can perform optical -character recognition (OCR) on images and PDF files. It outputs -any text found in the input files to stdout. vocr relies on, -and derives its names from, the Vision framework (v for [V]ision). + vocr is a MacOSX command line program that can perform optical + character recognition (OCR) on images and PDF files. It outputs + any text found in the input files to stdout. vocr relies on, + and derives its names from, the Vision framework (v for [V]ision). Usage: @@ -91,6 +91,7 @@ Dependencies: History: + v. 0.3.1 - updates for Monterey (MacOSX 12) v. 0.3.0 - switch to PDFKit v. 0.2.3 - fix manpage formatting v. 0.2.2 - move source files into configure.ac @@ -103,9 +104,8 @@ History: Platforms: - vocr has been tested on MacOSX 11 (BigSur) on M1 and x86_64. - It should also work on MacOSX 10.15+ (Catalina) x86_64 and - MacOSX 12.x (Monterey) x86_64 and M1/M2. + vocr has been tested on MacOSX 11 (BigSur) and 12 (Monterey) on M1 + and x86_64. It should also work on MacOSX 10.15+ (Catalina) x86_64. License: diff --git a/configure b/configure index 24f17ad..42fb920 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for vocr 0.2.3. +# Generated by GNU Autoconf 2.71 for vocr 0.3.1. # # Report bugs to . # @@ -610,8 +610,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='vocr' PACKAGE_TARNAME='vocr' -PACKAGE_VERSION='0.2.3' -PACKAGE_STRING='vocr 0.2.3' +PACKAGE_VERSION='0.3.1' +PACKAGE_STRING='vocr 0.3.1' PACKAGE_BUGREPORT='ranga@calalum.org' PACKAGE_URL='' @@ -1228,7 +1228,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. 
# This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures vocr 0.2.3 to adapt to many kinds of systems. +\`configure' configures vocr 0.3.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1290,7 +1290,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of vocr 0.2.3:";; + short | recursive ) echo "Configuration of vocr 0.3.1:";; esac cat <<\_ACEOF @@ -1370,7 +1370,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -vocr configure 0.2.3 +vocr configure 0.3.1 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1493,7 +1493,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by vocr $as_me 0.2.3, which was +It was created by vocr $as_me 0.3.1, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -2740,6 +2740,43 @@ else printf "%s\n" "yes" >&6; } fi +# check for UTType + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for UTType" >&5 +printf %s "checking for UTType... " >&6; } +SAVE_LIBS="$LIBS" +LIBS="$LIBS -framework UniformTypeIdentifiers" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#import +int +main (void) +{ + + [UTType typeWithIdentifier: @"public.image"]; + ; + return 0; +} + +_ACEOF +if ac_fn_objc_try_link "$LINENO" +then : + has_framework=1 +else $as_nop + has_framework=0 +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +if test $has_framework = 0; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + LIBS="$SAVE_LIBS" +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } + CFLAGS="$CFLAGS -DHAVE_UTT" +fi + # check for Vision { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for Vison" >&5 @@ -6766,7 +6803,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by vocr $as_me 0.2.3, which was +This file was extended by vocr $as_me 0.3.1, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -6821,7 +6858,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -vocr config.status 0.2.3 +vocr config.status 0.3.1 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index b986668..05ec8fe 100644 --- a/configure.ac +++ b/configure.ac @@ -1,9 +1,9 @@ dnl -*- Autoconf -*- dnl Process this file with autoconf to produce a configure script. -AC_INIT([vocr],[0.2.3],[ranga@calalum.org]) +AC_INIT([vocr],[0.3.1],[ranga@calalum.org]) -# We want to turn on warnings if we are using gcc and the user did +# We want to turn on warnings if we are using gcc and the user did # not specify CFLAGS. The autoconf check for the C compiler sets the # CFLAGS if gcc is used, so we will save it before we run that check. 
# @@ -60,6 +60,22 @@ else AC_MSG_RESULT([yes]) fi +# check for UTType + +AC_MSG_CHECKING([for UTType]) +SAVE_LIBS="$LIBS" +LIBS="$LIBS -framework UniformTypeIdentifiers" +AC_LINK_IFELSE([AC_LANG_PROGRAM([#import ], [ + [[UTType typeWithIdentifier: @"public.image"];]]) + ],[has_framework=1],[has_framework=0]) +if test $has_framework = 0; then + AC_MSG_RESULT([no]) + LIBS="$SAVE_LIBS" +else + AC_MSG_RESULT([yes]) + CFLAGS="$CFLAGS -DHAVE_UTT" +fi + # check for Vision AC_MSG_CHECKING([for Vison]) @@ -76,21 +92,21 @@ fi dnl TEST_AND_SET_CFLAG(flag, [program]) dnl dnl This attempts to compile a program with a certain compiler flag. -dnl If no program is given, then the minimal program is compiled, and -dnl this tests just the validity of the compiler flag. +dnl If no program is given, then the minimal program is compiled, and +dnl this tests just the validity of the compiler flag. dnl dnl based on: https://github.com/edrosten/autoconf_tutorial define([TEST_AND_SET_CFLAG],[ - AC_MSG_CHECKING([if compiler flag $1 works]) - + AC_MSG_CHECKING([if compiler flag $1 works]) + dnl Store the current CXXFLAGS save_CFLAGS="$OBJCFLAGS" dnl Append the flag of interest OBJCFLAGS="$OBJCFLAGS $1" - + dnl Create an M4 macro, "prog", which expands to a C program. dnl This should either be a default one or the one specified. dnl Note that macros are not local, but there is a stack so push @@ -98,15 +114,15 @@ define([TEST_AND_SET_CFLAG],[ dnl that might already exist. 
m4_if([$2],[],[pushdef(prog, [int main(){}])], [pushdef(prog, [$2])]) - + flag_test=0 - + dnl See if the compiler runs - + AC_COMPILE_IFELSE([AC_LANG_SOURCE([prog])], [flag_test=1],[flag_test=0]) - + dnl De-clobber the "prog" macro - + popdef([prog]) if test $flag_test = 1 @@ -127,9 +143,9 @@ define([TEST_AND_SET_CFLAG],[ TEST_AND_SET_CFLAG(-W) TEST_AND_SET_CFLAG(-Wall) TEST_AND_SET_CFLAG(-Wextra) -TEST_AND_SET_CFLAG(-Wpedantic) -TEST_AND_SET_CFLAG(-Werror) -#TEST_AND_SET_CFLAG(-Wformat=2) +TEST_AND_SET_CFLAG(-Wpedantic) +TEST_AND_SET_CFLAG(-Werror) +#TEST_AND_SET_CFLAG(-Wformat=2) #TEST_AND_SET_CFLAG(-Wformat-nonliteral) TEST_AND_SET_CFLAG(-Wformat-overflow=2) TEST_AND_SET_CFLAG(-Wformat-truncation=2) @@ -139,7 +155,7 @@ TEST_AND_SET_CFLAG(-Wformat-signedness) TEST_AND_SET_CFLAG(-Wtrampolines) TEST_AND_SET_CFLAG(-Walloca) TEST_AND_SET_CFLAG(-Wcast-qual) -TEST_AND_SET_CFLAG(-Wconversion) +TEST_AND_SET_CFLAG(-Wconversion) TEST_AND_SET_CFLAG(-Wtraditional-conversion) TEST_AND_SET_CFLAG(-Warith-conversion) TEST_AND_SET_CFLAG(-Wstack-protector) @@ -183,7 +199,7 @@ TEST_AND_SET_CFLAG(-Wduplicated-cond) TEST_AND_SET_CFLAG(-Wduplicated-branches) TEST_AND_SET_CFLAG(-Wundef) TEST_AND_SET_CFLAG(-fstack-usage) -TEST_AND_SET_CFLAG(-fstack-protector-all) +TEST_AND_SET_CFLAG(-fstack-protector-all) TEST_AND_SET_CFLAG(-fstack-protector-strong) TEST_AND_SET_CFLAG(-fstack-protector-explicit) TEST_AND_SET_CFLAG(-mshstk) diff --git a/listSupportedLangs.m b/listSupportedLangs.m index 07e17d0..af5b8a7 100644 --- a/listSupportedLangs.m +++ b/listSupportedLangs.m @@ -5,6 +5,7 @@ History: v. 0.1.0 (04/25/2022) - Initial version + v. 0.2.0 (10/29/2022) - Updates for MacOSX 12 (Monterey) Copyright (c) 2022 Sriranga R. 
Veeraraghavan @@ -54,12 +55,21 @@ static void listSupportedLangs(void) /* fast, v1 */ +#if (MAC_OS_X_VERSION_MIN_REQUIRED < 120000) langs = [VNRecognizeTextRequest supportedRecognitionLanguagesForTextRecognitionLevel: VNRequestTextRecognitionLevelFast revision: VNRecognizeTextRequestRevision1 error: nil]; +#else + VNRecognizeTextRequest *vnr = [[VNRecognizeTextRequest alloc] init]; + [vnr setRecognitionLevel: VNRequestTextRecognitionLevelFast]; + [vnr setRevision: VNRecognizeTextRequestRevision1]; + langs = [vnr + supportedRecognitionLanguagesAndReturnError: nil]; +#endif + if (langs != nil) { fprintf(stderr,"Fast, v1: "); @@ -85,12 +95,19 @@ static void listSupportedLangs(void) if (@available(macos 11, *)) { +#if (MAC_OS_X_VERSION_MIN_REQUIRED < 120000) langs = [VNRecognizeTextRequest supportedRecognitionLanguagesForTextRecognitionLevel: VNRequestTextRecognitionLevelFast revision: VNRecognizeTextRequestRevision2 error: nil]; +#else + [vnr setRecognitionLevel: VNRequestTextRecognitionLevelFast]; + [vnr setRevision: VNRecognizeTextRequestRevision2]; + langs = [vnr + supportedRecognitionLanguagesAndReturnError: nil]; +#endif if (langs != nil) { fprintf(stderr,"Fast, v2: "); @@ -115,12 +132,20 @@ static void listSupportedLangs(void) /* accurate, v1 */ +#if (MAC_OS_X_VERSION_MIN_REQUIRED < 120000) langs = [VNRecognizeTextRequest supportedRecognitionLanguagesForTextRecognitionLevel: VNRequestTextRecognitionLevelAccurate revision: VNRecognizeTextRequestRevision1 error: nil]; +#else + [vnr setRecognitionLevel: VNRequestTextRecognitionLevelAccurate]; + [vnr setRevision: VNRecognizeTextRequestRevision1]; + langs = [vnr + supportedRecognitionLanguagesAndReturnError: nil]; +#endif + if (langs != nil) { fprintf(stderr,"Accurate, v1: "); @@ -146,12 +171,18 @@ static void listSupportedLangs(void) if (@available(macos 11, *)) { +#if (MAC_OS_X_VERSION_MIN_REQUIRED < 120000) langs = [VNRecognizeTextRequest supportedRecognitionLanguagesForTextRecognitionLevel: 
VNRequestTextRecognitionLevelAccurate revision: VNRecognizeTextRequestRevision2 error: nil]; +#else + [vnr setRevision: VNRecognizeTextRequestRevision2]; + langs = [vnr + supportedRecognitionLanguagesAndReturnError: nil]; +#endif if (langs != nil) { fprintf(stderr,"Accurate, v2: "); @@ -173,6 +204,7 @@ static void listSupportedLangs(void) fprintf(stderr, "\n"); } } + } /* main */ diff --git a/vocr.1 b/vocr.1 index b0e2d3c..e206bcb 100644 --- a/vocr.1 +++ b/vocr.1 @@ -42,8 +42,7 @@ supported language options are: "de" (German), "en" (English), "fr" Enables verbose mode \- vocr will print out informational and error messages. .SH PLATFORMS -vorc has been tested on MacOSX 11.x (BigSur) on M1 and x86_64. It -should also work on MacOSX 10.15 (Catalina) x86_64, and MacOSX 12.x -(Monterey) x86_64 and M1/M2. +vocr has been tested on MacOSX 11.x (BigSur) and 12.x (Monterey) on M1 +and x86_64. It should also work on MacOSX 10.15 (Catalina) x86_64. .SH HISTORY vocr was written by Sriranga Veeraraghavan . diff --git a/vocr.m b/vocr.m index a676d40..fb8731a 100644 --- a/vocr.m +++ b/vocr.m @@ -13,6 +13,7 @@ v. 0.3.0 (04/25/2022) - add language support v. 0.3.1 (04/24/2022) - move printSupportedLangs to separate file v. 0.4.0 (07/10/2022) - switch to PDFKit + v. 0.4.1 (10/28/2022) - updates for Monterey (MacOSX 12.x) Copyright (c) 2022 Sriranga R. 
Veeraraghavan @@ -39,6 +40,16 @@ a copy of this software and associated documentation files (the #import #import #import + +/* + use UTT, if available + see: https://stackoverflow.com/questions/70512722 +*/ + +#ifdef HAVE_UTT +#import +#endif + #import #import #import @@ -47,14 +58,16 @@ a copy of this software and associated documentation files (the /* globals */ -static NSString *gUTIPDF = @"com.adobe.pdf"; -static NSString *gUTIIMG = @"public.image"; static NSString *gIndentStr = @" "; static const char *gPgmName = "vocr"; #ifdef VOCR_IMG2TXT static const NSUInteger gBufSize = 1024; #endif /* VOCR_IMG2TXT */ +#ifndef HAVE_UTT +static NSString *gUTIPDF = @"com.adobe.pdf"; +static NSString *gUTIIMG = @"public.image"; +#endif /* command line options: @@ -241,11 +254,11 @@ static BOOL ocrImage(CGImageRef cgImage, } /* - on BigSur (11.x) and newer, try to set the + on BigSur (10.16) and newer, try to set the recognition language */ - if (@available(macos 11, *)) + if (@available(macos 10.16, *)) { switch(opts->lang) { @@ -350,15 +363,15 @@ on BigSur (11.x) and newer, try to set the [request setUsesLanguageCorrection: langCorrect]; /* - use the version 2 algorithm on MacOSX 11+, which supports - multiple languages, and, if an alternate language is requested - set that as well: + use the version 2 algorithm on MacOSX 10.16+ (BigSur or newer), + which supports multiple languages, and, if an alternate language + is requested set that as well: https://developer.apple.com/documentation/vision/vnrecognizetextrequestrevision2?language=objc https://stackoverflow.com/questions/63813709 */ - if (@available(macos 11, *)) + if (@available(macos 10.16, *)) { [request setRevision: VNRecognizeTextRequestRevision2]; if (langs != nil) @@ -580,6 +593,9 @@ static BOOL ocrFile(const char *file, #ifdef VOCR_IMG2TXT NSMutableString *pdfText = nil; #endif /* VOCR_IMG2TXT */ +#ifdef HAVE_UTT + UTType *utt = nil; +#endif if (file == NULL || file[0] == '\0') { @@ -639,8 +655,12 @@ static BOOL 
ocrFile(const char *file, } /* ocr a PDF */ - +#ifdef HAVE_UTT + utt = [UTType typeWithIdentifier: type]; + if ([utt conformsToType: UTTypePDF]) +#else if ([workspace type: type conformsToType: gUTIPDF]) +#endif { #ifdef VOCR_IMG2TXT @@ -790,7 +810,12 @@ static BOOL ocrFile(const char *file, /* ocr an image */ +#ifdef HAVE_UTT + utt = [UTType typeWithIdentifier: type]; + if ([utt conformsToType: UTTypeImage]) +#else if ([workspace type: type conformsToType: gUTIIMG]) +#endif { image = [[NSImage alloc] initWithContentsOfURL: fURL]; if (image == nil)