diff --git a/SOURCES/tesseract_cmake.patch b/SOURCES/tesseract_cmake.patch new file mode 100644 index 0000000..965720a --- /dev/null +++ b/SOURCES/tesseract_cmake.patch @@ -0,0 +1,78 @@ +diff -rupN tesseract-5.5.0/CMakeLists.txt tesseract-5.5.0-new/CMakeLists.txt +--- tesseract-5.5.0/CMakeLists.txt 2024-11-10 20:23:34.000000000 +0100 ++++ tesseract-5.5.0-new/CMakeLists.txt 2024-11-11 11:42:49.784187827 +0100 +@@ -364,7 +364,7 @@ elseif(UNIX) + set(LIB_pthread pthread) + endif() + elseif(WIN32) +- set(LIB_Ws2_32 Ws2_32) ++ set(LIB_Ws2_32 ws2_32) + endif() + + add_definitions("-DCMAKE_BUILD") +@@ -829,12 +829,17 @@ set_target_properties( + libtesseract PROPERTIES SOVERSION + ${VERSION_MAJOR}.${VERSION_MINOR}) + +-set_target_properties( +- libtesseract +- PROPERTIES +- OUTPUT_NAME +- tesseract$<$<BOOL:${WIN32}>:${VERSION_MAJOR}${VERSION_MINOR}$<$<CONFIG:DEBUG>:d>> +-) ++if(MINGW) ++ set_target_properties(libtesseract PROPERTIES SUFFIX "-${VERSION_MAJOR}${VERSION_MINOR}${CMAKE_SHARED_LIBRARY_SUFFIX}") ++ set_target_properties(libtesseract PROPERTIES OUTPUT_NAME tesseract) ++else() ++ set_target_properties( ++ libtesseract ++ PROPERTIES ++ OUTPUT_NAME ++ tesseract$<$<BOOL:${WIN32}>:${VERSION_MAJOR}${VERSION_MINOR}$<$<CONFIG:DEBUG>:d>> ++ ) ++endif() + + if(SW_BUILD) + target_link_libraries(libtesseract PUBLIC org.sw.demo.danbloomberg.leptonica +@@ -963,9 +968,9 @@ install( + + if(INSTALL_CONFIGS) + install(FILES ${TESSERACT_CONFIGS} +- DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/tessdata/configs) ++ DESTINATION ${TESSDATA_PREFIX}/tessdata/configs) + install(FILES ${TESSERACT_TESSCONFIGS} +- DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/tessdata/tessconfigs) ++ DESTINATION ${TESSDATA_PREFIX}/tessdata/tessconfigs) + endif() + + # ############################################################################## +diff -rupN tesseract-5.5.0/src/training/CMakeLists.txt tesseract-5.5.0-new/src/training/CMakeLists.txt +--- tesseract-5.5.0/src/training/CMakeLists.txt 2024-11-10 20:23:34.000000000 +0100 ++++ 
tesseract-5.5.0-new/src/training/CMakeLists.txt 2024-11-11 11:42:27.279013868 +0100 +@@ -115,7 +115,7 @@ if(NOT DISABLED_LEGACY_ENGINE) + common/trainingsampleset.h) + endif() + +-add_library(common_training ${COMMON_TRAINING_SRC}) ++add_library(common_training STATIC ${COMMON_TRAINING_SRC}) + target_include_directories(common_training PUBLIC common + ${CMAKE_CURRENT_BINARY_DIR}) + target_link_libraries(common_training PUBLIC libtesseract) +@@ -280,7 +280,7 @@ if(ICU_FOUND) + + file(GLOB unicharset_training_src unicharset/*) + +- add_library(unicharset_training ${unicharset_training_src}) ++ add_library(unicharset_training STATIC ${unicharset_training_src}) + if(SW_BUILD) + target_link_libraries(unicharset_training + PUBLIC common_training org.sw.demo.unicode.icu.i18n) +@@ -424,7 +424,7 @@ if(ICU_FOUND) + + file(GLOB pango_training_src pango/*) + +- add_library(pango_training ${pango_training_src}) ++ add_library(pango_training STATIC ${pango_training_src}) + target_link_libraries(pango_training PUBLIC unicharset_training) + if(SW_BUILD) + target_link_libraries(pango_training diff --git a/SOURCES/tesseract_neon.patch b/SOURCES/tesseract_neon.patch new file mode 100644 index 0000000..3548a72 --- /dev/null +++ b/SOURCES/tesseract_neon.patch @@ -0,0 +1,33 @@ +diff -rupN --no-dereference tesseract-5.5.0/CMakeLists.txt tesseract-5.5.0-new/CMakeLists.txt +--- tesseract-5.5.0/CMakeLists.txt 2024-11-11 10:41:55.641233422 +0100 ++++ tesseract-5.5.0-new/CMakeLists.txt 2024-11-11 10:41:55.645233467 +0100 +@@ -248,7 +248,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "a + set(HAVE_AVX512F FALSE) + set(HAVE_FMA FALSE) + set(HAVE_SSE4_1 FALSE) +- set(HAVE_NEON TRUE) ++ check_cxx_compiler_flag("-mfpu=neon" HAVE_NEON) + + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*") + +diff -rupN --no-dereference tesseract-5.5.0/src/arch/simddetect.cpp tesseract-5.5.0-new/src/arch/simddetect.cpp +--- tesseract-5.5.0/src/arch/simddetect.cpp 2024-11-10 20:23:34.000000000 +0100 ++++ 
tesseract-5.5.0-new/src/arch/simddetect.cpp 2024-11-11 10:41:55.645233467 +0100 +@@ -270,7 +270,7 @@ SIMDDetect::SIMDDetect() { + // SSE detected. + SetDotProduct(DotProductSSE, &IntSimdMatrix::intSimdMatrixSSE); + #endif +-#if defined(HAVE_NEON) || defined(__aarch64__) ++#if defined(HAVE_NEON) + } else if (neon_available_) { + // NEON detected. + SetDotProduct(DotProductNEON, &IntSimdMatrix::intSimdMatrixNEON); +@@ -331,7 +331,7 @@ void SIMDDetect::Update() { + } else if (dotproduct == "accelerate") { + SetDotProduct(DotProductAccelerate, IntSimdMatrix::intSimdMatrix); + #endif +-#if defined(HAVE_NEON) || defined(__aarch64__) ++#if defined(HAVE_NEON) + } else if (dotproduct == "neon" && neon_available_) { + // NEON selected by config variable. + SetDotProduct(DotProductNEON, &IntSimdMatrix::intSimdMatrixNEON); diff --git a/SPECS/tesseract.spec b/SPECS/tesseract.spec index 64ed637..9de7e14 100644 --- a/SPECS/tesseract.spec +++ b/SPECS/tesseract.spec @@ -1,96 +1,202 @@ #global pre beta.4 +%if 0%{?rhel} +%bcond_with mingw +%else +%bcond_without mingw +%endif + Name: tesseract -Version: 4.1.1 -Release: 7%{?pre:.%pre}%{?dist} +Version: 5.5.0 +Release: 1%{?dist} Summary: Raw OCR Engine -License: ASL 2.0 +License: Apache-2.0 URL: https://github.com/tesseract-ocr/%{name} Source0: https://github.com/tesseract-ocr/tesseract/archive/%{version}%{?pre:-%pre}/%{name}-%{version}%{?pre:-%pre}.tar.gz -# Tweak location of tessdata folder -Patch0: tesseract_datadir.patch +# Fix library name case +# Build training libs statically +Patch1: tesseract_cmake.patch +# Don't assume neon available on arm64/aarch64 +Patch2: tesseract_neon.patch + -BuildRequires: make -BuildRequires: automake -BuildRequires: autoconf-archive +BuildRequires: cmake +BuildRequires: libcurl-devel BuildRequires: gcc-c++ -BuildRequires: libtool -BuildRequires: libtiff-devel +BuildRequires: giflib-devel BuildRequires: leptonica-devel -BuildRequires: cairo-devel BuildRequires: libicu-devel +BuildRequires: 
libjpeg-turbo-devel +BuildRequires: libtool +BuildRequires: libtiff-devel +BuildRequires: libwebp-devel BuildRequires: pango-devel - +BuildRequires: /usr/bin/asciidoc +BuildRequires: /usr/bin/xsltproc + +%if %{with mingw} +BuildRequires: mingw32-curl +BuildRequires: mingw32-filesystem +BuildRequires: mingw32-gcc-c++ +BuildRequires: mingw32-giflib +BuildRequires: mingw32-binutils +BuildRequires: mingw32-icu +BuildRequires: mingw32-leptonica +BuildRequires: mingw32-libgomp +BuildRequires: mingw32-libjpeg-turbo +BuildRequires: mingw32-libtiff +BuildRequires: mingw32-libwebp +BuildRequires: mingw32-pango + +BuildRequires: mingw64-curl +BuildRequires: mingw64-filesystem +BuildRequires: mingw64-gcc-c++ +BuildRequires: mingw64-giflib +BuildRequires: mingw64-binutils +BuildRequires: mingw64-icu +BuildRequires: mingw64-leptonica +BuildRequires: mingw64-libgomp +BuildRequires: mingw64-libjpeg-turbo +BuildRequires: mingw64-libtiff +BuildRequires: mingw64-libwebp +BuildRequires: mingw64-pango +%endif + +Requires: %{name}-libs%{?_isa} = %{version}-%{release} Requires: tesseract-langpack-eng -%description +%global _description %{expand: A commercial quality OCR engine originally developed at HP between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by UNLV. It was -open-sourced by HP and UNLV in 2005. +open-sourced by HP and UNLV in 2005.} + +%description %_description %package devel Summary: Development files for %{name} -Requires: %{name}%{?_isa} = %{version}-%{release} +Requires: %{name}-libs%{?_isa} = %{version}-%{release} + +%description devel %_description -%description devel The %{name}-devel package contains header file for developing applications that use %{name}. +%package libs +Summary: Shared libraries for %{name} +Conflicts: %{name} < 5.4.1-4 + +%description libs %_description + +The %{name}-libs package contains shared libraries +for %{name}. 
+ + %package tools Summary: Training tools for %{name} Requires: %{name}%{?_isa} = %{version}-%{release} -%description tools +%description tools %_description + The %{name}-tools package contains tools for training %{name}. -%prep -%autosetup -p1 -n %{name}-%{version}%{?pre:-%pre} +%package -n mingw32-%{name} +Summary: MinGW Windows tesseract-ocr library +BuildArch: noarch +%description -n mingw32-%{name} +MinGW Windows tesseract-ocr library. -%build -autoreconf -ifv -%configure --disable-static -%make_build -%make_build training +%package -n mingw32-%{name}-tools +Summary: MinGW Windows tesseract-ocr library tools +Requires: mingw32-%{name} = %{version}-%{release} +BuildArch: noarch +%description -n mingw32-%{name}-tools +MinGW Windows tesseract-ocr library tools. -%install -%make_install -%make_install training-install -find %{buildroot}%{_libdir} -type f -name '*.la' -delete +%package -n mingw64-%{name} +Summary: MinGW Windows tesseract-ocr library +BuildArch: noarch + +%description -n mingw64-%{name} +MinGW Windows tesseract-ocr library. -# Create directory for tessdata -mkdir -p %{buildroot}/%{_datadir}/%{name}/tessdata/ +%package -n mingw64-%{name}-tools +Summary: MinGW Windows tesseract-ocr library tools +Requires: mingw64-%{name} = %{version}-%{release} +BuildArch: noarch +%description -n mingw64-%{name}-tools +MinGW Windows tesseract-ocr library tools. 
-%ldconfig_scriptlets + +%{?mingw_debug_package} + + +%prep +%autosetup -p1 -n %{name}-%{version}%{?pre:-%pre} + + +%build +# Native build +%cmake -DCMAKE_INSTALL_LIBDIR=%{_lib} -DTESSDATA_PREFIX=%{_datadir}/%{name} +%cmake_build + +# Manually build manfiles, cmake does not build them +man_xslt=http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl +for file in doc/*.asc; do + asciidoc -b docbook -d manpage -o - $file | XML_CATALOG_FILES=%{_sysconfdir}/xml/catalog xsltproc --nonet -o ${file/.asc/} $man_xslt - +done + +%if %{with mingw} +# MinGW build +MINGW32_CMAKE_ARGS=-DTESSDATA_PREFIX=%{mingw32_datadir}/%{name} \ +MINGW64_CMAKE_ARGS=-DTESSDATA_PREFIX=%{mingw64_datadir}/%{name} +%mingw_cmake -DSW_BUILD=OFF -DLEPT_TIFF_RESULT=1 +%mingw_make_build +%endif + + +%install +%cmake_install +mkdir -p %{buildroot}%{_mandir}/{man1,man5}/ +cp -a doc/*.1 %{buildroot}%{_mandir}/man1/ +cp -a doc/*.5 %{buildroot}%{_mandir}/man5/ + +%if %{with mingw} +%mingw_make_install +%mingw_debug_install_post +%endif %files %license LICENSE %doc AUTHORS ChangeLog README.md %{_bindir}/%{name} -%dir %{_datadir}/%{name} -%dir %{_datadir}/%{name}/tessdata -%{_datadir}/%{name}/tessdata/configs/ -%{_datadir}/%{name}/tessdata/tessconfigs/ -%{_datadir}/%{name}/tessdata/pdf.ttf -%{_libdir}/lib%{name}*.so.4* +%{_datadir}/%{name}/ +%{_mandir}/man1/tesseract.1* %files devel %{_includedir}/%{name} -%{_libdir}/lib%{name}*.so +%{_libdir}/lib%{name}.so +%{_libdir}/libcommon_training.a +%{_libdir}/libunicharset_training.a +%{_libdir}/cmake/%{name}/ %{_libdir}/pkgconfig/%{name}.pc +%files libs +%{_libdir}/lib%{name}.so.5.5 +%{_libdir}/lib%{name}.so.%{version} + %files tools %{_bindir}/ambiguous_words %{_bindir}/classifier_tester @@ -98,46 +204,196 @@ mkdir -p %{buildroot}/%{_datadir}/%{name}/tessdata/ %{_bindir}/combine_lang_model %{_bindir}/combine_tessdata %{_bindir}/dawg2wordlist -%{_bindir}/language-specific.sh %{_bindir}/lstmeval %{_bindir}/lstmtraining %{_bindir}/merge_unicharsets 
%{_bindir}/mftraining %{_bindir}/set_unicharset_properties %{_bindir}/shapeclustering -%{_bindir}/tesstrain.sh -%{_bindir}/tesstrain_utils.sh %{_bindir}/text2image %{_bindir}/unicharset_extractor %{_bindir}/wordlist2dawg -%{_mandir}/man1/ambiguous_words.1.gz -%{_mandir}/man1/classifier_tester.1.gz -%{_mandir}/man1/cntraining.1.gz -%{_mandir}/man1/combine_lang_model.1.gz -%{_mandir}/man1/combine_tessdata.1.gz -%{_mandir}/man1/dawg2wordlist.1.gz -%{_mandir}/man1/lstmeval.1.gz -%{_mandir}/man1/lstmtraining.1.gz -%{_mandir}/man1/merge_unicharsets.1.gz -%{_mandir}/man1/mftraining.1.gz -%{_mandir}/man1/set_unicharset_properties.1.gz -%{_mandir}/man1/shapeclustering.1.gz -%{_mandir}/man1/tesseract.1.gz -%{_mandir}/man1/text2image.1.gz -%{_mandir}/man1/unicharset_extractor.1.gz -%{_mandir}/man1/wordlist2dawg.1.gz -%{_mandir}/man5/unicharambigs.5.gz -%{_mandir}/man5/unicharset.5.gz +%{_mandir}/man1/ambiguous_words.1* +%{_mandir}/man1/classifier_tester.1* +%{_mandir}/man1/cntraining.1* +%{_mandir}/man1/combine_lang_model.1* +%{_mandir}/man1/combine_tessdata.1* +%{_mandir}/man1/dawg2wordlist.1* +%{_mandir}/man1/lstmeval.1* +%{_mandir}/man1/lstmtraining.1* +%{_mandir}/man1/merge_unicharsets.1* +%{_mandir}/man1/mftraining.1* +%{_mandir}/man1/set_unicharset_properties.1* +%{_mandir}/man1/shapeclustering.1* +%{_mandir}/man1/text2image.1* +%{_mandir}/man1/unicharset_extractor.1* +%{_mandir}/man1/wordlist2dawg.1* +%{_mandir}/man5/unicharambigs.5* +%{_mandir}/man5/unicharset.5* + +%if %{with mingw} +%files -n mingw32-%{name} +%license LICENSE +%{mingw32_bindir}/libtesseract-55.dll +%{mingw32_includedir}/tesseract/ +%{mingw32_libdir}/libtesseract.dll.a +%{mingw32_libdir}/libcommon_training.a +%{mingw32_libdir}/libunicharset_training.a +%{mingw32_libdir}/pkgconfig/tesseract.pc +%{mingw32_libdir}/cmake/%{name}/ +%{mingw32_datadir}/%{name}/ + +%files -n mingw32-%{name}-tools +%{mingw32_bindir}/*.exe + +%files -n mingw64-%{name} +%license LICENSE 
+%{mingw64_bindir}/libtesseract-55.dll +%{mingw64_includedir}/tesseract/ +%{mingw64_libdir}/libtesseract.dll.a +%{mingw64_libdir}/libcommon_training.a +%{mingw64_libdir}/libunicharset_training.a +%{mingw64_libdir}/pkgconfig/tesseract.pc +%{mingw64_libdir}/cmake/%{name}/ +%{mingw64_datadir}/%{name}/ +%files -n mingw64-%{name}-tools +%{mingw64_bindir}/*.exe +%endif %changelog -* Tue Aug 10 2021 Mohan Boddu - 4.1.1-7 -- Rebuilt for IMA sigs, glibc 2.34, aarch64 flags - Related: rhbz#1991688 +* Mon Nov 11 2024 Sandro Mani - 5.5.0-1 +- Update to 5.5.0 + +* Sat Oct 05 2024 Neal Gompa - 5.4.1-5 +- Fix upgrade path for package split + +* Mon Sep 30 2024 Neal Gompa - 5.4.1-4 +- Rebuild for ffmpeg 7 + +* Mon Sep 23 2024 Michel Lind - 5.4.1-3 +- Correctly set the soversion based on SemVer properties + Backport of upstream PR#4319 from Neal Gompa (ngompa) +- Split shared libraries into their own -libs subpackage + +* Sat Jul 20 2024 Fedora Release Engineering - 5.4.1-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_41_Mass_Rebuild + +* Thu Jun 13 2024 Sandro Mani - 5.4.1-1 +- Update to 5.4.1 + +* Thu Jun 06 2024 Sandro Mani - 5.4.0-1 +- Update to 5.4.0 + +* Mon Feb 05 2024 Sandro Mani - 5.3.4-4 +- Rebuild (icu) + +* Wed Jan 31 2024 Pete Walter - 5.3.4-3 +- Rebuild for ICU 74 + +* Sat Jan 27 2024 Fedora Release Engineering - 5.3.4-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_40_Mass_Rebuild + +* Sun Jan 21 2024 Sandro Mani - 5.3.4-1 +- Update to 5.3.4 + +* Sat Oct 07 2023 Sandro Mani - 5.3.3-1 +- Update to 5.3.3 + +* Sat Jul 22 2023 Fedora Release Engineering - 5.3.2-3 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_39_Mass_Rebuild + +* Tue Jul 18 2023 Sandro Mani - 5.3.2-2 +- Rebuild (mingw-icu) + +* Thu Jul 13 2023 Sandro Mani - 5.3.2-1 +- Update to 5.3.2 + +* Tue Jul 11 2023 František Zatloukal - 5.3.1-2 +- Rebuilt for ICU 73.2 + +* Mon Apr 03 2023 Sandro Mani - 5.3.1-1 +- Update to 5.3.1 + +* Mon Mar 20 2023 Vitaly Zaitsev - 5.3.0-6 +- Backported GCC 
13 build fix. Fixed FTBFS on Fedora 38+. + +* Fri Feb 03 2023 FeRD (Frank Dana) - 5.3.0-5 +- Add patch from upstream to fix pkg-config libdir value + +* Sat Jan 21 2023 Fedora Release Engineering - 5.3.0-4 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_38_Mass_Rebuild + +* Tue Jan 03 2023 Sandro Mani - 5.3.0-3 +- Rebuild (mingw-icu) + +* Sat Dec 31 2022 Pete Walter - 5.3.0-2 +- Rebuild for ICU 72 + +* Fri Dec 23 2022 Sandro Mani - 5.3.0-1 +- Update to 5.3.0 + +* Wed Dec 21 2022 Sandro Mani - 5.2.0-6 +- Rebuild (leptonica) + +* Fri Sep 23 2022 Sandro Mani - 5.2.0-5 +- Backport patch to restore equality between cmake and autotools generated + pkgconfig file + +* Fri Aug 05 2022 Sandro Mani - 5.2.0-4 +- Rebuild (icu) + +* Mon Aug 01 2022 Frantisek Zatloukal - 5.2.0-3 +- Rebuilt for ICU 71.1 + +* Sat Jul 23 2022 Fedora Release Engineering - 5.2.0-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_37_Mass_Rebuild + +* Thu Jul 07 2022 Sandro Mani - 5.2.0-1 +- Update to 5.2.0 + +* Fri Mar 25 2022 Sandro Mani - 5.1.0-3 +- Rebuild with mingw-gcc-12 + +* Fri Mar 11 2022 Sandro Mani - 5.1.0-2 +- Build training tool libraries statically + +* Wed Mar 02 2022 Sandro Mani - 5.1.0-1 +- Update to 5.1.0 + +* Fri Feb 25 2022 Sandro Mani - 5.0.1-5 +- Bump as F36 needs another rebuild + +* Thu Feb 24 2022 Sandro Mani - 5.0.1-4 +- Make mingw subpackages noarch + +* Sat Feb 19 2022 Sandro Mani - 5.0.1-3 +- Add mingw subpackage + +* Sat Jan 22 2022 Fedora Release Engineering - 5.0.1-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_36_Mass_Rebuild + +* Sat Jan 08 2022 Sandro Mani - 5.0.1-1 +- Update to 5.0.1 + +* Fri Dec 17 2021 Sandro Mani - 5.0.0-3 +- Switch back to autotools + +* Wed Dec 15 2021 Sandro Mani - 5.0.0-2 +- Also install training libraries + +* Fri Dec 10 2021 Sandro Mani - 5.0.0-1 +- Update to 5.0.0 + +* Wed Nov 17 2021 Sandro Mani - 4.1.3-1 +- Update to 4.1.3 + +* Fri Jul 23 2021 Fedora Release Engineering - 4.1.1-7 +- Rebuilt for 
https://fedoraproject.org/wiki/Fedora_35_Mass_Rebuild -* Fri Apr 16 2021 Mohan Boddu - 4.1.1-6 -- Rebuilt for RHEL 9 BETA on Apr 15th 2021. Related: rhbz#1947937 +* Thu May 20 2021 Pete Walter - 4.1.1-6 +- Rebuild for ICU 69 * Wed Jan 27 2021 Fedora Release Engineering - 4.1.1-5 - Rebuilt for https://fedoraproject.org/wiki/Fedora_34_Mass_Rebuild