# Fixed path settings for tesseract. #349
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity, token permissions and triggers.
name: tests-ocr-service

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

# Triggers: pushes, pull requests, published releases and manual dispatch.
# NOTE(review): in GitHub's filter syntax "*" does not match "/", so branches
# such as "feature/foo" are not matched by these filters; use "**" if every
# branch is meant to trigger the workflow.
on:
  push:
    branches: ["*"]
  pull_request:
    branches: ["*"]
  release:
    types: [published]
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
jobs:
  # Single job: installs system packages (Tesseract, fonts, LibreOffice, ...),
  # installs unoserver for the system Python, creates a virtualenv with the
  # project requirements, runs mypy and then the unit tests.
  build:
    runs-on: ubuntu-24.04
    env:
      # NOTE(review): this only defines an environment variable literally named
      # "working-directory"; it does NOT change the directory in which steps
      # run (that would be `defaults.run.working-directory`). Confirm intent.
      working-directory: ./ocr-service
    strategy:
      matrix:
        python-version: ["3.12"]
      max-parallel: 4
    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      - name: checkout repo
        uses: actions/checkout@v5

      - name: Install Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: "x64"
          cache: "pip"

      # Cache pip (skipped when the ACT environment variable is set)
      - name: Main Cache pip
        if: ${{ !env.ACT }}
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      # Make the apt archive directory writable by the runner user so that
      # actions/cache can restore into it.
      - name: Prepare apt cache
        if: ${{ !env.ACT }}
        run: |
          sudo mkdir -p /var/cache/apt/archives/partial
          sudo chown -R $USER:$USER /var/cache/apt/archives

      - name: Cache apt downloads
        if: ${{ !env.ACT }}
        uses: actions/cache@v4
        with:
          path: /var/cache/apt/archives
          key: ${{ runner.os }}-apt-${{ hashFiles('.github/workflows/run_tests.yml') }}
          restore-keys: |
            ${{ runner.os }}-apt-

      # Remove any lock file in the apt archive directory before apt runs.
      - name: Remove cached apt locks
        if: ${{ !env.ACT }}
        run: sudo rm -f /var/cache/apt/archives/lock

      - name: Install dependencies
        run: |
          export DEBIAN_FRONTEND=noninteractive
          export DEBIAN_PRIORITY=critical
          sudo apt update -yq
          sudo apt install -y --no-install-recommends software-properties-common nodejs debconf-utils apt-utils
          # add extra repos
          sudo apt-add-repository -y -n multiverse
          sudo apt-add-repository -y -n universe
          sudo add-apt-repository -y -n ppa:graphics-drivers/ppa
          sudo apt update -yq
          sudo apt upgrade -y
          # install req packages
          sudo apt install -y --no-install-recommends \
            python3-all-dev python3-dev python3-pip \
            python${{ matrix.python-version }} python${{ matrix.python-version }}-dev
          sudo apt -y --no-install-recommends \
            -o Dpkg::Options::="--force-confold" -y \
            -o Dpkg::Options::="--force-confdef" -fuy dist-upgrade
          sudo apt install -y --no-install-recommends \
            gnupg \
            libssl-dev \
            wget \
            curl \
            gnupg-agent \
            dirmngr \
            ca-certificates \
            apt-transport-https \
            fonts-dejavu \
            build-essential \
            gfortran \
            gcc \
            g++
          ##### utils for python and TESSERACT
          # pre-accept the MS core fonts EULA so the installer runs unattended
          echo "ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true" | sudo debconf-set-selections
          sudo apt install -y --no-install-recommends fontconfig ttf-mscorefonts-installer libimage-exiftool-perl libtcnative-1 \
            libsm6 libxext6 gstreamer1.0-libav fonts-deva fonts-dejavu fonts-gfs-didot fonts-gfs-didot-classic fonts-junicode fonts-ebgaramond fonts-noto-cjk fonts-takao-gothic fonts-vlgothic \
            ghostscript ghostscript-x gsfonts gsfonts-other gsfonts-x11 fonts-croscore fonts-crosextra-caladea fonts-crosextra-carlito fonts-liberation fonts-open-sans fonts-noto-core fonts-ibm-plex fonts-urw-base35 \
            fonts-noto fonts-noto-extra xfonts-terminus fonts-font-awesome fonts-hack fonts-inconsolata fonts-liberation2 fonts-mononoki \
            libpcre3 libpcre3-dev \
            mesa-opencl-icd pocl-opencl-icd libvips-tools libvips libvips-dev \
            imagemagick libcairo2-dev tesseract-ocr tesseract-ocr-all libtesseract5 libtesseract-dev libleptonica-dev liblept5
          # tesseract language packages
          sudo apt install -y --no-install-recommends --fix-missing tesseract-ocr-osd tesseract-ocr-lat \
            tesseract-ocr-eng tesseract-ocr-enm tesseract-ocr-ita tesseract-ocr-script-latn \
            tesseract-ocr-fra tesseract-ocr-frk tesseract-ocr-deu tesseract-ocr-ces tesseract-ocr-dan tesseract-ocr-nld tesseract-ocr-nor \
            tesseract-ocr-spa tesseract-ocr-swe tesseract-ocr-slk tesseract-ocr-ron tesseract-ocr-script-grek
          # Pillow package requirements
          sudo apt install -y --no-install-recommends \
            tcl8.6-dev tk8.6-dev libopenjp2-7-dev libharfbuzz-dev libfribidi-dev libxcb1-dev \
            libtiff5-dev libjpeg8-dev zlib1g-dev libfreetype6-dev liblcms2-dev libwebp-dev libglib2.0-dev libgl1
          # python3 poppler requirement
          sudo apt install -y --no-install-recommends poppler-utils
          # libre office and java
          sudo apt install -y --no-install-recommends default-jre libreoffice-java-common libreoffice libreoffice-script-provider-python
          # build font cache
          sudo fc-cache -f -v
          # there is a bug in the blinker package that causes issues with uwsgi
          # (this removes software-properties-common)
          sudo apt remove -y python3-blinker
          # other openCL packages
          # beignet-opencl-icd
          # keep apt caches so the actions/cache step can reuse downloads
          sudo rm -f /var/cache/apt/archives/lock
          sudo chown -R $USER:$USER /var/cache/apt/archives

      - name: Install python deps & create virtual environment
        run: |
          # BEFORE creating the venv so /usr/bin/python3.12 can run unoserver
          # the reason for this is that the uno python bindings are tied to the system python
          # and will not work in a venv
          # so we need to install unoserver globally to match the version in requirements.txt
          # this is a bit hacky but it works around the issue of unoserver not being available
          # via pip for python3.12 (as of 2025-08)
          set -eux
          # take the pinned unoserver version from requirements.txt, if there is one
          UNOSERVER_PIN=$(awk -F'==' '/^unoserver==/ {print $2; exit}' requirements.txt || true)
          if [ -n "$UNOSERVER_PIN" ]; then
            /usr/bin/python3 -m pip install --no-cache-dir --break-system-packages "unoserver==${UNOSERVER_PIN}"
          else
            /usr/bin/python3 -m pip install --no-cache-dir --break-system-packages unoserver
          fi
          python${{ matrix.python-version }} -m venv venv
          source venv/bin/activate
          python -m pip install --upgrade pip
          # no --no-cache-dir here: it would bypass the pip cache (~/.cache/pip)
          # that the earlier cache steps restore and save
          pip install -r ./requirements.txt
          pip install -r ./requirements-dev.txt

      - name: Check linting and types
        run: |
          source venv/bin/activate
          mypy . --ignore-missing-imports
        shell: bash

      - name: Run tests
        env:
          OCR_SERVICE_LOG_LEVEL: "10"
          OCR_SERVICE_DEBUG_MODE: "True"
          # unoserver is installed for the system interpreter (see the install
          # step), so the system python is used here, not the venv one.
          LIBRE_OFFICE_PYTHON_PATH: /usr/bin/python3.12
          OCR_TMP_DIR: ${{ github.workspace }}/tmp
        run: |
          # the tests themselves run inside the virtual environment
          source venv/bin/activate
          python -m unittest discover -s ocr_service/tests -p 'test_process.py'
        shell: bash