Skip to content

Commit 5a8d542

Browse files
Test doc extraction
1 parent 9d78d2c commit 5a8d542

File tree

1 file changed

+45
-2
lines changed

1 file changed

+45
-2
lines changed

cardinal_pythonlib/tests/extract_text_tests.py

Lines changed: 45 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,34 @@
2626
"""
2727

2828
import os
29+
import subprocess
2930
from tempfile import TemporaryDirectory, NamedTemporaryFile
30-
from unittest import TestCase
31+
from unittest import mock, TestCase
3132

32-
from cardinal_pythonlib.extract_text import document_to_text
33+
from cardinal_pythonlib.extract_text import (
34+
document_to_text,
35+
TextProcessingConfig,
36+
update_external_tools,
37+
)
3338

3439

3540
class DocumentToTextTests(TestCase):
41+
def setUp(self) -> None:
    """Prepare the shared fixtures used by the tests in this class.

    Passes a placeholder antiword path to ``update_external_tools``,
    stores a default ``TextProcessingConfig`` on ``self.config`` and
    builds ``self.mock_popen``, a stand-in for ``subprocess.Popen``.
    """
    update_external_tools({"antiword": "/path/to/antiword"})
    self.config = TextProcessingConfig()

    # Fake process, assembled from the inside out:
    #   mock_popen(...)            -> fake_process
    #   fake_process.communicate() -> (fake_stdout, None)
    #   fake_stdout.decode()       -> ""
    fake_stdout = mock.Mock(decode=mock.Mock(return_value=""))
    fake_process = mock.Mock(
        communicate=mock.Mock(return_value=(fake_stdout, None))
    )
    self.mock_popen = mock.Mock(return_value=fake_process)
56+
3657
def test_raises_when_no_filename_or_blob(self) -> None:
3758
with self.assertRaises(ValueError) as cm:
3859
document_to_text()
@@ -68,3 +89,25 @@ def test_csv_converted(self) -> None:
6889
text = document_to_text(temp_file.name)
6990

7091
self.assertEqual(text, content)
92+
93+
def test_doc_converted_with_antiword(self) -> None:
    """Check that a ``.doc`` file is handed to antiword via ``Popen``.

    ``subprocess.Popen`` is replaced with ``self.mock_popen`` so no real
    process is started. The test then asserts that ``Popen`` was called
    with the configured antiword path, the ``-w`` option with
    ``self.config.width``, and the name of the temporary file, with
    ``stdout`` set to ``subprocess.PIPE``.
    """
    # delete=False keeps the file on disk after it is closed, so that
    # document_to_text() receives the path of an existing, closed file.
    with NamedTemporaryFile(suffix=".doc", delete=False) as temp_file:
        doc_path = temp_file.name

    try:
        with mock.patch.multiple(
            "cardinal_pythonlib.extract_text.subprocess",
            Popen=self.mock_popen,
        ):
            document_to_text(doc_path)
    finally:
        # delete=False means nobody else cleans this up; remove it here
        # so repeated test runs do not litter the temp directory.
        os.remove(doc_path)

    expected_calls = [
        mock.call(
            (
                "/path/to/antiword",
                "-w",
                str(self.config.width),
                doc_path,
            ),
            stdout=subprocess.PIPE,
        ),
    ]
    self.mock_popen.assert_has_calls(expected_calls)

0 commit comments

Comments
 (0)