diff --git a/pyproject.toml b/pyproject.toml index 330f0c57..375254c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,6 @@ dependencies = [ "markdownify>=0.14.1", # HTML to Markdown conversion for News items "pymupdf>=1.26.6", "pymupdf4llm>=0.2.2", - "pymupdf-layout>=1.26.6", "openai>=2.8.1", "dynaconf>=3.2.13,<4.0", ] diff --git a/tools/parse-doc.py b/tools/parse-doc.py deleted file mode 100644 index 78fd987e..00000000 --- a/tools/parse-doc.py +++ /dev/null @@ -1,41 +0,0 @@ -import logging -import pathlib - -import anyio -import pymupdf -import pymupdf.layout - -from nextcloud_mcp_server.client import NextcloudClient - -pymupdf.layout.activate() -import pymupdf4llm # noqa: E402 - -client = NextcloudClient.from_env() -logger = logging.getLogger(__name__) - -TMP_DIR = pathlib.Path("/tmp/tmp-images") -TMP_DIR.mkdir(exist_ok=True, parents=True) - - -async def print_markdown(filename): - content, _ = await client.webdav.read_file(filename) - doc = pymupdf.open("pdf", content) - md_text = pymupdf4llm.to_markdown(doc, write_images=True, image_path=str(TMP_DIR)) - print(md_text) - - -async def run1(): - response = await client.webdav.find_by_type("application/pdf") - # print(response) - for file in response: - await print_markdown(file["path"]) - - -async def run(): - tags = await client.tags.get_all_tags() - print(tags) - - -if __name__ == "__main__": - logging.basicConfig(level="INFO") - anyio.run(run) diff --git a/uv.lock b/uv.lock index 67b71d70..c5a7e1d8 100644 --- a/uv.lock +++ b/uv.lock @@ -2084,15 +2084,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" }, ] -[[package]] -name = "networkx" -version = "3.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/fc/7b6fd4d22c8c4dc5704430140d8b3f520531d4fe7328b8f8d03f5a7950e8/networkx-3.6.tar.gz", hash = "sha256:285276002ad1f7f7da0f7b42f004bcba70d381e936559166363707fdad3d72ad", size = 2511464, upload-time = "2025-11-24T03:03:47.158Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/c7/d64168da60332c17d24c0d2f08bdf3987e8d1ae9d84b5bbd0eec2eb26a55/networkx-3.6-py3-none-any.whl", hash = "sha256:cdb395b105806062473d3be36458d8f1459a4e4b98e236a66c3a48996e07684f", size = 2063713, upload-time = "2025-11-24T03:03:45.21Z" }, -] - [[package]] name = "nextcloud-mcp-server" version = "0.73.0" @@ -2125,7 +2116,6 @@ dependencies = [ { name = "pydantic" }, { name = "pyjwt", extra = ["crypto"] }, { name = "pymupdf" }, - { name = "pymupdf-layout" }, { name = "pymupdf4llm" }, { name = "python-json-logger" }, { name = "pythonvcard4" }, @@ -2179,7 +2169,6 @@ requires-dist = [ { name = "pydantic", specifier = ">=2.11.4" }, { name = "pyjwt", extras = ["crypto"], specifier = ">=2.8.0" }, { name = "pymupdf", specifier = ">=1.26.6" }, - { name = "pymupdf-layout", specifier = ">=1.26.6" }, { name = "pymupdf4llm", specifier = ">=0.2.2" }, { name = "python-json-logger", specifier = ">=3.2.0" }, { name = "pythonvcard4", specifier = ">=0.2.0" }, @@ -3233,25 +3222,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/e8/989f4eaa369c7166dc24f0eaa3023f13788c40ff1b96701f7047421554a8/pymupdf-1.26.6-cp310-abi3-win_amd64.whl", hash = "sha256:ce02ca96ed0d1acfd00331a4d41a34c98584d034155b06fd4ec0f051718de7ba", size = 18405680, upload-time = "2025-11-05T14:34:48.672Z" }, ] -[[package]] -name = "pymupdf-layout" -version = "1.26.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "networkx" }, - { name = "numpy" }, - { name = "onnxruntime" }, - { name = "pymupdf" }, - { name = "pyyaml" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/70/86/31f8d05b36ebf43cca88d5c6415de46eb748e487b618a589671a610be8c8/pymupdf_layout-1.26.6-cp310-abi3-macosx_10_9_x86_64.whl", hash = "sha256:d632f83208db8b24600eb8ac54d3135fab6ab1f251a38fa6061e7470e81b9481", size = 12727222, upload-time = "2025-11-05T14:35:44.367Z" }, - { url = "https://files.pythonhosted.org/packages/ff/d3/0e52d7d1e2f975843f5354ac3b210a98471b690105efc332d3c285bd794b/pymupdf_layout-1.26.6-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:f1d45f72ec08ef7f644928487e7a067df6df63172d682d0bb05158896d0d9c71", size = 12725266, upload-time = "2025-11-05T14:36:50.727Z" }, - { url = "https://files.pythonhosted.org/packages/ae/49/ad1a5edccc45477493d6a53a41df7620d6147febb897c3dd8354f413e154/pymupdf_layout-1.26.6-cp310-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0561b9485a6ac1a40bb1e2ec7a1648aa64e4be56dab2f39182b11a69e3e43024", size = 12732580, upload-time = "2025-11-06T11:04:09.065Z" }, - { url = "https://files.pythonhosted.org/packages/a7/bd/3e049b359dd0c3a101ae915484b87ff73bfdedfb24a924e0a8e6783b33f3/pymupdf_layout-1.26.6-cp310-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:ee8e2bfed12d4b6421b27a1f89837ac09d8bc3f783f79670db397ec24614bf3d", size = 12732539, upload-time = "2025-11-05T14:38:01.244Z" }, - { url = "https://files.pythonhosted.org/packages/f8/7a/69078bf16669f8361360321ea6bede4cbfede35bf3f4ca5842a7c2387825/pymupdf_layout-1.26.6-cp310-abi3-win_amd64.whl", hash = "sha256:2305aac24fd6e12217afaaea8ec95be297be9b250b6077a3f4e92f7f9beeaf92", size = 12734904, upload-time = "2025-11-05T14:39:05.83Z" }, -] - [[package]] name = "pymupdf4llm" version = "0.2.7"