From 9220c00eb3a16b5575d43f13f6096fb77c05c992 Mon Sep 17 00:00:00 2001 From: dpunj Date: Mon, 25 May 2026 15:31:42 -0500 Subject: [PATCH 1/8] feat: reboot weft as bun ai-native reader --- .gitignore | 3 + README.md | 73 +-- bun.lock | 78 +++ docs/WEFT_REBOOT.md | 180 +++++++ docs/shaped/ai-native-reader-reboot.md | 453 +++++++++++++++++ package.json | 27 + src/cli.ts | 36 ++ src/document.ts | 273 ++++++++++ src/reader.ts | 236 +++++++++ src/web.ts | 669 +++++++++++++++++++++++++ tsconfig.json | 15 + 11 files changed, 1996 insertions(+), 47 deletions(-) create mode 100644 bun.lock create mode 100644 docs/WEFT_REBOOT.md create mode 100644 docs/shaped/ai-native-reader-reboot.md create mode 100644 package.json create mode 100644 src/cli.ts create mode 100644 src/document.ts create mode 100644 src/reader.ts create mode 100644 src/web.ts create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore index 1c22475..ec9b720 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ venv/ ENV/ .env +# Bun / Node +node_modules/ + # Python __pycache__/ *.py[cod] diff --git a/README.md b/README.md index eafdf8b..d0ec161 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,16 @@ # weft 🪢 -A vim-like terminal reader to chat with your books +A vim-like, AI-native terminal reader for books and documents. + +Weft starts with EPUBs today: it converts chapters into Markdown-like text, renders them in a fast Bun-powered terminal reader, and lets you navigate with keyboard-first controls. + +The next direction is bigger: EPUB/PDF/Markdown in → snappy Markdown reading surface → durable highlights/comments → AI tools that understand the book, your current location, and where they looked. -## Features +See [`docs/WEFT_REBOOT.md`](docs/WEFT_REBOOT.md) for the reboot plan. 
+ +## Current features ### Vim-like navigation @@ -13,66 +19,39 @@ A vim-like terminal reader to chat with your books - Jump to start/end: `g`/`G` - See table of contents: `t` -### Chat with your books - -- `a` - Chat with your current text -- `s` - Generate summary -- `r` - Listen text -- `>` - Listen to the compass - -Uses [LLM](https://github.com/simonw/llm) to interface with OpenAI, Anthropic, and other providers. You can also install [plugins](https://llm.datasette.io/en/stable/other-models.html) to run local models on your machine. +### AI-native direction -## Getting started +The original Python prototype can chat, summarize, and read aloud. The Bun reboot is rebuilding that on top of a stronger document model first, so AI can operate over chapters, blocks, source spans, annotations, and search results instead of only the current page. -Clone this repo and setup & activate venv using either [uv](https://github.com/astral-sh/uv) (recommended) +## Reboot direction -```bash -uv venv -source .venv/bin/activate -``` +Weft should stay small and sharp, but grow a real document spine: -Or, standard Python tools: +- **Normalized document model** — chapters, sections, blocks, and source spans instead of raw strings +- **Stable annotations** — highlights/comments in a sidecar file that can later export to Markdown +- **Reader-native AI tools** — `toc`, `current_location`, `get_section`, `search_text`, and eventually `repl_exec` over book blocks +- **Visible AI navigation** — show the reader what the model inspected, inspired by `recrsv`'s long-document exploration +- **Recrsv-style exploration rail** — web preview includes `toc`, `search_text`, and `context_get` slices so you can watch document tools move through the book -```bash -python3 -m pip install virtualenv -python3 -m virtualenv .venv -source .venv/bin/activate -``` - -Install dependencies with: - -```bash -uv pip install -r requirements.txt # if using `uv` - faster! 
-# or -pip install -r requirements.txt -``` +## Getting started -Bring your keys from OpenAI (default): +Install dependencies with Bun: ```bash -llm keys set OPENAI_API_KEY +bun install ``` -Or use Anthropic's Claude: +Open the modern Markdown reader preview: ```bash -llm install llm-claude-3 -llm keys set ANTHROPIC_API_KEY -llm models default claude-3-5-sonnet-latest +bun run web path/to/book.epub +# then open http://localhost:4173 ``` -Or, install a local model and run it on your machine: +Or use the minimal terminal reader: ```bash -llm install llm-gpt4all -llm models list # shows a list of available models -llm -m orca-mini-3b-gguf2-q4_0 '3 names for a pet cow' # tests the orca model locally (and downloads it first if needed) +bun run read path/to/book.epub ``` -## Try it! - -Get a book from [Project Gutenberg](https://www.gutenberg.org/) and try it out: - -```bash -uv run reader.py path/to/book.epub -``` +For now, the original Python prototype remains in `reader.py` as a reference implementation for chat/TTS experiments. The reboot path is Bun + TypeScript under `src/`. 
diff --git a/bun.lock b/bun.lock new file mode 100644 index 0000000..15d9665 --- /dev/null +++ b/bun.lock @@ -0,0 +1,78 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "weft", + "dependencies": { + "fast-xml-parser": "^5.3.2", + "jszip": "^3.10.1", + "marked": "^18.0.4", + "turndown": "^7.2.2", + }, + "devDependencies": { + "@types/bun": "^1.3.14", + "@types/node": "^24.10.1", + "@types/turndown": "^5.0.5", + "typescript": "^5.9.3", + }, + }, + }, + "packages": { + "@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="], + + "@nodable/entities": ["@nodable/entities@2.1.0", "", {}, "sha512-nyT7T3nbMyBI/lvr6L5TyWbFJAI9FTgVRakNoBqCD+PmID8DzFrrNdLLtHMwMszOtqZa8PAOV24ZqDnQrhQINA=="], + + "@types/bun": ["@types/bun@1.3.14", "", { "dependencies": { "bun-types": "1.3.14" } }, "sha512-h1hFqFVcvAvD9j9K7ZW7vd82aSA+rTdznZa+5bwvCwqSB1jmmfLcbIWhOLx1/+boy/xmjgCs/OMUL8hRJSmnPw=="], + + "@types/node": ["@types/node@24.12.4", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GUUEShf+PBCGW2KaXwcIt3Yk+e3pkKwWKb9GSyM9WQVE+ep2jzmHdGsHzu4wgcZy5fN9FBdVzjpBQsYlpfpgLA=="], + + "@types/turndown": ["@types/turndown@5.0.6", "", {}, "sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg=="], + + "bun-types": ["bun-types@1.3.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-4N0ig0fEomHt5R0KCFWjovxow98rIoRwKolrYdCcknNwMekCXRnWEUvgu5soYV8QXtVsrUD8B95MBOZGPvr6KQ=="], + + "core-util-is": ["core-util-is@1.0.3", "", {}, "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ=="], + + "fast-xml-builder": ["fast-xml-builder@1.2.0", "", { "dependencies": { "path-expression-matcher": "^1.5.0", "xml-naming": "^0.1.0" } }, "sha512-00aAWieqff+ZJhsXA4g1g7M8k+7AYoMUUHF+/zFb5U6Uv/P0Vl4QZo84/IcufzYalLuEj9928bXN9PbbFzMF0Q=="], + + "fast-xml-parser": 
["fast-xml-parser@5.8.0", "", { "dependencies": { "@nodable/entities": "^2.1.0", "fast-xml-builder": "^1.2.0", "path-expression-matcher": "^1.5.0", "strnum": "^2.3.0", "xml-naming": "^0.1.0" }, "bin": { "fxparser": "src/cli/cli.js" } }, "sha512-6bIM7fsJxeo3uXv7OncQYsBAMPJ7V16Slahl/6M98C/i2q+vB1+4a0MtrvYwDFEUrwDSbAmeLDRXsOBwrL7yAg=="], + + "immediate": ["immediate@3.0.6", "", {}, "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ=="], + + "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="], + + "isarray": ["isarray@1.0.0", "", {}, "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="], + + "jszip": ["jszip@3.10.1", "", { "dependencies": { "lie": "~3.3.0", "pako": "~1.0.2", "readable-stream": "~2.3.6", "setimmediate": "^1.0.5" } }, "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g=="], + + "lie": ["lie@3.3.0", "", { "dependencies": { "immediate": "~3.0.5" } }, "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ=="], + + "marked": ["marked@18.0.4", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-c/BTaKzg0G6ezQx97DAkYU7k0HM6ys0FqYeKBL6hlBByZwy+ycA1+f0vDdjMHKKeEjdgkx0GOv9Il6D+85cOqA=="], + + "pako": ["pako@1.0.11", "", {}, "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="], + + "path-expression-matcher": ["path-expression-matcher@1.5.0", "", {}, "sha512-cbrerZV+6rvdQrrD+iGMcZFEiiSrbv9Tfdkvnusy6y0x0GKBXREFg/Y65GhIfm0tnLntThhzCnfKwp1WRjeCyQ=="], + + "process-nextick-args": ["process-nextick-args@2.0.1", "", {}, "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag=="], + + "readable-stream": ["readable-stream@2.3.8", "", { "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", "isarray": "~1.0.0", 
"process-nextick-args": "~2.0.0", "safe-buffer": "~5.1.1", "string_decoder": "~1.1.1", "util-deprecate": "~1.0.1" } }, "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA=="], + + "safe-buffer": ["safe-buffer@5.1.2", "", {}, "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="], + + "setimmediate": ["setimmediate@1.0.5", "", {}, "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA=="], + + "string_decoder": ["string_decoder@1.1.1", "", { "dependencies": { "safe-buffer": "~5.1.0" } }, "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg=="], + + "strnum": ["strnum@2.3.0", "", {}, "sha512-ums3KNd42PGyx5xaoVTO1mjU1bH3NpY4vsrVlnv9PNGqQj8wd7rJ6nEypLrJ7z5vxK5RP0yMLo6J/Gsm62DI5Q=="], + + "turndown": ["turndown@7.2.4", "", { "dependencies": { "@mixmark-io/domino": "^2.2.0" } }, "sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + + "util-deprecate": ["util-deprecate@1.0.2", "", {}, "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="], + + "xml-naming": ["xml-naming@0.1.0", "", {}, "sha512-k8KO9hrMyNk6tUWqUfkTEZbezRRpONVOzUTnc97VnCvyj6Tf9lyUR9EDAIeiVLv56jsMcoXEwjW8Kv5yPY52lw=="], + } +} diff --git a/docs/WEFT_REBOOT.md b/docs/WEFT_REBOOT.md new file mode 100644 index 0000000..d638ad7 --- /dev/null +++ b/docs/WEFT_REBOOT.md @@ -0,0 +1,180 @@ +# Weft Reboot + +For the fuller shaped plan, see 
[`docs/shaped/ai-native-reader-reboot.md`](shaped/ai-native-reader-reboot.md). + +Weft should stay Weft: a vim-like reader for books and documents, with AI woven into the act of reading instead of bolted on as a chat box. + +## North star + +Weft is an AI-native reader for EPUBs, PDFs, and long Markdown documents. + +It turns source files into a fast, navigable Markdown-like reading surface where readers can move with vim keys, leave durable annotations, and ask AI questions that understand the book, their current location, and their reading history. + +## What Weft already has + +The current public repo has the right seed: + +- EPUB ingestion via `ebooklib` +- HTML-to-Markdown conversion via `html2text` +- terminal rendering via Rich Markdown +- vim-like navigation across sections/pages +- current-page AI chat and summarization through `llm` +- text-to-speech and an audio “compass” guide + +The reboot keeps that spirit, but moves the forward path to **Bun + TypeScript** so the document pipeline, AI tools, and terminal UI can share one typed model. + +## Reference points + +### recrsv / RLM + +`recrsv` proves the important AI pattern: do not paste a whole long document into the model. Give the model tools to explore it strategically. + +Useful ideas to bring into Weft: + +- `context_get(offset, limit)` as a primitive read tool +- `repl_exec(code)` as a computational exploration tool +- reading timeline / dive mode for reviewing where the AI looked +- minimap-style coverage of document exploration + +For Weft, raw character offsets should evolve into reader-native anchors: chapters, sections, blocks, pages, and source spans. + +### Roughdraft + +Roughdraft is adjacent, not competitive. It is a local-first Markdown review app for collaborating with agents through comments, replies, and suggestions stored in Markdown with CriticMarkup. 
+ +Useful ideas to bring into Weft: + +- annotations as portable text, not trapped app state +- comments/replies/suggestions as first-class collaboration objects +- CLI/agent-friendly workflows +- optional export to Roughdraft-flavored Markdown + +Weft should not become a Markdown review app. It should be a reader for books/docs whose annotations can round-trip to Markdown when useful. + +## Product pillars + +1. **Snappy Markdown reading** + - EPUB/PDF/Markdown in + - normalized Markdown-like blocks out + - fast terminal-first rendering + - vim navigation by page, section, heading, search result, and mark + +2. **Stable source mapping** + - every rendered block has a durable id + - every annotation points at a source span or block id + - PDF/EPUB quirks are hidden behind a common document model + +3. **Annotations as knowledge** + - highlights, comments, replies, and AI-suggested notes + - stored in a sidecar file first + - exportable to Markdown / Roughdraft-flavored Markdown later + +4. **AI as a reading companion** + - ask about current page, section, chapter, or whole book + - summarize since last mark + - explain selected passage + - find motifs, definitions, contradictions, references + - show where the AI looked, not just what it answered + +## Proposed document model + +```text +Document + id + title + authors[] + source_path + source_type: epub | pdf | markdown | text + sections[] + +Section + id + title + level + parent_id? + block_ids[] + source_span + +Block + id + section_id + kind: heading | paragraph | quote | list | code | table | image | page_break + markdown + plain_text + source_span +``` + +A source span is intentionally abstract: + +```text +SourceSpan + source_path + href? # EPUB item path + page? # PDF page when available + char_start? + char_end? + selector? 
# future: text quote selector / CFI / PDF coordinates +``` + +## AI tool surface + +Start with safe reader tools before embeddings: + +```text +current_location() +toc() +get_block(block_id) +get_section(section_id) +get_near(anchor, before, after) +search_text(query) +list_annotations(filter?) +``` + +Then add heavier tools: + +```text +summarize_range(start_anchor, end_anchor) +repl_exec(code) # over normalized blocks, inspired by recrsv +semantic_search(query) # later, optional +``` + +The model should cite block ids / sections in answers so the reader can jump there. + +## First build slice + +Keep it terminal-first and incremental, using Bun + TypeScript. + +1. Extract EPUB ingestion into a document model module. +2. Render from blocks instead of raw section strings. +3. Add block ids and section ids to the reader state. +4. Add a sidecar annotations file: + +```text +book.epub +book.weft.json +``` + +5. Add minimal commands: + +```text +m mark current block +c comment on current block +n/N next/previous annotation +``` + +6. Upgrade AI context from current page text to current location + surrounding blocks + section metadata. + +## Non-goals for the reboot slice + +- no web app yet +- no cloud sync +- no account system +- no embeddings until structure/source maps work +- no replacement of the terminal reader core +- no full Roughdraft clone + +## Naming + +This remains Weft. + +The concept is not “another AI document app.” It is a woven reading surface: source file, rendered text, reader marks, and AI exploration all tied together by durable anchors. diff --git a/docs/shaped/ai-native-reader-reboot.md b/docs/shaped/ai-native-reader-reboot.md new file mode 100644 index 0000000..ba3b1f6 --- /dev/null +++ b/docs/shaped/ai-native-reader-reboot.md @@ -0,0 +1,453 @@ +# Shape: Weft as an AI-Native Reader + +## Status + +Shaped draft. Weft has a working Bun/Web preview and the first deterministic `recrsv`-style exploration rail. 
This document clarifies what we want, why it matters, what exists today, and what is missing before it becomes the product we mean. + +## One-line pitch + +Turn Weft into a vim-like, AI-native reader for EPUB/PDF/Markdown where books render as snappy Markdown, annotations become durable knowledge, and AI explores the document through visible reader-native tools instead of opaque chat. + +## Why this, why now + +The original Weft already had the right spark: + +- a terminal reader for books +- vim-like motion +- Markdown-ish EPUB rendering +- AI chat/summaries over current text +- audio read-aloud / compass experiments + +The newer `recrsv` work proved a deeper interaction pattern: + +> Don’t paste the entire document into the model. Give the model tools to inspect it strategically, then show the user where it looked. + +That is more compelling for reading than a normal chat sidebar. A reader should be able to watch the AI move through the book: first the table of contents, then searches, then nearby context, then synthesis. + +Roughdraft adds another useful signal: people want local-first Markdown review surfaces that let humans and agents exchange comments and suggestions. Weft should not become Roughdraft, but it should learn from its portability: annotations should not be trapped in an app database. + +## Product thesis + +Weft is not “an ebook app with AI.” + +Weft is a woven reading surface: + +```text +source file + ↓ +structured Markdown blocks + ↓ +vim navigation + reading state + ↓ +highlights/comments/marks + ↓ +AI tools over the same anchors + ↓ +visible exploration timeline +``` + +The unique feeling should be: + +> I’m reading a book, and the AI is reading *with spatial awareness* — it knows where I am, can move around the document, can cite exact blocks, and I can see its path. 
+ +## Target user experience + +A user opens an EPUB/PDF: + +```bash +bun run web book.epub +``` + +The browser shows a clean, fast Markdown reading surface: + +- left TOC +- centered readable text +- vim navigation +- progress meter +- current section/page + +The user types a question or motif into the exploration rail: + +```text +where does the author discuss the Panama Canal's economic impact? +``` + +Weft streams tool slices: + +```text +01 toc + Read the structure first + +02 search_text + Search for “Panama Canal economic impact” + +03 context_get + Inspect s11.b96 + +04 context_get + Inspect s13.b41 + +05 answer + Synthesis with citations back to block ids +``` + +The reader can click any slice to jump to that part of the book. + +Later, the user can mark or comment: + +```text +m mark current block +c comment on current block +n next annotation +N previous annotation +``` + +Annotations save beside the source: + +```text +book.epub +book.weft.json +``` + +## What exists now + +### Bun + TypeScript reboot + +Files: + +- `package.json` +- `tsconfig.json` +- `src/document.ts` +- `src/reader.ts` +- `src/cli.ts` +- `src/web.ts` + +Current commands: + +```bash +bun run read panama.epub +bun run web panama.epub +``` + +### Document spine + +`src/document.ts` parses EPUBs into: + +```text +WeftDocument + sections[] + blocks[] + sourceSpan +``` + +Current EPUB path: + +- unzip with `jszip` +- read OPF/package/spine with `fast-xml-parser` +- read XHTML items +- convert HTML to Markdown with `turndown` +- split into blocks +- assign ids like `s11.b96` + +### Modern Markdown web reader + +`src/web.ts` serves: + +- `GET /` +- `GET /api/document` +- `GET /api/page` + +The web UI includes: + +- dark modern reader shell +- TOC sidebar +- Markdown rendering via `marked` +- vim keys: `j/k`, `h/l`, `g/G`, `t` +- page/section progress + +### First recrsv-style exploration rail + +`src/web.ts` also serves: + +- `GET /api/rlm?q=...` + +Current deterministic tools: + +```text 
+toc +search_text +context_get +``` + +The UI shows: + +- tool slices +- chars read +- % explored +- context snippets +- click-to-jump to section + +This is not yet model-driven, but it establishes the substrate and the visual grammar. + +## What is missing + +### 1. Real model-driven tool loop + +Current `/api/rlm` is deterministic search. The next step is a real agent loop inspired by `recrsv`: + +- model receives tool definitions +- model chooses tools +- server executes tools +- UI streams each tool call/result +- final answer cites sections/blocks + +Initial tools: + +```text +toc() +current_location() +search_text(query) +get_block(block_id) +get_section(section_id) +context_get(anchor, before, after) +``` + +Later: + +```text +repl_exec(code) +semantic_search(query) +``` + +### 2. Streaming exploration + +Current exploration returns one JSON response. It should stream events: + +```text +meta +tool_call +tool_result +reading +answer_token +done +error +``` + +This is where the `recrsv` feeling really lands: the reader watches the model move. + +### 3. Robust EPUB cleanup and TOC + +Current EPUB rendering follows the spine and includes some Gutenberg cruft. + +Needed: + +- better title extraction +- use EPUB nav/NCX when available +- skip cover/preamble junk when reasonable +- preserve chapter hierarchy +- avoid duplicated title/header blocks + +### 4. Annotations + +Need sidecar storage: + +```json +{ + "version": 1, + "source": "book.epub", + "annotations": [ + { + "id": "a1", + "kind": "comment", + "anchor": { "blockId": "s11.b96" }, + "body": "Important canal economics point.", + "createdAt": "..." + } + ] +} +``` + +Reader commands: + +```text +m mark current block +c comment current block +n/N next/previous annotation +``` + +Future export: + +- Markdown notes +- Roughdraft-flavored Markdown / CriticMarkup + +### 5. 
PDF support + +PDF should enter the same document model: + +```text +PDF page → text blocks → source spans with page numbers +``` + +First version can be text extraction only. Layout/OCR can come later. + +### 6. AI answer grounding + +Answers should cite block ids and section names: + +```text +The strongest discussion is in s11.b96, where the author says... +``` + +Citations should be clickable in the UI. + +## Boundaries / non-goals for this cycle + +- No hosted service +- No account system +- No sync +- No mobile app +- No full Roughdraft clone +- No embeddings as the first solution +- No complex PDF layout reconstruction yet +- No replacing the reader with a generic chat app + +## Appetite + +One focused cycle: **1–2 weeks**. + +The goal is not to finish every document format. The goal is to prove the distinctive loop: + +```text +read → ask → visible tool exploration → grounded answer → jump/cite → annotate +``` + +## Core bet + +If Weft has stable block anchors and an evented tool loop, then AI features become straightforward and differentiated. + +Without stable anchors, AI answers are just prose. + +With stable anchors, Weft can do: + +- citations +- jump-to-source +- durable comments +- reading history +- AI exploration timelines +- exportable notes +- later semantic indexing + +## Proposed build slices + +### Slice 1 — Clean reader spine + +Goal: make EPUB navigation feel less prototype-y. + +- Use EPUB nav/NCX for TOC when available +- improve section titles +- skip obvious cover-only sections +- render block ids as hidden data attributes +- add current block tracking in the web reader + +Demo: + +> Open `panama.epub`, TOC looks like real chapters, vim nav works, current block is known. + +### Slice 2 — Sidecar annotations + +Goal: first durable user knowledge layer. 
+ +- create `book.weft.json` +- add mark/comment actions in web UI +- display annotations in rail or margin +- next/previous annotation nav + +Demo: + +> Comment on a paragraph, reload, comment persists, click annotation jumps back. + +### Slice 3 — Recrsv event loop + +Goal: make the exploration rail model-driven. + +- define tool schemas +- add server-side loop using one provider first +- stream events to UI +- show timeline slices as they happen +- final answer cites block ids + +Demo: + +> Ask “what does the author think the canal changes economically?” and watch `toc → search_text → context_get → answer` stream live. + +### Slice 4 — REPL over blocks + +Goal: port the most insane `recrsv` power. + +- add sandboxed `repl_exec(code)` +- expose normalized blocks as `context` +- stream code/result slices +- cap runtime/output + +Demo: + +> Ask for recurring place names or motif counts and watch the model write code over the book. + +### Slice 5 — PDF import + +Goal: broaden source formats without disturbing the reader model. + +- parse PDF text +- create page-backed sections/blocks +- source spans include page numbers + +Demo: + +> Open a PDF and use the same reader/exploration/annotation surface. + +## Risks + +### EPUB messiness + +EPUBs vary wildly. The parser may produce junk sections or bad headings. + +Mitigation: keep the document model simple and improve cleanup incrementally. + +### Overbuilding AI before anchors + +It is tempting to wire LLMs immediately. But without stable anchors, answers cannot cite, annotations cannot persist, and the UI cannot jump reliably. + +Mitigation: finish enough block/source mapping first. + +### Becoming Roughdraft + +Roughdraft is good, but Weft’s job is reading books/docs, not reviewing Markdown drafts. + +Mitigation: keep annotations reader-centered and source-anchored. Export to Markdown later; do not make Markdown the only source of truth. 
+ +### Losing terminal identity + +The web preview is useful for modern Markdown rendering, but Weft should preserve a keyboard-first, local-first spirit. + +Mitigation: Bun server + local files + vim nav. Terminal CLI can remain as a secondary surface. + +## Open questions + +1. Should the primary product surface be web-local, terminal, or both? +2. Should AI provider config use env vars first, browser localStorage keys, or Simon Willison `llm`-style config? +3. Do annotations belong in `.weft.json`, SQLite, or both? +4. How much does Roughdraft compatibility matter for v1 export? +5. Should `repl_exec` ship before or after real LLM `context_get` streaming? +6. Should PDF support come before annotations, or after the reader loop feels magical on EPUB? + +## Current recommendation + +Keep going in this order: + +1. Clean EPUB TOC/section quality +2. Add current block tracking and sidecar annotations +3. Add real streaming LLM tool loop +4. Add `repl_exec` +5. Add PDF + +This keeps the product coherent: first the reader knows where it is, then the user can mark it, then the AI can move through it visibly. 
diff --git a/package.json b/package.json new file mode 100644 index 0000000..b927a77 --- /dev/null +++ b/package.json @@ -0,0 +1,27 @@ +{ + "name": "weft", + "version": "0.2.0", + "description": "A vim-like, AI-native terminal reader for books and documents", + "type": "module", + "bin": { + "weft": "./src/cli.ts" + }, + "scripts": { + "read": "bun run src/cli.ts", + "web": "bun run src/web.ts", + "dev": "bun run src/web.ts panama.epub", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "fast-xml-parser": "^5.3.2", + "jszip": "^3.10.1", + "marked": "^18.0.4", + "turndown": "^7.2.2" + }, + "devDependencies": { + "@types/bun": "^1.3.14", + "@types/node": "^24.10.1", + "@types/turndown": "^5.0.5", + "typescript": "^5.9.3" + } +} diff --git a/src/cli.ts b/src/cli.ts new file mode 100644 index 0000000..621dfd9 --- /dev/null +++ b/src/cli.ts @@ -0,0 +1,36 @@ +#!/usr/bin/env bun +import { loadDocument } from "./document"; +import { TerminalReader } from "./reader"; + +function printHelp(): void { + console.log(`weft 🪢 + +Usage: + weft read + weft + +Keys: + h/l previous/next section + j/k next/previous page + g/G start/end + t table of contents + q quit +`); +} + +const args = process.argv.slice(2); +const command = args[0]; +const sourcePath = command === "read" ? args[1] : command; + +if (!sourcePath || sourcePath === "--help" || sourcePath === "-h") { + printHelp(); + process.exit(sourcePath ? 0 : 1); +} + +try { + const document = await loadDocument(sourcePath); + new TerminalReader(document).start(); +} catch (error) { + console.error(error instanceof Error ? 
error.message : error); + process.exit(1); +} diff --git a/src/document.ts b/src/document.ts new file mode 100644 index 0000000..de6a07f --- /dev/null +++ b/src/document.ts @@ -0,0 +1,273 @@ +import path from "node:path"; + +import { XMLParser } from "fast-xml-parser"; +import JSZip from "jszip"; +import TurndownService from "turndown"; + +export type SourceType = "epub" | "pdf" | "markdown" | "text"; +export type BlockKind = "heading" | "paragraph" | "quote" | "list" | "code" | "table" | "image" | "page_break"; + +export interface SourceSpan { + sourcePath: string; + href?: string; + page?: number; + charStart?: number; + charEnd?: number; + selector?: string; +} + +export interface Block { + id: string; + sectionId: string; + kind: BlockKind; + markdown: string; + plainText: string; + sourceSpan: SourceSpan; +} + +export interface Section { + id: string; + title: string; + level: number; + parentId?: string; + blocks: Block[]; + sourceSpan: SourceSpan; +} + +export interface WeftDocument { + id: string; + title: string; + authors: string[]; + language?: string; + description?: string; + sourcePath: string; + sourceType: SourceType; + sections: Section[]; +} + +interface ManifestItem { + id: string; + href: string; + mediaType?: string; +} + +const xmlParser = new XMLParser({ + ignoreAttributes: false, + attributeNamePrefix: "", + removeNSPrefix: true, +}); + +const markdownConverter = new TurndownService({ + headingStyle: "atx", + bulletListMarker: "-", + codeBlockStyle: "fenced", +}); + +markdownConverter.remove(["script", "style", "img"]); + +export async function loadDocument(sourcePath: string): Promise { + if (sourcePath.toLowerCase().endsWith(".epub")) { + return loadEpubDocument(sourcePath); + } + + throw new Error(`Unsupported document type: ${path.extname(sourcePath) || "unknown"}`); +} + +export async function loadEpubDocument(sourcePath: string): Promise { + const data = await Bun.file(sourcePath).arrayBuffer(); + const zip = await 
JSZip.loadAsync(data); + const opfPath = await readOpfPath(zip); + const opfText = await readZipText(zip, opfPath); + const opf = xmlParser.parse(opfText) as Record; + const packageNode = objectAt(opf, "package"); + const metadata = objectAt(packageNode, "metadata"); + const manifestItems = readManifest(packageNode); + const spineIds = readSpineIds(packageNode); + const manifestById = new Map(manifestItems.map((item) => [item.id, item])); + const opfDir = path.posix.dirname(opfPath); + const sections: Section[] = []; + + for (const idref of spineIds) { + const item = manifestById.get(idref); + if (!item || !isReadableDocumentItem(item)) continue; + + const href = normalizeZipPath(path.posix.join(opfDir, item.href)); + const html = await readZipText(zip, href); + const markdown = normalizeMarkdown(markdownConverter.turndown(html)); + if (!markdown) continue; + + const sectionId = `s${sections.length + 1}`; + const sourceSpan: SourceSpan = { sourcePath, href }; + sections.push({ + id: sectionId, + title: extractSectionTitle(markdown, item.href), + level: 1, + parentId: sections.at(-1)?.id, + blocks: blocksFromMarkdown(markdown, sectionId, sourceSpan), + sourceSpan, + }); + } + + return { + id: path.basename(sourcePath, path.extname(sourcePath)), + title: firstMetadataValue(metadata, "title") ?? path.basename(sourcePath), + authors: metadataValues(metadata, "creator"), + language: firstMetadataValue(metadata, "language"), + description: firstMetadataValue(metadata, "description"), + sourcePath, + sourceType: "epub", + sections, + }; +} + +function readManifest(packageNode: Record): ManifestItem[] { + const manifest = objectAt(packageNode, "manifest"); + return arrayAt(manifest, "item") + .map((item) => objectOrNull(item)) + .filter((item): item is Record => item !== null) + .map((item) => ({ + id: String(item.id ?? ""), + href: String(item.href ?? ""), + mediaType: typeof item["media-type"] === "string" ? 
item["media-type"] : undefined, + })) + .filter((item) => item.id && item.href); +} + +function readSpineIds(packageNode: Record): string[] { + const spine = objectAt(packageNode, "spine"); + return arrayAt(spine, "itemref") + .map((item) => objectOrNull(item)) + .filter((item): item is Record => item !== null) + .map((item) => String(item.idref ?? "")) + .filter(Boolean); +} + +async function readOpfPath(zip: JSZip): Promise { + const container = await readZipText(zip, "META-INF/container.xml"); + const parsed = xmlParser.parse(container) as Record; + const rootfiles = objectAt(objectAt(parsed, "container"), "rootfiles"); + const rootfile = objectOrNull(arrayAt(rootfiles, "rootfile")[0]) ?? objectAt(rootfiles, "rootfile"); + const fullPath = rootfile["full-path"]; + + if (typeof fullPath !== "string" || !fullPath) { + throw new Error("Invalid EPUB: missing OPF rootfile path."); + } + + return normalizeZipPath(fullPath); +} + +async function readZipText(zip: JSZip, filePath: string): Promise { + const file = zip.file(normalizeZipPath(filePath)); + if (!file) throw new Error(`Invalid EPUB: missing ${filePath}.`); + return file.async("text"); +} + +function blocksFromMarkdown(markdown: string, sectionId: string, sectionSpan: SourceSpan): Block[] { + const blocks: Block[] = []; + let cursor = 0; + + for (const rawBlock of markdown.split(/\n{2,}/)) { + const blockMarkdown = rawBlock.trim(); + if (!blockMarkdown) continue; + + const start = markdown.indexOf(rawBlock, cursor); + const end = start >= 0 ? start + rawBlock.length : undefined; + blocks.push({ + id: `${sectionId}.b${blocks.length + 1}`, + sectionId, + kind: blockKind(blockMarkdown), + markdown: blockMarkdown, + plainText: plainText(blockMarkdown), + sourceSpan: { + sourcePath: sectionSpan.sourcePath, + href: sectionSpan.href, + charStart: start >= 0 ? start : undefined, + charEnd: end, + }, + }); + cursor = end === undefined ? 
cursor : end; + } + + return blocks; +} + +function blockKind(markdown: string): BlockKind { + if (markdown.startsWith("#")) return "heading"; + if (markdown.startsWith(">")) return "quote"; + if (markdown.startsWith("```")) return "code"; + if (/^[-*+]\s/.test(markdown) || /^\d+[.)]\s/.test(markdown)) return "list"; + if (markdown.includes("| ---")) return "table"; + return "paragraph"; +} + +function extractSectionTitle(markdown: string, fallbackHref: string): string { + for (const line of markdown.split("\n").slice(0, 8)) { + if (!line.startsWith("#")) continue; + const title = line.replace(/^#+\s*/, "").trim(); + if (title && !/^wrap\d+$/i.test(title)) return title; + } + + const firstText = markdown + .split("\n") + .map((line) => line.trim()) + .find((line) => line && !line.startsWith("![") && !/^wrap\d+$/i.test(line)); + + return firstText ?? path.posix.basename(fallbackHref, path.posix.extname(fallbackHref)).replaceAll("_", " "); +} + +function isReadableDocumentItem(item: ManifestItem): boolean { + return !item.mediaType || ["application/xhtml+xml", "text/html"].includes(item.mediaType); +} + +function metadataValues(metadata: Record, key: string): string[] { + return arrayAt(metadata, key) + .map((value) => { + if (typeof value === "string") return value; + if (typeof value === "number") return String(value); + if (typeof value === "object" && value && "#text" in value) return String(value["#text"]); + return ""; + }) + .map((value) => value.trim()) + .filter(Boolean); +} + +function firstMetadataValue(metadata: Record, key: string): string | undefined { + return metadataValues(metadata, key)[0]; +} + +function normalizeMarkdown(markdown: string): string { + return markdown + .replace(/^!\[[^\]]*\]\([^)]*\)\s*$/gm, "") + .replace(/\n{3,}/g, "\n\n") + .trim(); +} + +function plainText(markdown: string): string { + return markdown + .replace(/```[\s\S]*?```/g, " ") + .replace(/!\[[^\]]*\]\([^)]*\)/g, " ") + .replace(/[#>*_`~\[\]()]/g, " ") + 
.replace(/\s+/g, " ") + .trim(); +} + +function normalizeZipPath(filePath: string): string { + return filePath.replace(/^\.\//, ""); +} + +function objectAt(source: Record, key: string): Record { + const value = source[key]; + if (!value || typeof value !== "object" || Array.isArray(value)) return {}; + return value as Record; +} + +function objectOrNull(value: unknown): Record | null { + if (!value || typeof value !== "object" || Array.isArray(value)) return null; + return value as Record; +} + +function arrayAt(source: Record, key: string): unknown[] { + const value = source[key]; + if (Array.isArray(value)) return value; + return value === undefined || value === null ? [] : [value]; +} diff --git a/src/reader.ts b/src/reader.ts new file mode 100644 index 0000000..a033144 --- /dev/null +++ b/src/reader.ts @@ -0,0 +1,236 @@ +import readline from "node:readline"; + +import type { Section, WeftDocument } from "./document"; + +interface ReaderState { + sectionIndex: number; + pageIndex: number; + showToc: boolean; +} + +export class TerminalReader { + private state: ReaderState = { sectionIndex: 0, pageIndex: 0, showToc: false }; + + constructor(private readonly document: WeftDocument) {} + + start(): void { + if (this.document.sections.length === 0) { + console.error("No readable sections found."); + return; + } + + readline.emitKeypressEvents(process.stdin); + if (process.stdin.isTTY) process.stdin.setRawMode(true); + process.stdin.resume(); + process.stdin.on("keypress", (_, key) => this.handleKey(key)); + process.stdout.on("resize", () => this.render()); + this.render(); + } + + private handleKey(key: readline.Key): void { + if (key.ctrl && key.name === "c") this.quit(); + + switch (key.name) { + case "q": + this.quit(); + break; + case "h": + case "left": + this.moveSection(-1); + break; + case "l": + case "right": + this.moveSection(1); + break; + case "j": + case "down": + this.movePage(1); + break; + case "k": + case "up": + this.movePage(-1); + break; + case 
"g": + this.jumpStart(); + break; + case "G": + this.jumpEnd(); + break; + case "t": + this.state.showToc = !this.state.showToc; + break; + case "escape": + this.state.showToc = false; + break; + } + + this.render(); + } + + private render(): void { + clearScreen(); + if (this.state.showToc) { + this.renderToc(); + return; + } + + const section = this.currentSection(); + const pages = this.pagesFor(section); + this.state.pageIndex = clamp(this.state.pageIndex, 0, pages.length - 1); + const page = pages[this.state.pageIndex] ?? ""; + const sectionProgress = percent(this.state.pageIndex + 1, pages.length); + const documentProgress = percent(this.state.sectionIndex + 1, this.document.sections.length); + + writeDim(`weft 🪢 ${this.document.title}`); + process.stdout.write("\n"); + writeStrong(section.title); + process.stdout.write( + `\n${dim(`section ${this.state.sectionIndex + 1}/${this.document.sections.length}`)} ` + + `${dim(`page ${this.state.pageIndex + 1}/${pages.length}`)} ` + + `${dim(`section ${sectionProgress}% · book ${documentProgress}%`)}\n\n`, + ); + process.stdout.write(page); + process.stdout.write("\n\n"); + writeDim("h/l sections · j/k pages · g/G ends · t toc · q quit"); + } + + private renderToc(): void { + writeDim(`weft 🪢 ${this.document.title}`); + process.stdout.write("\n\n"); + writeStrong("Table of contents"); + process.stdout.write("\n\n"); + + for (const [index, section] of this.document.sections.entries()) { + const marker = index === this.state.sectionIndex ? "→" : " "; + const label = `${marker} ${String(index + 1).padStart(2, " ")}. ${section.title}`; + process.stdout.write(index === this.state.sectionIndex ? 
cyan(label) : label); + process.stdout.write("\n"); + } + + process.stdout.write("\n"); + writeDim("t/Esc close toc · q quit"); + } + + private moveSection(delta: number): void { + this.state.sectionIndex = clamp( + this.state.sectionIndex + delta, + 0, + this.document.sections.length - 1, + ); + this.state.pageIndex = 0; + this.state.showToc = false; + } + + private movePage(delta: number): void { + const pages = this.pagesFor(this.currentSection()); + const nextPage = this.state.pageIndex + delta; + + if (nextPage >= 0 && nextPage < pages.length) { + this.state.pageIndex = nextPage; + return; + } + + if (delta > 0 && this.state.sectionIndex < this.document.sections.length - 1) { + this.state.sectionIndex += 1; + this.state.pageIndex = 0; + } + + if (delta < 0 && this.state.sectionIndex > 0) { + this.state.sectionIndex -= 1; + this.state.pageIndex = this.pagesFor(this.currentSection()).length - 1; + } + } + + private jumpStart(): void { + this.state.sectionIndex = 0; + this.state.pageIndex = 0; + this.state.showToc = false; + } + + private jumpEnd(): void { + this.state.sectionIndex = this.document.sections.length - 1; + this.state.pageIndex = this.pagesFor(this.currentSection()).length - 1; + this.state.showToc = false; + } + + private currentSection(): Section { + return this.document.sections[this.state.sectionIndex] ?? this.document.sections[0]!; + } + + private pagesFor(section: Section): string[] { + const rows = Math.max(10, process.stdout.rows || 30); + const columns = Math.max(40, process.stdout.columns || 100); + const contentRows = rows - 7; + const contentWidth = columns - 2; + const lines = section.blocks.flatMap((block) => [ + ...wrapMarkdown(block.markdown, contentWidth), + "", + ]); + const pages: string[] = []; + + for (let index = 0; index < lines.length; index += contentRows) { + pages.push(lines.slice(index, index + contentRows).join("\n")); + } + + return pages.length ? 
pages : ["[empty section]"]; + } + + private quit(): never { + clearScreen(); + if (process.stdin.isTTY) process.stdin.setRawMode(false); + process.stdin.pause(); + process.exit(0); + } +} + +function wrapMarkdown(markdown: string, width: number): string[] { + return markdown.split("\n").flatMap((line) => wrapLine(line, width)); +} + +function wrapLine(line: string, width: number): string[] { + if (line.length <= width) return [line]; + const words = line.split(/\s+/); + const lines: string[] = []; + let current = ""; + + for (const word of words) { + if (`${current} ${word}`.trim().length > width) { + if (current) lines.push(current); + current = word; + } else { + current = `${current} ${word}`.trim(); + } + } + + if (current) lines.push(current); + return lines; +} + +function clearScreen(): void { + process.stdout.write("\x1b[2J\x1b[H"); +} + +function percent(current: number, total: number): number { + if (total <= 0) return 0; + return Math.round((current / total) * 100); +} + +function clamp(value: number, min: number, max: number): number { + return Math.max(min, Math.min(value, max)); +} + +function writeStrong(value: string): void { + process.stdout.write(`\x1b[1m${value}\x1b[0m`); +} + +function writeDim(value: string): void { + process.stdout.write(dim(value)); +} + +function dim(value: string): string { + return `\x1b[2m${value}\x1b[0m`; +} + +function cyan(value: string): string { + return `\x1b[36m${value}\x1b[0m`; +} diff --git a/src/web.ts b/src/web.ts new file mode 100644 index 0000000..d1a533e --- /dev/null +++ b/src/web.ts @@ -0,0 +1,669 @@ +#!/usr/bin/env bun +import { marked } from "marked"; + +import type { Block, Section } from "./document"; +import { loadDocument } from "./document"; + +interface PagePayload { + title: string; + sectionTitle: string; + sectionIndex: number; + sectionCount: number; + pageIndex: number; + pageCount: number; + html: string; + blockIds: string[]; +} + +const sourcePath = process.argv[2] ?? 
"panama.epub"; +const port = Number(process.env.PORT ?? 4173); +const document = await loadDocument(sourcePath); + +marked.use({ + gfm: true, + breaks: false, +}); + +Bun.serve({ + port, + async fetch(req) { + const url = new URL(req.url); + + if (url.pathname === "/") return htmlResponse(indexHtml()); + if (url.pathname === "/api/document") return jsonResponse(documentSummary()); + if (url.pathname === "/api/page") return jsonResponse(pageFromUrl(url)); + if (url.pathname === "/api/rlm") return jsonResponse(exploreQuery(url)); + + return new Response("Not found", { status: 404 }); + }, +}); + +console.log(`weft web preview → http://localhost:${port}`); +console.log(`reading ${sourcePath}`); + +function documentSummary() { + return { + title: document.title, + authors: document.authors, + sourcePath: document.sourcePath, + sections: document.sections.map((section, index) => ({ + id: section.id, + title: section.title, + index, + blockCount: section.blocks.length, + })), + }; +} + +function pageFromUrl(url: URL): PagePayload { + const sectionIndex = clamp( + Number(url.searchParams.get("section") ?? 0), + 0, + document.sections.length - 1, + ); + const section = document.sections[sectionIndex] ?? document.sections[0]; + if (!section) throw new Error("Document has no sections."); + + const pages = pagesFor(section); + const pageIndex = clamp(Number(url.searchParams.get("page") ?? 0), 0, pages.length - 1); + const blocks = pages[pageIndex] ?? []; + + return { + title: document.title, + sectionTitle: section.title, + sectionIndex, + sectionCount: document.sections.length, + pageIndex, + pageCount: pages.length, + html: renderBlocks(blocks), + blockIds: blocks.map((block) => block.id), + }; +} + + +function exploreQuery(url: URL) { + const query = (url.searchParams.get("q") ?? "").trim(); + const terms = query.toLowerCase().split(/\s+/).filter((term) => term.length > 2); + const blocks = allBlocks(); + const hits = terms.length === 0 + ? 
[] + : blocks + .map((entry) => ({ ...entry, score: scoreBlock(entry.block, terms) })) + .filter((entry) => entry.score > 0) + .sort((a, b) => b.score - a.score) + .slice(0, 8); + + const slices = [ + { + id: "toc", + tool: "toc", + title: "Read the structure first", + content: document.sections.slice(0, 18).map((section, index) => `${index + 1}. ${section.title}`).join("\n"), + sectionIndex: 0, + chars: 0, + }, + { + id: "search", + tool: "search_text", + title: query ? `Search for “${query}”` : "Awaiting a query", + content: hits.length + ? hits.map((hit) => `${hit.block.id} · ${document.sections[hit.sectionIndex]?.title ?? "section"}\n${snippet(hit.block.plainText, terms)}`).join("\n\n") + : "No matching blocks yet. Try a motif, character, phrase, or question.", + sectionIndex: hits[0]?.sectionIndex ?? 0, + chars: hits.reduce((sum, hit) => sum + snippet(hit.block.plainText, terms).length, 0), + }, + ...hits.slice(0, 4).map((hit, index) => ({ + id: `context-${index}`, + tool: "context_get", + title: `Inspect ${hit.block.id}`, + content: contextAround(hit.sectionIndex, hit.blockIndex), + sectionIndex: hit.sectionIndex, + blockId: hit.block.id, + chars: contextAround(hit.sectionIndex, hit.blockIndex).length, + })), + ]; + + const charsRead = slices.reduce((sum, slice) => sum + slice.chars, 0); + const totalChars = blocks.reduce((sum, entry) => sum + entry.block.plainText.length, 0); + + return { + query, + totalChars, + charsRead, + coverage: totalChars > 0 ? 
charsRead / totalChars : 0, + slices, + }; +} + +function allBlocks() { + return document.sections.flatMap((section, sectionIndex) => + section.blocks.map((block, blockIndex) => ({ section, sectionIndex, block, blockIndex })), + ); +} + +function scoreBlock(block: Block, terms: string[]): number { + const text = block.plainText.toLowerCase(); + return terms.reduce((score, term) => { + const matches = text.matchAll(new RegExp(escapeRegExp(term), "g")); + return score + Array.from(matches).length; + }, 0); +} + +function snippet(text: string, terms: string[]): string { + const lower = text.toLowerCase(); + const firstIndex = terms + .map((term) => lower.indexOf(term)) + .filter((index) => index >= 0) + .sort((a, b) => a - b)[0] ?? 0; + const start = Math.max(0, firstIndex - 160); + const end = Math.min(text.length, firstIndex + 420); + const prefix = start > 0 ? "…" : ""; + const suffix = end < text.length ? "…" : ""; + return `${prefix}${text.slice(start, end)}${suffix}`; +} + +function contextAround(sectionIndex: number, blockIndex: number): string { + const section = document.sections[sectionIndex]; + if (!section) return ""; + return section.blocks + .slice(Math.max(0, blockIndex - 2), blockIndex + 3) + .map((block) => `[${block.id}] ${block.plainText.slice(0, 900)}${block.plainText.length > 900 ? "…" : ""}`) + .join("\n\n"); +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function pagesFor(section: Section): Block[][] { + const pages: Block[][] = []; + let current: Block[] = []; + let chars = 0; + + for (const block of section.blocks) { + const nextChars = chars + block.markdown.length; + if (current.length > 0 && nextChars > 5200) { + pages.push(current); + current = []; + chars = 0; + } + current.push(block); + chars += block.markdown.length; + } + + if (current.length > 0) pages.push(current); + return pages.length ? 
pages : [[]]; +} + +function renderBlocks(blocks: Block[]): string { + const markdown = blocks.map((block) => block.markdown).join("\n\n"); + return marked.parse(markdown, { async: false }); +} + +function jsonResponse(payload: unknown): Response { + return Response.json(payload, { + headers: { "Access-Control-Allow-Origin": "*" }, + }); +} + +function htmlResponse(html: string): Response { + return new Response(html, { + headers: { "Content-Type": "text/html; charset=utf-8" }, + }); +} + +function clamp(value: number, min: number, max: number): number { + if (Number.isNaN(value)) return min; + return Math.max(min, Math.min(value, max)); +} + +function indexHtml(): string { + return ` + + + + + Weft + + + +
+ +
+
+
+

weft reboot preview

+

Loading…

+
+
+ +
+
+
+
+
+ +
+

current section

+

+
+ +
+
+
+
+
+
+

recrsv graft

+

AI exploration rail

+
+
+ + +
+
+
context tools waiting
+
+
+
+ j/k page + h/l section + g/G ends + t TOC + explore rail = recrsv-style context tools +
+
+
+ + +`; +} + +function clientScript(): string { + return String.raw` +const state = { section: 0, page: 0, tocOpen: true, summary: null }; +const els = { + toc: document.getElementById("toc"), + title: document.getElementById("book-title"), + sectionTitle: document.getElementById("section-title"), + position: document.getElementById("position"), + meter: document.getElementById("meter"), + page: document.getElementById("page"), + prevSection: document.getElementById("prev-section"), + nextSection: document.getElementById("next-section"), + rlmForm: document.getElementById("rlm-form"), + rlmQuery: document.getElementById("rlm-query"), + rlmStats: document.getElementById("rlm-stats"), + rlmTimeline: document.getElementById("rlm-timeline"), +}; + +async function boot() { + state.summary = await fetchJson("/api/document"); + els.title.textContent = state.summary.title; + renderToc(); + await renderPage(); +} + +async function renderPage() { + const page = await fetchJson("/api/page?section=" + state.section + "&page=" + state.page); + state.section = page.sectionIndex; + state.page = page.pageIndex; + els.sectionTitle.textContent = page.sectionTitle; + els.position.textContent = "section " + (page.sectionIndex + 1) + "/" + page.sectionCount + " · page " + (page.pageIndex + 1) + "/" + page.pageCount; + els.meter.style.width = Math.round(((page.sectionIndex + page.pageIndex / page.pageCount) / page.sectionCount) * 100) + "%"; + els.page.innerHTML = page.html; + document.querySelectorAll("#toc button").forEach((button) => { + button.classList.toggle("active", Number(button.dataset.index) === state.section); + }); + window.scrollTo({ top: 0, behavior: "instant" }); +} + +function renderToc() { + const items = state.summary.sections.map((section) => { + return ''; + }).join(""); + + els.toc.innerHTML = '
Contents
' + items + '
'; + els.toc.querySelectorAll("button").forEach((button) => { + button.addEventListener("click", async () => { + state.section = Number(button.dataset.index); + state.page = 0; + await renderPage(); + }); + }); +} + +async function movePage(delta) { + state.page += delta; + const before = state.page; + await renderPage(); + if (before === state.page && delta > 0) await moveSection(1); + if (before === state.page && delta < 0) await moveSection(-1, true); +} + +async function moveSection(delta, end = false) { + state.section = Math.max(0, Math.min(state.summary.sections.length - 1, state.section + delta)); + state.page = end ? 9999 : 0; + await renderPage(); +} + +function toggleToc() { + state.tocOpen = !state.tocOpen; + document.body.classList.toggle("toc-closed", !state.tocOpen); +} + +async function fetchJson(path) { + const response = await fetch(path); + if (!response.ok) throw new Error(await response.text()); + return response.json(); +} + +function escapeHtml(value) { + return value.replace(/[&<>\"]/g, (char) => ({ "&": "&", "<": "<", ">": ">", '\"': """ }[char])); +} + +els.prevSection.addEventListener("click", () => moveSection(-1)); +els.nextSection.addEventListener("click", () => moveSection(1)); +window.addEventListener("keydown", (event) => { + if (["INPUT", "TEXTAREA"].includes(event.target?.tagName)) return; + if (event.key === "j") { event.preventDefault(); movePage(1); } + if (event.key === "k") { event.preventDefault(); movePage(-1); } + if (event.key === "h") { event.preventDefault(); moveSection(-1); } + if (event.key === "l") { event.preventDefault(); moveSection(1); } + if (event.key === "g") { event.preventDefault(); state.section = 0; state.page = 0; renderPage(); } + if (event.key === "G") { event.preventDefault(); state.section = state.summary.sections.length - 1; state.page = 9999; renderPage(); } + if (event.key === "t") { event.preventDefault(); toggleToc(); } +}); + +boot().catch((error) => { + els.page.innerHTML = "
" + escapeHtml(error.stack || error.message) + "
"; +}); +`; +} + +function styles(): string { + return String.raw` +:root { + color-scheme: dark; + --bg: #07090d; + --panel: rgba(255,255,255,0.055); + --panel-strong: rgba(255,255,255,0.09); + --border: rgba(255,255,255,0.12); + --text: #eef3ff; + --muted: #8d99ae; + --dim: #596579; + --cyan: #77e7ff; + --blue: #7aa7ff; + --green: #a7f3d0; + --gold: #f6d365; + --max-reader: 880px; +} +* { box-sizing: border-box; } +body { + margin: 0; + min-height: 100vh; + background: + radial-gradient(circle at top left, rgba(119,231,255,0.16), transparent 34rem), + radial-gradient(circle at bottom right, rgba(122,167,255,0.13), transparent 38rem), + linear-gradient(135deg, #07090d 0%, #0c111b 48%, #071018 100%); + color: var(--text); + font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; +} +#app { + display: grid; + grid-template-columns: minmax(230px, 320px) minmax(0, 1fr); + min-height: 100vh; +} +body.toc-closed #app { grid-template-columns: 0 minmax(0, 1fr); } +body.toc-closed #toc { transform: translateX(-100%); } +#toc { + position: sticky; + top: 0; + height: 100vh; + overflow: auto; + border-right: 1px solid var(--border); + background: rgba(3, 7, 12, 0.68); + backdrop-filter: blur(24px); + transition: transform 160ms ease; +} +.toc-title { + position: sticky; + top: 0; + padding: 1rem; + background: rgba(3, 7, 12, 0.9); + border-bottom: 1px solid var(--border); + color: var(--muted); + font-size: 0.72rem; + letter-spacing: 0.16em; + text-transform: uppercase; +} +.toc-list { padding: 0.5rem; } +#toc button { + width: 100%; + display: grid; + grid-template-columns: 2.4rem minmax(0, 1fr); + gap: 0.65rem; + border: 0; + border-radius: 0.8rem; + padding: 0.72rem 0.75rem; + background: transparent; + color: var(--muted); + text-align: left; + font: inherit; + cursor: pointer; +} +#toc button:hover, #toc button.active { + color: var(--text); + background: var(--panel-strong); +} +#toc button span { color: var(--dim); 
font-variant-numeric: tabular-nums; } +main { + width: min(100%, 1180px); + margin: 0 auto; + padding: 2rem clamp(1rem, 3vw, 3rem); +} +header { + display: flex; + align-items: end; + justify-content: space-between; + gap: 1.25rem; + margin-bottom: 1.25rem; +} +.eyebrow, .label { + margin: 0 0 0.35rem; + color: var(--cyan); + font-size: 0.72rem; + font-weight: 700; + letter-spacing: 0.16em; + text-transform: uppercase; +} +h1 { margin: 0; font-size: clamp(1.6rem, 3vw, 3.4rem); letter-spacing: -0.05em; } +.progress-card { + min-width: 260px; + padding: 0.85rem; + border: 1px solid var(--border); + border-radius: 1rem; + background: var(--panel); + color: var(--muted); + font-size: 0.85rem; +} +.meter { height: 0.42rem; margin-top: 0.65rem; border-radius: 999px; background: rgba(255,255,255,0.08); overflow: hidden; } +.meter i { display: block; height: 100%; width: 0%; border-radius: inherit; background: linear-gradient(90deg, var(--cyan), var(--blue)); transition: width 180ms ease; } +.reader-shell { + border: 1px solid var(--border); + border-radius: 1.6rem; + background: rgba(255,255,255,0.045); + box-shadow: 0 30px 120px rgba(0,0,0,0.35); + overflow: hidden; +} +.section-bar { + display: grid; + grid-template-columns: auto minmax(0, 1fr) auto; + gap: 1rem; + align-items: center; + padding: 1rem 1.1rem; + border-bottom: 1px solid var(--border); + background: rgba(0,0,0,0.2); +} +.section-bar h2 { margin: 0; font-size: 1.05rem; overflow: hidden; white-space: nowrap; text-overflow: ellipsis; } +.section-bar button { + width: 2.4rem; + height: 2.4rem; + border: 1px solid var(--border); + border-radius: 999px; + background: var(--panel); + color: var(--text); + cursor: pointer; +} +.section-bar button:hover { border-color: rgba(119,231,255,0.5); color: var(--cyan); } +.markdown-body { + max-width: var(--max-reader); + margin: 0 auto; + padding: clamp(1.5rem, 4vw, 4.5rem); + font-family: ui-serif, Georgia, Cambria, "Times New Roman", serif; + font-size: clamp(1.08rem, 
1.5vw, 1.28rem); + line-height: 1.78; +} +.markdown-body h1, .markdown-body h2, .markdown-body h3 { + font-family: Inter, ui-sans-serif, system-ui, sans-serif; + line-height: 1.08; + letter-spacing: -0.045em; +} +.markdown-body h1 { font-size: clamp(2.1rem, 4vw, 4.2rem); } +.markdown-body h2 { font-size: clamp(1.7rem, 3vw, 2.7rem); margin-top: 2em; } +.markdown-body h3 { font-size: 1.35rem; margin-top: 1.8em; } +.markdown-body p { margin: 1.05em 0; } +.markdown-body a { color: var(--cyan); } +.markdown-body blockquote { + margin: 1.6em 0; + padding: 0.2rem 1.2rem; + border-left: 3px solid var(--cyan); + color: #c8d3e5; + background: rgba(119,231,255,0.055); + border-radius: 0 1rem 1rem 0; +} +.markdown-body code { + font-family: "SF Mono", ui-monospace, Menlo, monospace; + font-size: 0.88em; + color: var(--green); + background: rgba(0,0,0,0.32); + border: 1px solid rgba(255,255,255,0.1); + border-radius: 0.35rem; + padding: 0.1rem 0.28rem; +} +.markdown-body pre { + overflow: auto; + padding: 1rem; + border-radius: 1rem; + background: rgba(0,0,0,0.45); + border: 1px solid var(--border); +} +.markdown-body hr { border: 0; height: 1px; background: var(--border); margin: 2.5rem 0; } +.rlm-shell { + margin-top: 1rem; + border: 1px solid var(--border); + border-radius: 1.4rem; + background: rgba(0,0,0,0.24); + overflow: hidden; +} +.rlm-head { + display: grid; + grid-template-columns: minmax(0, 1fr) minmax(280px, 440px); + gap: 1rem; + align-items: end; + padding: 1rem; + border-bottom: 1px solid var(--border); +} +.rlm-head h2 { margin: 0; font-size: 1.05rem; letter-spacing: -0.03em; } +#rlm-form { display: flex; gap: 0.5rem; } +#rlm-query { + min-width: 0; + flex: 1; + border: 1px solid var(--border); + border-radius: 999px; + padding: 0.72rem 0.95rem; + background: rgba(255,255,255,0.055); + color: var(--text); + outline: none; +} +#rlm-query:focus { border-color: rgba(119,231,255,0.62); box-shadow: 0 0 0 3px rgba(119,231,255,0.08); } +#rlm-form button { + border: 
1px solid rgba(119,231,255,0.38); + border-radius: 999px; + padding: 0.72rem 1rem; + background: rgba(119,231,255,0.12); + color: var(--cyan); + cursor: pointer; +} +.rlm-stats { + padding: 0.75rem 1rem; + border-bottom: 1px solid var(--border); + color: var(--muted); + font-size: 0.84rem; + font-family: ui-monospace, Menlo, monospace; +} +.rlm-timeline { + display: grid; + gap: 0.75rem; + padding: 1rem; +} +.rlm-slice { + display: grid; + grid-template-columns: 2.4rem minmax(0, 1fr); + gap: 0.85rem; + width: 100%; + border: 1px solid var(--border); + border-radius: 1rem; + padding: 0.9rem; + background: rgba(255,255,255,0.04); + color: var(--text); + text-align: left; + cursor: pointer; +} +.rlm-slice:hover { border-color: rgba(119,231,255,0.45); background: rgba(119,231,255,0.055); } +.rlm-index { color: var(--cyan); font-family: ui-monospace, Menlo, monospace; } +.rlm-slice strong { color: var(--green); font-size: 0.72rem; letter-spacing: 0.12em; text-transform: uppercase; } +.rlm-slice h3 { margin: 0.25rem 0 0.6rem; font-size: 1rem; } +.rlm-slice pre { + max-height: 12rem; + overflow: auto; + margin: 0; + white-space: pre-wrap; + color: #c8d3e5; + font: 0.82rem/1.55 ui-monospace, Menlo, monospace; +} +footer { + display: flex; + flex-wrap: wrap; + gap: 0.6rem 1rem; + padding: 1rem 0; + color: var(--muted); + font-size: 0.86rem; +} +kbd { + display: inline-flex; + min-width: 1.45rem; + justify-content: center; + border: 1px solid var(--border); + border-bottom-color: rgba(255,255,255,0.25); + border-radius: 0.4rem; + padding: 0.05rem 0.32rem; + background: var(--panel); + color: var(--text); + font-family: ui-monospace, Menlo, monospace; +} +@media (max-width: 820px) { + #app { grid-template-columns: 1fr; } + #toc { position: fixed; z-index: 10; width: min(86vw, 320px); } + body.toc-closed #toc { transform: translateX(-105%); } + header { align-items: start; flex-direction: column; } + .progress-card { min-width: 0; width: 100%; } + .markdown-body { padding: 
1.25rem; } + .rlm-head { grid-template-columns: 1fr; } +} + +`; +} diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..9b88df8 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "Bundler", + "strict": true, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "skipLibCheck": true, + "noUncheckedIndexedAccess": true, + "resolveJsonModule": true, + "types": ["bun", "node"] + }, + "include": ["src/**/*.ts"] +} From c73841da821980dd5223e92e8d813a52df16755d Mon Sep 17 00:00:00 2001 From: dpunj Date: Tue, 26 May 2026 18:30:40 -0500 Subject: [PATCH 2/8] feat: add block-level vim reader navigation --- README.md | 1 + docs/shaped/ai-native-reader-reboot.md | 14 +++- src/web.ts | 109 +++++++++++++++++++++---- 3 files changed, 107 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index d0ec161..b69647d 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Weft should stay small and sharp, but grow a real document spine: - **Reader-native AI tools** — `toc`, `current_location`, `get_section`, `search_text`, and eventually `repl_exec` over book blocks - **Visible AI navigation** — show the reader what the model inspected, inspired by `recrsv`'s long-document exploration - **Recrsv-style exploration rail** — web preview includes `toc`, `search_text`, and `context_get` slices so you can watch document tools move through the book +- **Block-level vim motion** — `j/k` moves through actual passages/blocks, while `ctrl+d/ctrl+u` turns pages ## Getting started diff --git a/docs/shaped/ai-native-reader-reboot.md b/docs/shaped/ai-native-reader-reboot.md index ba3b1f6..a5f2623 100644 --- a/docs/shaped/ai-native-reader-reboot.md +++ b/docs/shaped/ai-native-reader-reboot.md @@ -162,7 +162,8 @@ The web UI includes: - dark modern reader shell - TOC sidebar - Markdown rendering via `marked` -- vim keys: `j/k`, `h/l`, `g/G`, `t` +- vim 
keys: `j/k`, `ctrl+d/ctrl+u`, `h/l`, `g/G`, `t` +- block-level cursor over actual passages - page/section progress ### First recrsv-style exploration rail @@ -343,11 +344,18 @@ With stable anchors, Weft can do: Goal: make EPUB navigation feel less prototype-y. +Already started: + +- render block ids as data attributes +- add current block tracking in the web reader +- make `j/k` move through actual passages/blocks rather than browser scroll +- keep `ctrl+d/ctrl+u` for page movement + +Still needed: + - Use EPUB nav/NCX for TOC when available - improve section titles - skip obvious cover-only sections -- render block ids as hidden data attributes -- add current block tracking in the web reader Demo: diff --git a/src/web.ts b/src/web.ts index d1a533e..9975509 100644 --- a/src/web.ts +++ b/src/web.ts @@ -196,8 +196,16 @@ function pagesFor(section: Section): Block[][] { } function renderBlocks(blocks: Block[]): string { - const markdown = blocks.map((block) => block.markdown).join("\n\n"); - return marked.parse(markdown, { async: false }); + return blocks + .map((block) => { + const html = marked.parse(block.markdown, { async: false }); + return `
${block.id}
${html}
`; + }) + .join("\n"); +} + +function escapeAttribute(value: string): string { + return value.replace(/&/g, "&").replace(/"/g, """).replace(/