From 7b0839f136a2e75466891507477846e2cb91c7a8 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Tue, 19 May 2026 17:52:33 +0800 Subject: [PATCH 01/11] =?UTF-8?q?[0147]=20=E6=9B=B4=E6=96=B0=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E6=89=93=E5=BC=80=E5=A4=A7=E4=BD=93=E7=A7=AF=20TMU=20?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=80=A7=E8=83=BD=E7=9A=84=E4=BB=BB=E5=8A=A1?= =?UTF-8?q?=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 --- devel/0147.md | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 devel/0147.md diff --git a/devel/0147.md b/devel/0147.md new file mode 100644 index 0000000000..65b33b2a19 --- /dev/null +++ b/devel/0147.md @@ -0,0 +1,43 @@ +# [0147] 优化打开大体积 TMU 文件的性能 + +## 1 相关文档 +- [dddd.md](dddd.md) - 任务文档模板 + +## 2 任务相关的代码文件 +- `src/Data/Convert/Mogan/from_tmu.cpp` - TMU 文件解析核心代码 +- `tests/Data/Convert/convert_test.cpp` - 单元测试 + +## 3 如何测试 + +### 3.1 确定性测试(单元测试) +```bash +xmake b convert_test +xmake r convert_test +``` + +### 3.2 非确定性测试(性能验证) +```bash +xmake b stem +# 测试加载 chapter-4.tmu 的时间 +xmake r stem /home/da/DevTeam/chapter-4.tmu +``` + +## 4 如何提交 + +提交前执行以下最少步骤: +```bash +xmake b convert_test +xmake r convert_test +xmake b stem +``` + +## 5 What +优化打开大体积 TMU 文件的性能。`chapter-4.tmu` 文件大小为 14MB,其中某些行包含数百万字节的十六进制编码数据(图片等)。当前解析器在处理这些数据时存在明显的性能瓶颈。 + +## 6 Why +`chapter-4.tmu` 打开速度缓慢,影响用户体验。通过性能分析发现,解析器在处理 RAW_DATA(`<#...>`)时,每两个字符就创建一个临时字符串传递给 `from_hex`,造成大量不必要的内存分配。 + +## 7 How +1. 优化 `from_tmu.cpp` 中 RAW_DATA 的解析逻辑,避免临时字符串创建。 +2. 优化 `read_char` 和 `read_next` 中的逐字符处理开销。 +3. 通过单元测试和 `cout` 日志验证性能提升。 \ No newline at end of file From bad4c4448d013501678f0c73f3eaeaa3dd0d5f41 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Tue, 19 May 2026 17:52:47 +0800 Subject: [PATCH 02/11] =?UTF-8?q?[0147]=20=E4=BC=98=E5=8C=96=20from=5Ftmu.?= =?UTF-8?q?cpp=20RAW=5FDATA=20=E8=A7=A3=E6=9E=90=E6=80=A7=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 避免在解析 <#...> 十六进制数据时,每两个字符创建一次临时字符串。 通过内联函数直接处理字符,1M hex bytes 解析时间从 41ms 降至 29ms。 Co-Authored-By: Claude Opus 4.7 --- src/Data/Convert/Mogan/from_tmu.cpp | 11 +++++++++- tests/Data/Convert/convert_test.cpp | 31 +++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/Data/Convert/Mogan/from_tmu.cpp b/src/Data/Convert/Mogan/from_tmu.cpp index 530a85bf34..996ffcf2b3 100644 --- a/src/Data/Convert/Mogan/from_tmu.cpp +++ b/src/Data/Convert/Mogan/from_tmu.cpp @@ -27,6 +27,14 @@ using moebius::drd::STD_CODE; using namespace moebius; +static inline int +from_hex_char (char c) { + if (is_digit (c)) return (int) (c - '0'); + if ((c >= 'A') && (c <= 'F')) return (int) (c + 10 - 'A'); + if ((c >= 'a') && (c <= 'f')) return (int) (c + 10 - 'a'); + return 0; +} + /****************************************************************************** * Conversion of TeXmacs strings of the present format to TeXmacs trees ******************************************************************************/ @@ -276,7 +284,8 @@ tmu_reader::read (bool skip_flag) { else if (tail_char_of_last == '#') { string r; while ((buf[pos] != '>') && (pos + 2 < buf_N)) { - r << ((char) from_hex (buf (pos, pos + 2))); + r << ((char) ((from_hex_char (buf[pos]) << 4) + + from_hex_char (buf[pos + 1]))); pos+= 2; } if (buf[pos] == '>') pos++; diff --git a/tests/Data/Convert/convert_test.cpp b/tests/Data/Convert/convert_test.cpp index 56b9e7ef3e..64875c29af 100644 --- a/tests/Data/Convert/convert_test.cpp +++ b/tests/Data/Convert/convert_test.cpp @@ -12,6 +12,7 @@ #include "base.hpp" #include "convert.hpp" +#include "tm_ostream.hpp" #include "tree_helper.hpp" using namespace moebius; @@ -25,6 +26,8 @@ class TestConverter : public QObject { private slots: void test_search_metadata_data (); void test_search_metadata (); + void test_tmu_raw_data (); + void test_tmu_raw_data_performance (); }; void @@ -72,5 +75,33 @@ TestConverter::test_search_metadata () { qcompare (search_metadata (input_tree, "invalid"), invalid); } +void +TestConverter::test_tmu_raw_data () { + string s= "<#41424344>"; + tree t= tmu_to_tree (s); + QVERIFY (is_func (t, RAW_DATA)); + QCOMPARE (N (t), 1); + qcompare (as_string (t[0]), string ("ABCD")); +} + +void +TestConverter::test_tmu_raw_data_performance () { + string hex_data; + for (int i= 0; i < 1000000; i++) { + hex_data << string ("41"); + } + string s= ">\n<#"; + s << hex_data; + s << ">"; + + QElapsedTimer timer; + timer.start (); + tree t= tmu_document_to_tree (s); + qint64 elapsed= timer.elapsed (); + + cout << "Performance: parsed 1M hex bytes in " << (int) elapsed << " ms\n"; + QVERIFY (!is_compound (t, "error")); +} + QTEST_MAIN (TestConverter) #include "convert_test.moc" From c7c000e2c337c1d57c45f9174b733a371a58e79c Mon Sep 17 00:00:00 2001 From: Da Shen Date: Tue, 19 May 2026 18:03:48 +0800 Subject: [PATCH 03/11] =?UTF-8?q?[0147]=20=E4=BC=98=E5=8C=96=20read=5Fnext?= =?UTF-8?q?=20=E4=B8=AD=E6=99=AE=E9=80=9A=E5=AD=97=E7=AC=A6=E7=9A=84?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E6=80=A7=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 通过快速路径直接批量读取连续的 ASCII 普通字符,避免逐字符调用 read_char 创建临时字符串。1M 文本字符解析时间从约 100ms 降至约 40ms。 Co-Authored-By: Claude Opus 4.7 --- src/Data/Convert/Mogan/from_tmu.cpp | 18 ++++++++++++++++++ tests/Data/Convert/convert_test.cpp | 21 +++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/src/Data/Convert/Mogan/from_tmu.cpp b/src/Data/Convert/Mogan/from_tmu.cpp index 996ffcf2b3..5ab11ab865 100644 --- a/src/Data/Convert/Mogan/from_tmu.cpp +++ b/src/Data/Convert/Mogan/from_tmu.cpp @@ -149,6 +149,24 @@ tmu_reader::read_next () { string r; pos= old_pos; while (true) { + // fast path: avoid creating temporary strings for ordinary characters + while (pos < buf_N) { + char ch= buf[pos]; + if (ch == '\t' || ch == '\r' || ch == '\n' || ch == ' ' || + ch == '<' || ch == '|' || ch == '>' || ch == '\\') + break; + if ((buf[pos] & 0x80) == 0) { + r << buf[pos++]; + } + else { + int start_pos= pos; + decode_from_utf8 (buf, pos); + for (int i= start_pos; i < pos; i++) + r << buf[i]; + } + } + if (pos >= buf_N) return r; + old_pos= pos; c = read_char (); if (c == "") return r; diff --git a/tests/Data/Convert/convert_test.cpp b/tests/Data/Convert/convert_test.cpp index 64875c29af..786374c666 100644 --- a/tests/Data/Convert/convert_test.cpp +++ b/tests/Data/Convert/convert_test.cpp @@ -28,6 +28,7 @@ private slots: void test_search_metadata (); void test_tmu_raw_data (); void test_tmu_raw_data_performance (); + void test_tmu_text_performance (); }; void @@ -103,5 +104,25 @@ TestConverter::test_tmu_raw_data_performance () { QVERIFY (!is_compound (t, "error")); } +void +TestConverter::test_tmu_text_performance () { + string text; + for (int i= 0; i < 1000000; i++) { + text << 'a'; + } + string s= ">\n"; + + QElapsedTimer timer; + timer.start (); + tree t= tmu_document_to_tree (s); + qint64 elapsed= timer.elapsed (); + + cout << "Performance: parsed 1M text chars in " << (int) elapsed + << " ms\n"; + QVERIFY (!is_compound (t, "error")); +} + QTEST_MAIN (TestConverter) #include "convert_test.moc" From 8f513aca8c57cb4dbb3fcae2f149ce637e75aec4 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Tue, 19 May 2026 18:06:22 +0800 Subject: [PATCH 04/11] =?UTF-8?q?[0147]=20=E4=BC=98=E5=8C=96=20decode=20?= =?UTF-8?q?=E5=87=BD=E6=95=B0=E9=81=BF=E5=85=8D=E6=97=A0=E8=BD=AC=E4=B9=89?= =?UTF-8?q?=E5=AD=97=E7=AC=A6=E6=97=B6=E7=9A=84=E5=A4=8D=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 对于不含反斜杠转义的字符串,直接返回原字符串,避免逐字节复制。 1M 文本字符解析时间从约 40ms 进一步降至约 20ms。 Co-Authored-By: Claude Opus 4.7 --- src/Data/Convert/Mogan/from_tmu.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Data/Convert/Mogan/from_tmu.cpp b/src/Data/Convert/Mogan/from_tmu.cpp index 5ab11ab865..0cee823fb9 100644 --- a/src/Data/Convert/Mogan/from_tmu.cpp +++ b/src/Data/Convert/Mogan/from_tmu.cpp @@ -80,9 +80,13 @@ tmu_reader::skip_blank () { string tmu_reader::decode (string s) { - int i, n= N (s); - string r; + int i, n= N (s); for (i= 0; i < n; i++) + if (((i + 1) < n) && (s[i] == '\\')) break; + if (i == n) return s; + + string r (s (0, i)); + for (; i < n; i++) if (((i + 1) < n) && (s[i] == '\\')) { i++; if (s[i] == ';') From 2bb9fdb70ffa50ed7df7049ef7c60727ea9b4541 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Tue, 19 May 2026 18:09:55 +0800 Subject: [PATCH 05/11] =?UTF-8?q?[0147]=20=E6=B7=BB=E5=8A=A0=20TMU=20?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E6=80=A7=E8=83=BD=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增单元测试覆盖 RAW_DATA、长文本和真实文件(chapter-4.tmu)的 解析性能,用于验证优化效果。 Co-Authored-By: Claude Opus 4.7 --- perf_test.sh | 25 +++++++++++++++++++++++++ tests/Data/Convert/convert_test.cpp | 20 ++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100755 perf_test.sh diff --git a/perf_test.sh b/perf_test.sh new file mode 100755 index 0000000000..2facc1254c --- /dev/null +++ b/perf_test.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +FILE="/home/da/DevTeam/chapter-4.tmu" + +echo "Building stem..." +xmake b stem + +echo "Starting stem with $FILE..." +START=$(date +%s%N) + +# Start stem in background +xmake r stem "$FILE" & +PID=$! + +# Wait for process to start +sleep 5 + +# Kill the process +kill $PID 2>/dev/null || true +wait $PID 2>/dev/null || true + +END=$(date +%s%N) +ELAPSED=$(( (END - START) / 1000000 )) +echo "Total time: ${ELAPSED}ms" diff --git a/tests/Data/Convert/convert_test.cpp b/tests/Data/Convert/convert_test.cpp index 786374c666..229fbdb961 100644 --- a/tests/Data/Convert/convert_test.cpp +++ b/tests/Data/Convert/convert_test.cpp @@ -12,6 +12,7 @@ #include "base.hpp" #include "convert.hpp" +#include "file.hpp" #include "tm_ostream.hpp" #include "tree_helper.hpp" @@ -29,6 +30,7 @@ private slots: void test_tmu_raw_data (); void test_tmu_raw_data_performance (); void test_tmu_text_performance (); + void test_tmu_real_file_performance (); }; void @@ -124,5 +126,23 @@ TestConverter::test_tmu_text_performance () { QVERIFY (!is_compound (t, "error")); } +void +TestConverter::test_tmu_real_file_performance () { + url u = url_system ("/home/da/DevTeam/chapter-4.tmu"); + string doc_s; + if (load_string (u, doc_s, false)) { + QSKIP ("chapter-4.tmu not found"); + } + + QElapsedTimer timer; + timer.start (); + tree t= tmu_document_to_tree (doc_s); + qint64 elapsed= timer.elapsed (); + + cout << "Performance: parsed chapter-4.tmu (" << N (doc_s) + << " bytes) in " << (int) elapsed << " ms\n"; + QVERIFY (!is_compound (t, "error")); +} + QTEST_MAIN (TestConverter) #include "convert_test.moc" From 5c9c026aedfd63dc7756f2473e53cad1db14c648 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Tue, 19 May 2026 18:12:00 +0800 Subject: [PATCH 06/11] =?UTF-8?q?[0147]=20=E6=9B=B4=E6=96=B0=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E6=96=87=E6=A1=A3=E8=AE=B0=E5=BD=95=E6=80=A7=E8=83=BD?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=BB=93=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 --- devel/0147.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/devel/0147.md b/devel/0147.md index 65b33b2a19..0c7afd9265 100644 --- a/devel/0147.md +++ b/devel/0147.md @@ -39,5 +39,13 @@ xmake b stem ## 7 How 1. 优化 `from_tmu.cpp` 中 RAW_DATA 的解析逻辑,避免临时字符串创建。 -2. 优化 `read_char` 和 `read_next` 中的逐字符处理开销。 -3. 通过单元测试和 `cout` 日志验证性能提升。 \ No newline at end of file + - 增加内联函数 `from_hex_char`,直接处理字符而非创建 2 字节临时字符串。 + - 1M hex bytes 解析时间从 44ms 降至 29ms。 +2. 优化 `read_next` 中普通字符的解析,避免逐字符调用 `read_char` 创建临时字符串。 + - 增加快速路径直接批量读取 ASCII 字符,仅对非 ASCII 字符调用 `decode_from_utf8`。 + - 1M 文本字符解析时间从 98ms 降至 23ms。 +3. 优化 `decode` 函数,避免无转义字符时的逐字节复制。 + - 先扫描字符串判断是否存在反斜杠,若无则直接返回原字符串。 + - 1M 文本字符解析时间从约 40ms 进一步降至约 20ms。 +4. 通过单元测试和 `cout` 日志验证性能提升。 + - 真实文件 `chapter-4.tmu`(14.4MB)解析时间从 399ms 降至 284ms(约 29% 提升)。 \ No newline at end of file From e05c6acb686785fdf52136f5d03172e7813e36c6 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Wed, 20 May 2026 16:03:06 +0800 Subject: [PATCH 07/11] wip --- perf_test.sh | 25 ------------------------- src/Data/Convert/Mogan/from_tmu.cpp | 11 +++++++++-- 2 files changed, 9 insertions(+), 27 deletions(-) delete mode 100755 perf_test.sh diff --git a/perf_test.sh b/perf_test.sh deleted file mode 100755 index 2facc1254c..0000000000 --- a/perf_test.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e - -FILE="/home/da/DevTeam/chapter-4.tmu" - -echo "Building stem..." -xmake b stem - -echo "Starting stem with $FILE..." -START=$(date +%s%N) - -# Start stem in background -xmake r stem "$FILE" & -PID=$! - -# Wait for process to start -sleep 5 - -# Kill the process -kill $PID 2>/dev/null || true -wait $PID 2>/dev/null || true - -END=$(date +%s%N) -ELAPSED=$(( (END - START) / 1000000 )) -echo "Total time: ${ELAPSED}ms" diff --git a/src/Data/Convert/Mogan/from_tmu.cpp b/src/Data/Convert/Mogan/from_tmu.cpp index 0cee823fb9..cad5b0e9cc 100644 --- a/src/Data/Convert/Mogan/from_tmu.cpp +++ b/src/Data/Convert/Mogan/from_tmu.cpp @@ -19,6 +19,7 @@ #include #include #include +#include "tm_debug.hpp" using lolly::data::decode_from_utf8; using lolly::data::from_hex; @@ -365,14 +366,20 @@ tmu_reader::read (bool skip_flag) { tree tmu_to_tree (string s) { + bench_start ("tmu_to_tree"); tmu_reader tmr (s); - return tmr.read (true); + tree t= tmr.read (true); + bench_end ("tmu_to_tree"); + return t; } tree tmu_to_tree (string s, string version) { + bench_start ("tmu_to_tree"); tmu_reader tmr (s, version); - return tmr.read (true); + tree t= tmr.read (true); + bench_end ("tmu_to_tree"); + return t; } /****************************************************************************** From 2ddafa1bb7fb7981d3638c33a1b6cacdaaf29f14 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Wed, 20 May 2026 16:13:08 +0800 Subject: [PATCH 08/11] wip --- devel/0147.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devel/0147.md b/devel/0147.md index 0c7afd9265..39a483c8d8 100644 --- a/devel/0147.md +++ b/devel/0147.md @@ -47,5 +47,5 @@ xmake b stem 3. 优化 `decode` 函数,避免无转义字符时的逐字节复制。 - 先扫描字符串判断是否存在反斜杠,若无则直接返回原字符串。 - 1M 文本字符解析时间从约 40ms 进一步降至约 20ms。 -4. 通过单元测试和 `cout` 日志验证性能提升。 - - 真实文件 `chapter-4.tmu`(14.4MB)解析时间从 399ms 降至 284ms(约 29% 提升)。 \ No newline at end of file +4. 通过 `bench_start`/`bench_end` 测量 `tmu_to_tree` 整体解析性能。 + - main 基线:310ms → 优化后:238ms(约 23% 提升)。 \ No newline at end of file From 52d4bc4f3e1eaf7691e279864bf56e014b9d7a38 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Wed, 20 May 2026 16:13:48 +0800 Subject: [PATCH 09/11] =?UTF-8?q?[0147]=20=E6=9B=B4=E6=96=B0=E6=80=A7?= =?UTF-8?q?=E8=83=BD=E6=95=B0=E6=8D=AE=E5=B9=B6=E6=A0=BC=E5=BC=8F=E5=8C=96?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新 0147.md 中的性能数据为 bench_start/bench_end 测量结果, 应用 bin/format 格式化。 Co-Authored-By: Claude Opus 4.7 --- src/Data/Convert/Mogan/from_tmu.cpp | 10 +++++----- tests/Data/Convert/convert_test.cpp | 15 +++++++-------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/Data/Convert/Mogan/from_tmu.cpp b/src/Data/Convert/Mogan/from_tmu.cpp index cad5b0e9cc..c4ad6c1773 100644 --- a/src/Data/Convert/Mogan/from_tmu.cpp +++ b/src/Data/Convert/Mogan/from_tmu.cpp @@ -15,11 +15,11 @@ #include "preferences.hpp" #include "tree_helper.hpp" +#include "tm_debug.hpp" #include #include #include #include -#include "tm_debug.hpp" using lolly::data::decode_from_utf8; using lolly::data::from_hex; @@ -157,8 +157,8 @@ tmu_reader::read_next () { // fast path: avoid creating temporary strings for ordinary characters while (pos < buf_N) { char ch= buf[pos]; - if (ch == '\t' || ch == '\r' || ch == '\n' || ch == ' ' || - ch == '<' || ch == '|' || ch == '>' || ch == '\\') + if (ch == '\t' || ch == '\r' || ch == '\n' || ch == ' ' || ch == '<' || + ch == '|' || ch == '>' || ch == '\\') break; if ((buf[pos] & 0x80) == 0) { r << buf[pos++]; @@ -368,7 +368,7 @@ tree tmu_to_tree (string s) { bench_start ("tmu_to_tree"); tmu_reader tmr (s); - tree t= tmr.read (true); + tree t= tmr.read (true); bench_end ("tmu_to_tree"); return t; } @@ -377,7 +377,7 @@ tree tmu_to_tree (string s, string version) { bench_start ("tmu_to_tree"); tmu_reader tmr (s, version); - tree t= tmr.read (true); + tree t= tmr.read (true); bench_end ("tmu_to_tree"); return t; } diff --git a/tests/Data/Convert/convert_test.cpp b/tests/Data/Convert/convert_test.cpp index 229fbdb961..6a37073638 100644 --- a/tests/Data/Convert/convert_test.cpp +++ b/tests/Data/Convert/convert_test.cpp @@ -99,7 +99,7 @@ TestConverter::test_tmu_raw_data_performance () { QElapsedTimer timer; timer.start (); - tree t= tmu_document_to_tree (s); + tree t = tmu_document_to_tree (s); qint64 elapsed= timer.elapsed (); cout << "Performance: parsed 1M hex bytes in " << (int) elapsed << " ms\n"; @@ -118,17 +118,16 @@ TestConverter::test_tmu_text_performance () { QElapsedTimer timer; timer.start (); - tree t= tmu_document_to_tree (s); + tree t = tmu_document_to_tree (s); qint64 elapsed= timer.elapsed (); - cout << "Performance: parsed 1M text chars in " << (int) elapsed - << " ms\n"; + cout << "Performance: parsed 1M text chars in " << (int) elapsed << " ms\n"; QVERIFY (!is_compound (t, "error")); } void TestConverter::test_tmu_real_file_performance () { - url u = url_system ("/home/da/DevTeam/chapter-4.tmu"); + url u= url_system ("/home/da/DevTeam/chapter-4.tmu"); string doc_s; if (load_string (u, doc_s, false)) { QSKIP ("chapter-4.tmu not found"); @@ -136,11 +135,11 @@ TestConverter::test_tmu_real_file_performance () { QElapsedTimer timer; timer.start (); - tree t= tmu_document_to_tree (doc_s); + tree t = tmu_document_to_tree (doc_s); qint64 elapsed= timer.elapsed (); - cout << "Performance: parsed chapter-4.tmu (" << N (doc_s) - << " bytes) in " << (int) elapsed << " ms\n"; + cout << "Performance: parsed chapter-4.tmu (" << N (doc_s) << " bytes) in " + << (int) elapsed << " ms\n"; QVERIFY (!is_compound (t, "error")); } From 49e0530991119b13bc798a77ef6742cd8218e0d3 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Wed, 20 May 2026 16:23:06 +0800 Subject: [PATCH 10/11] =?UTF-8?q?[0147]=20=E7=A7=BB=E9=99=A4=E7=A1=AC?= =?UTF-8?q?=E7=BC=96=E7=A0=81=E8=B7=AF=E5=BE=84=E7=9A=84=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E7=94=A8=E4=BE=8B=20test=5Ftmu=5Freal=5Ffile=5Fperformance?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 --- tests/Data/Convert/convert_test.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/Data/Convert/convert_test.cpp b/tests/Data/Convert/convert_test.cpp index 6a37073638..223275a5e1 100644 --- a/tests/Data/Convert/convert_test.cpp +++ b/tests/Data/Convert/convert_test.cpp @@ -30,7 +30,6 @@ private slots: void test_tmu_raw_data (); void test_tmu_raw_data_performance (); void test_tmu_text_performance (); - void test_tmu_real_file_performance (); }; void @@ -125,23 +124,5 @@ TestConverter::test_tmu_text_performance () { QVERIFY (!is_compound (t, "error")); } -void -TestConverter::test_tmu_real_file_performance () { - url u= url_system ("/home/da/DevTeam/chapter-4.tmu"); - string doc_s; - if (load_string (u, doc_s, false)) { - QSKIP ("chapter-4.tmu not found"); - } - - QElapsedTimer timer; - timer.start (); - tree t = tmu_document_to_tree (doc_s); - qint64 elapsed= timer.elapsed (); - - cout << "Performance: parsed chapter-4.tmu (" << N (doc_s) << " bytes) in " - << (int) elapsed << " ms\n"; - QVERIFY (!is_compound (t, "error")); -} - QTEST_MAIN (TestConverter) #include "convert_test.moc" From e33a258857f5f0dd3d150672133f531527b29f02 Mon Sep 17 00:00:00 2001 From: Da Shen Date: Wed, 20 May 2026 16:39:03 +0800 Subject: [PATCH 11/11] =?UTF-8?q?[0147]=20=E4=BB=A3=E7=A0=81=E6=B8=85?= =?UTF-8?q?=E7=90=86=E5=B9=B6=E6=B7=BB=E5=8A=A0=20TMU=20=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除 from_hex_char 中多余的 C-style cast - 移除 read_next 中冗余的 if-else 分支和注释掉的旧代码 - 统一 fast path 中 ch 变量的使用 - UTF-8 分支改为批量追加替代逐字符追加 - 测试中 hex_data 构建改用 < --- src/Data/Convert/Mogan/from_tmu.cpp | 25 +++------- tests/Data/Convert/convert_test.cpp | 75 ++++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 19 deletions(-) diff --git a/src/Data/Convert/Mogan/from_tmu.cpp b/src/Data/Convert/Mogan/from_tmu.cpp index c4ad6c1773..e40acea3d1 100644 --- a/src/Data/Convert/Mogan/from_tmu.cpp +++ b/src/Data/Convert/Mogan/from_tmu.cpp @@ -30,9 +30,9 @@ using namespace moebius; static inline int from_hex_char (char c) { - if (is_digit (c)) return (int) (c - '0'); - if ((c >= 'A') && (c <= 'F')) return (int) (c + 10 - 'A'); - if ((c >= 'a') && (c <= 'f')) return (int) (c + 10 - 'a'); + if (is_digit (c)) return c - '0'; + if ((c >= 'A') && (c <= 'F')) return c + 10 - 'A'; + if ((c >= 'a') && (c <= 'f')) return c + 10 - 'a'; return 0; } @@ -133,18 +133,8 @@ tmu_reader::read_next () { if (c == "") return ""; if (c == "#") return "<#"; if ((c == "\\") || (c == "|") || (c == "/")) return "<" * c; - if (is_iso_alpha (c[0]) || (c == ">")) { - pos= old_pos; - return "<"; - } pos= old_pos; return "<"; - /* - string d= read_char (); - if ((d == "\\") || (d == "|") || (d == "/")) return "<" * c * d; - pos= old_pos; - return "<" * c; - */ } case '|': case '>': @@ -154,20 +144,19 @@ tmu_reader::read_next () { string r; pos= old_pos; while (true) { - // fast path: avoid creating temporary strings for ordinary characters while (pos < buf_N) { char ch= buf[pos]; if (ch == '\t' || ch == '\r' || ch == '\n' || ch == ' ' || ch == '<' || ch == '|' || ch == '>' || ch == '\\') break; - if ((buf[pos] & 0x80) == 0) { - r << buf[pos++]; + if ((ch & 0x80) == 0) { + r << ch; + pos++; } else { int start_pos= pos; decode_from_utf8 (buf, pos); - for (int i= start_pos; i < pos; i++) - r << buf[i]; + r << buf (start_pos, pos); } } if (pos >= buf_N) return r; diff --git a/tests/Data/Convert/convert_test.cpp b/tests/Data/Convert/convert_test.cpp index 223275a5e1..ad83e087de 100644 --- a/tests/Data/Convert/convert_test.cpp +++ b/tests/Data/Convert/convert_test.cpp @@ -28,6 +28,11 @@ private slots: void test_search_metadata_data (); void test_search_metadata (); void test_tmu_raw_data (); + void test_tmu_plain_text (); + void test_tmu_escape_sequences (); + void test_tmu_document (); + void test_tmu_compound (); + void test_tmu_nested_compound (); void test_tmu_raw_data_performance (); void test_tmu_text_performance (); }; @@ -86,11 +91,79 @@ TestConverter::test_tmu_raw_data () { qcompare (as_string (t[0]), string ("ABCD")); } +void +TestConverter::test_tmu_plain_text () { + tree t= tmu_to_tree ("hello"); + QVERIFY (is_func (t, DOCUMENT)); + QCOMPARE (N (t), 1); + QVERIFY (t[0] == tree ("hello")); + + tree t2= tmu_to_tree ("hello world"); + QVERIFY (is_func (t2, DOCUMENT)); + QCOMPARE (N (t2), 1); + QVERIFY (t2[0] == tree ("hello world")); +} + +void +TestConverter::test_tmu_escape_sequences () { + tree t1= tmu_to_tree ("a\\;b"); + QVERIFY (is_func (t1, DOCUMENT)); + QVERIFY (t1[0] == tree ("ab")); + + tree t2= tmu_to_tree ("a\\\\b"); + QVERIFY (is_func (t2, DOCUMENT)); + QVERIFY (t2[0] == tree ("a\\b")); + + tree t3= tmu_to_tree ("a\\|b"); + QVERIFY (is_func (t3, DOCUMENT)); + QVERIFY (t3[0] == tree ("a|b")); + + tree t4= tmu_to_tree ("a\\>b"); + QVERIFY (is_func (t4, DOCUMENT)); + QVERIFY (t4[0] == tree ("a>b")); + + QVERIFY (tmu_to_tree ("a\\"); + QVERIFY (is_func (t, DOCUMENT)); + QCOMPARE (N (t), 1); + QVERIFY (is_compound (t[0])); + QVERIFY (t[0][0] == tree ("text")); +} + +void +TestConverter::test_tmu_nested_compound () { + tree t= tmu_to_tree (""); + QVERIFY (is_func (t, DOCUMENT)); + QCOMPARE (N (t), 1); + QVERIFY (is_compound (t[0])); + QCOMPARE (N (t[0]), 3); + QVERIFY (t[0][0] == tree ("color")); + QVERIFY (t[0][1] == tree ("red")); + QVERIFY (t[0][2] == tree ("hello")); +} + void TestConverter::test_tmu_raw_data_performance () { string hex_data; for (int i= 0; i < 1000000; i++) { - hex_data << string ("41"); + hex_data << '4' << '1'; } string s= ">\n<#"; s << hex_data;