-
Notifications
You must be signed in to change notification settings - Fork 40
Expand file tree
/
Copy pathmake_fmhy_bookmarks.py
More file actions
399 lines (316 loc) · 13.3 KB
/
make_fmhy_bookmarks.py
File metadata and controls
399 lines (316 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
"""Generate FMHY bookmark HTML files from FMHY markdown sections."""
from __future__ import annotations
import asyncio
import base64
import logging
import re
from dataclasses import dataclass
from typing import Dict, List, Tuple
import aiohttp
# Configure logging: INFO-level messages rendered as "[LEVEL] message".
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class Config:
    """Configuration constants for the FMHY bookmark generator."""

    # Public FMHY site root. NOTE(review): not referenced elsewhere in this file.
    site_base_url: str = "https://fmhy.net/"
    # Legacy reddit wiki root. NOTE(review): not referenced elsewhere in this file.
    reddit_base_url: str = "https://www.reddit.com/r/FREEMEDIAHECKYEAH/wiki/"
    # Raw rentry page containing the base64-obfuscated link collection.
    base64_rentry_url: str = "https://rentry.co/FMHYBase64/raw"
    # Raw GitHub base URL under which each wiki markdown file lives.
    github_raw_base: str = (
        "https://raw.githubusercontent.com/fmhy/edit/refs/heads/main/docs/"
    )
    # Name of the top-level folder in the generated bookmark HTML.
    folder_name: str = "FMHY"
    # Whether backtick-wrapped base64 spans should be decoded in place.
    decode_base64: bool = True
@dataclass
class BookmarkLine:
    """Represents one original content line at a leaf."""

    is_starred: bool  # line contains โญ or ๐
    description_raw: str  # raw trailing text after last ")", may be empty
    links: List[Tuple[str, str]]  # list of (title, url) exactly as matched
@dataclass
class WikiSection:
    """Represents a wiki section to be processed."""

    filename: str  # markdown file name, e.g. "video.md"; drives download + processing
    icon: str  # emoji label. NOTE(review): not referenced elsewhere in this file
    url_key: str  # URL slug. NOTE(review): unused — sub_url is derived from filename
# Module-level configuration singleton used throughout this script.
CONFIG = Config()
def parse_heading(line: str, sub_url: str) -> Tuple[str, str]:
    """Split a markdown heading into a (subcategory, subsubcategory) pair.

    Exactly one element of the pair is populated per heading; non-heading
    lines yield ("", "").
    """
    # The storage page uses plain ##/### headings; every other page marks
    # its two heading levels with special glyphs.
    if sub_url == "storage":
        top_marker, sub_marker = "## ", "### "
    else:
        top_marker, sub_marker = "# โบ", "## โท"
    if line.startswith(top_marker):
        return line.replace(top_marker, "").strip(), "/"
    if line.startswith(sub_marker):
        return "", line.replace(sub_marker, "").strip()
    return "", ""
def clean_category_name(category: str) -> str:
    """Blank out category names that actually contain a URL."""
    if "http" in category:
        return ""
    return category
def add_hierarchy_prefix(
    lines: List[str], section_name: str, sub_url: str
) -> List[str]:
    """Prefix every content line with its {"section", "subcat", "subsubcat"} path.

    Heading lines update the current hierarchy position and are dropped from
    the output; lines with no alphabetic characters are skipped entirely.
    """
    section = section_name.replace(".md", "")
    prefixed: List[str] = []
    subcat = ""
    subsubcat = ""
    for raw in lines:
        if raw.startswith("#"):
            # Heading line: move to a new spot in the hierarchy.
            new_sub, new_subsub = parse_heading(raw, sub_url)
            if new_sub:
                subcat = clean_category_name(new_sub)
            if new_subsub:
                subsubcat = clean_category_name(new_subsub)
            continue
        if not any(ch.isalpha() for ch in raw):
            continue  # separator / blank-ish line
        body = raw[2:] if raw.startswith("* ") else raw
        prefixed.append(f'{{"{section}", "{subcat}", "{subsubcat}"}}' + body)
    return prefixed
# Base64 processing functions
def fix_base64_padding(encoded_string: str) -> str:
    """Pad a base64 string with '=' out to a multiple of four characters."""
    # (-len % 4) is 0 when already aligned, otherwise the exact shortfall.
    return encoded_string + "=" * (-len(encoded_string) % 4)
def decode_base64_content(input_string: str) -> str:
    """Decode backtick-wrapped base64 spans in *input_string*.

    Controlled by ``CONFIG.decode_base64``; when disabled, the input is
    returned unchanged. Spans that are not valid base64 (or do not decode
    to UTF-8 text) are left exactly as they were, rather than aborting the
    whole page.
    """
    if not CONFIG.decode_base64:
        return input_string

    def base64_decode(match):
        span = match.group(0)
        encoded_data = span[1:-1]  # strip the surrounding backticks
        try:
            return base64.b64decode(fix_base64_padding(encoded_data)).decode()
        except (ValueError, UnicodeDecodeError):
            # Not actually base64 / not UTF-8: keep the original span.
            # (binascii.Error is a ValueError subclass.)
            return span

    pattern = r"`[^`]+`"
    return re.sub(pattern, base64_decode, input_string)
def process_base64_sections(base64_page: str) -> List[str]:
    """Split the rentry base64 page into one flattened line per section.

    Each ``***``-delimited section is collapsed onto a single line
    ("Heading - entry, entry, ..."), its base64 spans are decoded when
    enabled, and a link back to the base64 page is prepended.
    """
    formatted_sections = []
    for section in base64_page.split("***"):
        # Collapse the section onto one line: the blank line after the
        # heading becomes " - ", remaining newlines become ", ".
        # (After these replacements no newlines remain, so the original
        # "remove empty lines" split/join step was a no-op and is dropped.)
        flattened = (
            section.strip()
            .replace("#### ", "")
            .replace("\n\n", " - ")
            .replace("\n", ", ")
        )
        # Decode base64 if enabled.
        flattened = decode_base64_content(flattened)
        # Prefix with a link back to the base64 page.
        formatted_sections.append(
            "[๐Base64](https://rentry.co/FMHYBase64) โบ " + flattened
        )
    return formatted_sections
def _process_wiki_text(filename: str, content: str) -> List[str]:
    """Convert raw markdown text for *filename* into processed bookmark lines."""
    if filename == "base64.md":
        return process_base64_sections(content)
    sub_url = filename.replace(".md", "").lower()
    return add_hierarchy_prefix(content.split("\n"), filename, sub_url)


async def download_wiki_content_async(
    session: aiohttp.ClientSession, filename: str
) -> Tuple[str, List[str]]:
    """Load one wiki section — local file first, then remote — and process it.

    Returns (filename, processed_lines); an empty list signals a failed
    download, which the caller simply skips.
    """
    # Prefer a local copy of the markdown file when one exists.
    try:
        with open(filename, "r", encoding="utf-8") as f:
            local_content = f.read()
    except FileNotFoundError:
        local_content = None
    if local_content is not None:
        logger.info("Loaded %s locally", filename)
        return filename, _process_wiki_text(filename, local_content)

    # Fall back to downloading the section.
    if filename == "base64.md":
        url = CONFIG.base64_rentry_url
    else:
        url = CONFIG.github_raw_base + filename
    try:
        async with session.get(url, timeout=30) as resp:
            resp.raise_for_status()
            content = await resp.text()
        if filename == "base64.md":
            content = content.replace("\r", "")  # normalize CRLF line endings
            logger.info("Downloaded base64 page")
        else:
            logger.info("Downloaded %s", filename)
        return filename, _process_wiki_text(filename, content)
    except Exception as e:
        # Deliberate best-effort: one failed section must not abort the run.
        logger.error("Failed to fetch %s (%s). Skipping.", filename, e)
        return filename, []
async def collect_all_wiki_content_async() -> List[str]:
    """Fetch every wiki section concurrently and return the combined lines."""
    async with aiohttp.ClientSession() as session:
        tasks = [
            download_wiki_content_async(session, section.filename)
            for section in WIKI_SECTIONS
        ]
        logger.info("Starting concurrent fetching of %d sections...", len(tasks))
        # return_exceptions=True so one failed task never cancels the rest.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    collected: List[str] = []
    for outcome in results:
        if isinstance(outcome, Exception):
            logger.error("Download task failed: %s", outcome)
            continue
        _, lines = outcome
        collected.extend(lines)
    return collected
# Wiki sections to process: one entry per FMHY markdown page.
# NOTE(review): only filename appears to be used by the download/processing
# code in this file; icon and url_key look informational — verify before removing.
WIKI_SECTIONS = [
    WikiSection("video.md", "๐บ", "video"),
    WikiSection("ai.md", "๐ค", "ai"),
    WikiSection("mobile.md", "๐ฑ", "mobile"),
    WikiSection("audio.md", "๐ต", "audio"),
    WikiSection("downloading.md", "๐พ", "downloading"),
    WikiSection("educational.md", "๐ง ", "educational"),
    WikiSection("gaming.md", "๐ฎ", "gaming"),
    WikiSection("privacy.md", "๐", "privacy"),
    WikiSection("system-tools.md", "๐ป", "system-tools"),
    WikiSection("file-tools.md", "๐๏ธ", "file-tools"),
    WikiSection("internet-tools.md", "๐", "internet-tools"),
    WikiSection("social-media-tools.md", "๐ฌ", "social-media-tools"),
    WikiSection("text-tools.md", "๐", "text-tools"),
    WikiSection("video-tools.md", "๐ผ", "video-tools"),
    WikiSection("misc.md", "๐", "misc"),
    WikiSection("reading.md", "๐", "reading"),
    WikiSection("torrenting.md", "๐", "torrenting"),
    WikiSection("image-tools.md", "๐ท", "image-tools"),
    WikiSection("gaming-tools.md", "๐พ", "gaming-tools"),
    WikiSection("linux-macos.md", "๐ง๐", "linux-macos"),
    WikiSection("developer-tools.md", "๐ฅ๏ธ", "developer-tools"),
    WikiSection("non-english.md", "๐", "non-english"),
    WikiSection("storage.md", "๐๏ธ", "storage"),
    WikiSection("base64.md", "๐", "base64"),
    WikiSection("unsafe.md", "๐ถ", "unsafe"),
]
async def main_async() -> None:
    """Async entry point: gather all sections, then emit both bookmark files."""
    logger.info("Collecting wiki content...")
    joined = "\n".join(await collect_all_wiki_content_async())
    # Full export plus a starred-only variant from the same content.
    create_html_bookmarks(joined, "fmhy_in_bookmarks.html")
    create_html_bookmarks(
        joined, "fmhy_in_bookmarks_starred_only.html", starred_only=True
    )
    logger.info("Bookmark generation complete!")
def parse_bookmark_line(line: str) -> Tuple[str, str, str, BookmarkLine | None]:
    """Extract the hierarchy triple and bookmark payload from one prefixed line.

    Returns ("", "", "", None) for lines without the {"...", "...", "..."}
    hierarchy prefix.
    """
    hierarchy = re.match(r'^\{"([^"]+)", "([^"]+)", "([^"]+)"\}', line)
    if hierarchy is None:
        return "", "", "", None
    level1, level2, level3 = hierarchy.groups()

    # Collect (title, url) pairs, dropping non-primary Discord invites, X,
    # Telegram and .onion links (matched by their titles).
    unwanted = {"Discord", "X", "Telegram", ".onion"}
    links = [
        pair
        for pair in re.findall(r"\[([^\]]+)\]\((https?://[^\)]+)\)", line)
        if pair[0] not in unwanted
    ]

    # Anything after the last ")" is the (raw) human-readable description.
    tail = line.rfind(")")
    description = "" if tail == -1 else line[tail + 1 :].replace("**", "").strip()

    starred = "โญ" in line or "๐" in line
    payload = BookmarkLine(
        is_starred=starred, description_raw=description, links=links
    )
    return level1, level2, level3, payload
def generate_bookmark_html(
    bookmarks_dict: Dict[str, Dict[str, Dict[str, List[BookmarkLine]]]],
    indent: int = 1,
    starred_only: bool = False,
    path: Tuple[str, ...] = (),
) -> str:
    """Render the nested bookmark mapping as Netscape-bookmark-style HTML.

    Recurses through dict levels, emitting a folder (<H3> + <DL>) per key;
    leaf lists of BookmarkLine become <A> entries. With starred_only, only
    starred lines are kept and only their first link is rendered.
    """
    pad = " " * indent
    parts: List[str] = []
    for name, child in bookmarks_dict.items():
        parts.append(f"{pad}<DT><H3>{name}</H3>\n")
        parts.append(f"{pad}<DL><p>\n")
        full_path = path + (name,)
        if isinstance(child, dict):
            parts.append(
                generate_bookmark_html(child, indent + 1, starred_only, full_path)
            )
        else:
            # Leaf level: full_path is expected to be (level1, level2, level3).
            lvl1, lvl2, lvl3 = full_path if len(full_path) >= 3 else ("", "", "")
            inner_pad = " " * (indent + 1)
            for entry in child:
                if starred_only and not entry.is_starred:
                    continue
                if entry.description_raw:
                    description = entry.description_raw
                else:
                    # Fall back to the deepest meaningful hierarchy name.
                    if lvl3 != "/":
                        fallback = lvl3
                    elif lvl2:
                        fallback = lvl2
                    else:
                        fallback = lvl1
                    description = "- " + fallback
                selected = entry.links[:1] if starred_only else entry.links
                for title, url in selected:
                    text = f"{title} {description}".strip()
                    parts.append(
                        f'{inner_pad}<DT><A HREF="{url}" ADD_DATE="0">{text}</A>\n'
                    )
        parts.append(f"{pad}</DL><p>\n")
    return "".join(parts)
def create_html_bookmarks(
    content: str, output_file: str, starred_only: bool = False
) -> None:
    """Build the nested bookmark mapping from *content* and write it as HTML."""
    tree: Dict[str, Dict[str, Dict[str, List[BookmarkLine]]]] = {}
    for raw_line in content.split("\n"):
        lvl1, lvl2, lvl3, entry = parse_bookmark_line(raw_line)
        if entry is None or not lvl1:
            continue  # line lacked the hierarchy prefix
        leaf = tree.setdefault(lvl1, {}).setdefault(lvl2, {}).setdefault(lvl3, [])
        leaf.append(entry)

    # Standard Netscape bookmark envelope around a single top-level folder.
    header = (
        "<!DOCTYPE NETSCAPE-Bookmark-file-1>\n"
        '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">\n'
        "<TITLE>Bookmarks</TITLE>\n"
        "<H1>Bookmarks</H1>\n"
        "<DL><p>\n"
        f" <DT><H3>{CONFIG.folder_name}</H3>\n"
        " <DL><p>\n"
    )
    body = generate_bookmark_html(tree, indent=2, starred_only=starred_only)
    footer = " </DL><p>\n</DL><p>\n"

    with open(output_file, "w", encoding="utf-8") as out:
        out.write(header + body + footer)
    logger.info("Created bookmark file: %s", output_file)
def main() -> None:
    """Synchronous entry point: drive the async pipeline to completion."""
    asyncio.run(main_async())
if __name__ == "__main__":
main()