import * as cheerio from "cheerio";

/**
 * Parse a Netscape-format bookmark export into flat folder and bookmark lists.
 *
 * Folders receive sequential string temp ids ("1", "2", ...) in document
 * order; `parentTempId` is `null` for entries directly under the first <DL>.
 *
 * @param {string} html - Raw bookmark file contents.
 * @returns {{
 *   folders: Array<{tempId: string, parentTempId: string|null, name: string}>,
 *   bookmarks: Array<{parentTempId: string|null, title: string, url: string}>
 * }} Empty lists when the document contains no <DL> element.
 */
export function parseNetscapeBookmarkHtmlNode(html) {
  // NOTE(review): `decodeEntities: false` is passed through to cheerio as-is;
  // its exact effect depends on the cheerio version/parser in use — confirm.
  const $ = cheerio.load(html, { decodeEntities: false });
  const rootDl = $("dl").first();
  if (!rootDl.length) return { folders: [], bookmarks: [] };

  const folders = [];
  const bookmarks = [];

  /** Collapse whitespace runs to a single space and trim both ends. */
  function normText(s) {
    return String(s || "").replace(/\s+/g, " ").trim();
  }

  /**
   * Collect the <DT> elements that belong to the level of `node`, looking
   * through wrapper elements (e.g. <p>) but never descending into a <DT> or
   * into a nested <DL>.
   */
  function collectLevelDt(node) {
    const out = [];
    for (const child of $(node).contents().toArray()) {
      if (!child || child.type !== "tag") continue;
      const tag = child.tagName?.toLowerCase();
      if (tag === "dt") {
        out.push(child);
        continue;
      }
      if (tag === "dl") {
        // A nested list belongs to the previous <DT>; it is walked from there.
        continue;
      }
      out.push(...collectLevelDt(child));
    }
    return out;
  }

  /**
   * Find the <DL> that follows `dtNode`: starting at the <DT> and climbing
   * towards `stopDlNode`, return the first case where the next element
   * sibling is a <DL>. Returns `null` when none is found.
   */
  function findNextDlForDt(dtNode, stopDlNode) {
    let cur = dtNode;
    while (cur && cur !== stopDlNode) {
      let next = cur.nextSibling;
      // Skip text/comment nodes between elements.
      while (next && next.type !== "tag") next = next.nextSibling;
      if (next && next.tagName?.toLowerCase() === "dl") return $(next);
      cur = cur.parent;
    }
    return null;
  }

  /**
   * Walk one <DL> level, recording folders and bookmarks under `parentTempId`.
   *
   * Netscape format: a <DL> contains repeating <DT> items and nested <DL>
   * lists. Once parsed, the <DT> items may end up wrapped (e.g. inside <p>),
   * and a folder's <DL> may be either a child of its <DT> or a following
   * sibling, so both layouts are handled.
   */
  function walkDl($dl, parentTempId) {
    for (const node of collectLevelDt($dl[0])) {
      const $dt = $(node);
      // Prefer a direct child; fall back to any descendant.
      const $directH3 = $dt.children("h3").first();
      const $h3 = $directH3.length ? $directH3 : $dt.find("h3").first();
      const $directA = $dt.children("a").first();
      const $a = $directA.length ? $directA : $dt.find("a").first();

      if ($h3.length) {
        const tempId = `${folders.length + 1}`;
        const name = normText($h3.text() || "");
        folders.push({ tempId, parentTempId: parentTempId ?? null, name });

        // Only folders need their sub-list, so look it up here rather than
        // for every <DT>.
        const $nestedDl = $dt.children("dl").first();
        const $nextDl = $nestedDl.length ? $nestedDl : findNextDlForDt(node, $dl[0]);
        if ($nextDl?.length) walkDl($nextDl, tempId);
      } else if ($a.length) {
        const title = normText($a.text() || "");
        const url = $a.attr("href") || "";
        bookmarks.push({ parentTempId: parentTempId ?? null, title, url });
      }
    }
  }

  walkDl(rootDl, null);
  return { folders, bookmarks };
}

/**
 * Render folders and bookmarks as a Netscape-format bookmark HTML document.
 *
 * Within each level, sub-folders are emitted first (sorted by name), then
 * bookmarks (sorted by title). Items whose parent id is `null`/`undefined`
 * are placed at the top level. Items whose parent id does not match any
 * folder reachable from the top level are not emitted.
 *
 * @param {object} input
 * @param {Array<{id: *, parentId: *, name: string}>} input.folders
 * @param {Array<{folderId: *, title: string, url: string}>} input.bookmarks
 * @returns {string} The bookmark file contents, ending with a newline.
 */
export function buildNetscapeBookmarkHtml({ folders, bookmarks }) {
  // A private sentinel for "no parent": unlike a string key it can never
  // collide with a real folder id (a folder with id "root" would otherwise
  // be listed as its own child and recurse forever).
  const ROOT_KEY = Symbol("root");
  const folderChildren = new Map();
  const bookmarkChildren = new Map();

  for (const f of folders) {
    const key = f.parentId ?? ROOT_KEY;
    if (!folderChildren.has(key)) folderChildren.set(key, []);
    folderChildren.get(key).push(f);
  }
  for (const b of bookmarks) {
    const key = b.folderId ?? ROOT_KEY;
    if (!bookmarkChildren.has(key)) bookmarkChildren.set(key, []);
    bookmarkChildren.get(key).push(b);
  }

  /** Stringify a possibly-missing value; null/undefined become "". */
  function text(s) {
    return String(s ?? "");
  }

  /** Escape text for use in HTML content and double-quoted attributes. */
  function esc(s) {
    // "&" must be replaced first so the entities added below stay intact.
    return text(s)
      .replaceAll("&", "&amp;")
      .replaceAll("<", "&lt;")
      .replaceAll(">", "&gt;")
      .replaceAll('"', "&quot;");
  }

  /** Render the <DL> list for one folder (or the top level when nullish). */
  function renderFolder(parentId) {
    const key = parentId ?? ROOT_KEY;
    // Copy before sorting so the caller's ordering is left untouched.
    const subFolders = [...(folderChildren.get(key) || [])].sort((a, b) =>
      text(a.name).localeCompare(text(b.name)),
    );
    const subBookmarks = [...(bookmarkChildren.get(key) || [])].sort((a, b) =>
      text(a.title).localeCompare(text(b.title)),
    );

    let out = "<DL><p>\n";
    for (const f of subFolders) {
      out += `<DT><H3>${esc(f.name)}</H3>\n`;
      // Indent the nested list one level; empty lines stay empty.
      out += renderFolder(f.id)
        .split("\n")
        .map((line) => (line ? `    ${line}` : line))
        .join("\n");
      out += "\n";
    }
    for (const b of subBookmarks) {
      out += `<DT><A HREF="${esc(b.url)}">${esc(b.title)}</A>\n`;
    }
    out += "</DL><p>";
    return out;
  }

  const header =
    "<!DOCTYPE NETSCAPE-Bookmark-file-1>\n" +
    "<!-- This is an automatically generated file. -->\n" +
    '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">\n' +
    "<TITLE>Bookmarks</TITLE>\n" +
    "<H1>Bookmarks</H1>\n";
  const body = renderFolder(null);
  return header + body + "\n";
}