126 lines
4.2 KiB
JavaScript
126 lines
4.2 KiB
JavaScript
import * as cheerio from "cheerio";
|
|
|
|
/**
 * Parse a Netscape bookmark-file HTML string into flat folder/bookmark lists.
 *
 * Only the first `<dl>` found in the document is walked. Every `<dt>` that
 * holds an `<h3>` becomes a folder; every other `<dt>` that holds an `<a>`
 * becomes a bookmark. Hierarchy is expressed through `parentTempId`, which is
 * `null` for entries directly under the root list.
 *
 * @param {string} html - Bookmark file contents.
 * @returns {{
 *   folders: Array<{tempId: string, parentTempId: (string|null), name: string}>,
 *   bookmarks: Array<{parentTempId: (string|null), title: string, url: string}>
 * }} Folders in document order (`tempId` is the 1-based position in the
 *   folder list, as a string) and bookmarks in document order.
 */
export function parseNetscapeBookmarkHtmlNode(html) {
  const $ = cheerio.load(html, { decodeEntities: false });
  const folders = [];
  const bookmarks = [];

  const $root = $("dl").first();
  if ($root.length === 0) return { folders, bookmarks };

  /** Collapse whitespace runs to single spaces and trim the ends. */
  const normText = (value) => String(value || "").replace(/\s+/g, " ").trim();

  /** Lower-cased tag name of a DOM node (undefined when it has none). */
  const lowerTag = (domNode) => domNode.tagName?.toLowerCase();

  /**
   * Gather the <dt> elements that belong to one list level, in document order.
   * Wrapper elements (e.g. a <p> the parser put around the items) are searched
   * recursively; a <dt> is never descended into, and a <dl> is skipped because
   * a nested list belongs to the <dt> that precedes it.
   */
  const collectLevelDt = (container) =>
    $(container)
      .contents()
      .toArray()
      .flatMap((kid) => {
        if (!kid || kid.type !== "tag") return [];
        switch (lowerTag(kid)) {
          case "dt":
            return [kid];
          case "dl":
            return [];
          default:
            return collectLevelDt(kid);
        }
      });

  /**
   * Locate the <dl> that follows a <dt> when it is not a direct child of it.
   * Starting at the <dt> and climbing through its ancestors (stopping at
   * `boundary`), return the first case where the next element sibling is a
   * <dl>, wrapped in a cheerio object; `null` when none is found.
   */
  const findNextDlForDt = (dt, boundary) => {
    for (let node = dt; node && node !== boundary; node = node.parent) {
      let sibling = node.nextSibling;
      // Skip text/comment nodes between elements.
      while (sibling && sibling.type !== "tag") sibling = sibling.nextSibling;
      if (sibling && lowerTag(sibling) === "dl") return $(sibling);
    }
    return null;
  };

  /** Record every entry of one list level, recursing into folder contents. */
  const walkDl = ($list, ownerTempId) => {
    const listNode = $list[0];
    const parentTempId = ownerTempId ?? null;

    for (const dt of collectLevelDt(listNode)) {
      const $dt = $(dt);

      // Prefer a direct child; fall back to any descendant.
      const pick = (selector) => {
        const $direct = $dt.children(selector).first();
        return $direct.length ? $direct : $dt.find(selector).first();
      };

      const $heading = pick("h3");
      if ($heading.length) {
        const tempId = `${folders.length + 1}`;
        folders.push({ tempId, parentTempId, name: normText($heading.text()) });

        // The folder's contents are either nested in the <dt> or follow it.
        const $inner = $dt.children("dl").first();
        const $contents = $inner.length ? $inner : findNextDlForDt(dt, listNode);
        if ($contents?.length) walkDl($contents, tempId);
        continue;
      }

      const $link = pick("a");
      if ($link.length) {
        bookmarks.push({
          parentTempId,
          title: normText($link.text()),
          url: $link.attr("href") || "",
        });
      }
    }
  };

  walkDl($root, null);
  return { folders, bookmarks };
}
|
|
|
|
/**
 * Serialize folders and bookmarks into a Netscape bookmark-file HTML document.
 *
 * Entries whose parent reference (`parentId` / `folderId`) is null or
 * undefined are emitted at the top level. Within each folder, sub-folders are
 * written first and bookmarks second, each group sorted with `localeCompare`.
 * Folder names, bookmark titles and URLs are HTML-escaped.
 *
 * @param {object} data
 * @param {Array<{id: *, parentId: *, name: string}>} data.folders
 * @param {Array<{folderId: *, title: string, url: string}>} data.bookmarks
 * @returns {string} The complete document, terminated by a newline.
 */
export function buildNetscapeBookmarkHtml({ folders, bookmarks }) {
  // Map key under which top-level entries are grouped.
  const ROOT_KEY = "root";

  /** Group items by parent reference, preserving input order per group. */
  function groupByParent(items, parentOf) {
    const groups = new Map();
    for (const item of items ?? []) {
      const key = parentOf(item) ?? ROOT_KEY;
      if (!groups.has(key)) groups.set(key, []);
      groups.get(key).push(item);
    }
    return groups;
  }

  const folderChildren = groupByParent(folders, (f) => f.parentId);
  const bookmarkChildren = groupByParent(bookmarks, (b) => b.folderId);

  /** Escape text for use in HTML content and double-quoted attributes. */
  function esc(s) {
    // "&" must be replaced first so the entities added below are not re-escaped.
    return String(s ?? "")
      .replaceAll("&", "&amp;")
      .replaceAll("<", "&lt;")
      .replaceAll(">", "&gt;")
      .replaceAll('"', "&quot;");
  }

  /** Comparator ordering objects by a text property; null/undefined sort as "". */
  const byText = (prop) => (a, b) => String(a[prop] ?? "").localeCompare(String(b[prop] ?? ""));

  /** Render the <DL> holding everything directly under `parentId`. */
  function renderFolder(parentId) {
    const key = parentId ?? ROOT_KEY;
    const subFolders = [...(folderChildren.get(key) || [])].sort(byText("name"));
    const subBookmarks = [...(bookmarkChildren.get(key) || [])].sort(byText("title"));

    let out = "<DL><p>\n";

    for (const f of subFolders) {
      out += ` <DT><H3>${esc(f.name)}</H3>\n`;
      // Indent the nested list one level deeper; empty lines stay empty.
      out += renderFolder(f.id)
        .split("\n")
        .map((line) => (line ? ` ${line}` : line))
        .join("\n");
      out += "\n";
    }

    for (const b of subBookmarks) {
      out += ` <DT><A HREF="${esc(b.url)}">${esc(b.title)}</A>\n`;
    }

    out += "</DL><p>";
    return out;
  }

  const header = `<!DOCTYPE NETSCAPE-Bookmark-file-1>\n<!-- This is an automatically generated file. -->\n<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">\n<TITLE>Bookmarks</TITLE>\n<H1>Bookmarks</H1>\n`;
  const body = renderFolder(null);
  return header + body + "\n";
}
|