// 2026-01-21 23:09:33 +08:00
import * as cheerio from "cheerio" ;
/**
 * Parses a Netscape bookmark-file HTML document into flat folder and bookmark lists.
 *
 * Folders receive sequential, 1-based string ids (`tempId`) in document order;
 * every folder and bookmark points at its containing folder through
 * `parentTempId` (`null` for entries directly under the first `<DL>`).
 *
 * @param {string} html - Bookmark-file markup. `null`/`undefined` is treated as empty.
 * @returns {{
 *   folders: Array<{tempId: string, parentTempId: (string|null), name: string}>,
 *   bookmarks: Array<{parentTempId: (string|null), title: string, url: string}>
 * }} Empty lists when the markup contains no `<DL>` element.
 */
export function parseNetscapeBookmarkHtmlNode(html) {
  // NOTE(review): whether `decodeEntities` is honoured depends on the cheerio
  // version / underlying parser in use - confirm against the installed version.
  const $ = cheerio.load(html ?? "", { decodeEntities: false });
  const rootDl = $("dl").first();
  if (!rootDl.length) return { folders: [], bookmarks: [] };

  const folders = [];
  const bookmarks = [];

  /** Collapses whitespace runs to a single space and trims both ends. */
  function normText(s) {
    return String(s || "").replace(/\s+/g, " ").trim();
  }

  /**
   * Returns the `<DT>` elements that belong to the list level rooted at `node`.
   * Wrapper elements (anything that is neither `<DT>` nor `<DL>`) are searched
   * recursively; `<DT>` and `<DL>` elements are never descended into.
   */
  function collectLevelDt(node) {
    const out = [];
    for (const child of $(node).contents().toArray()) {
      if (!child || child.type !== "tag") continue;
      const tag = child.tagName?.toLowerCase();
      if (tag === "dt") {
        out.push(child);
      } else if (tag !== "dl") {
        // A nested <DL> belongs to the preceding <DT>; any other tag is a wrapper.
        out.push(...collectLevelDt(child));
      }
    }
    return out;
  }

  /**
   * Looks for a `<DL>` that is the next element sibling of `dtNode` or of one
   * of its ancestors, climbing until `stopDlNode` is reached.
   * Returns the wrapped `<DL>` or `null` when none is found.
   */
  function findNextDlForDt(dtNode, stopDlNode) {
    let cur = dtNode;
    while (cur && cur !== stopDlNode) {
      let next = cur.nextSibling;
      while (next && next.type !== "tag") next = next.nextSibling;
      if (next && next.tagName?.toLowerCase() === "dl") return $(next);
      cur = cur.parent;
    }
    return null;
  }

  /** First direct child matching `selector`, falling back to the first descendant. */
  function firstChildOrDescendant($el, selector) {
    const $direct = $el.children(selector).first();
    return $direct.length ? $direct : $el.find(selector).first();
  }

  /**
   * Walks one `<DL>` level, recording folders (`<DT>` with an `<H3>`) and
   * bookmarks (`<DT>` with an `<A>`), and recursing into each folder's own list.
   */
  function walkDl($dl, parentTempId) {
    // Netscape format: <DL><p> contains repeating <DT> items and nested <DL>.
    // When parsed, <DT> may be wrapped (e.g. inside <p>), so we must be robust.
    for (const node of collectLevelDt($dl[0])) {
      const $dt = $(node);

      const $h3 = firstChildOrDescendant($dt, "h3");
      if ($h3.length) {
        const tempId = String(folders.length + 1);
        const name = normText($h3.text());
        folders.push({ tempId, parentTempId: parentTempId ?? null, name });

        // The folder's list is either nested inside the <DT> or follows it as
        // a sibling; only folders need this lookup.
        const $nestedDl = $dt.children("dl").first();
        const $childDl = $nestedDl.length ? $nestedDl : findNextDlForDt(node, $dl[0]);
        if ($childDl?.length) walkDl($childDl, tempId);
        continue;
      }

      const $a = firstChildOrDescendant($dt, "a");
      if ($a.length) {
        const title = normText($a.text());
        const url = $a.attr("href") || "";
        bookmarks.push({ parentTempId: parentTempId ?? null, title, url });
      }
    }
  }

  walkDl(rootDl, null);
  return { folders, bookmarks };
}
/**
 * Serialises folders and bookmarks into a Netscape bookmark-file HTML document.
 *
 * Within each folder, sub-folders are emitted first (sorted by name), followed
 * by bookmarks (sorted by title). Names, titles and URLs are HTML-escaped.
 *
 * @param {object} [input]
 * @param {Array<{id: *, parentId: *, name: string}>} [input.folders]
 *   Folders; a nullish `parentId` places the folder at the top level.
 * @param {Array<{folderId: *, title: string, url: string}>} [input.bookmarks]
 *   Bookmarks; a nullish `folderId` places the bookmark at the top level.
 * @returns {string} The complete document, terminated by a newline.
 */
export function buildNetscapeBookmarkHtml({ folders, bookmarks } = {}) {
  const ROOT_KEY = "root";
  const INDENT = "    ";
  const EMPTY_LIST = "<DL><p>\n</DL><p>";

  /** Groups `items` by `keyOf(item)`, using ROOT_KEY for nullish keys. */
  const groupByParent = (items, keyOf) => {
    const groups = new Map();
    for (const item of items ?? []) {
      const key = keyOf(item) ?? ROOT_KEY;
      const bucket = groups.get(key);
      if (bucket) bucket.push(item);
      else groups.set(key, [item]);
    }
    return groups;
  };

  const folderChildren = groupByParent(folders, (f) => f.parentId);
  const bookmarkChildren = groupByParent(bookmarks, (b) => b.folderId);

  /** Escapes text for use in HTML content and double-quoted attribute values. */
  const esc = (s) =>
    String(s ?? "")
      // "&" must be replaced first so the entities added below stay intact.
      .replaceAll("&", "&amp;")
      .replaceAll("<", "&lt;")
      .replaceAll(">", "&gt;")
      .replaceAll('"', "&quot;");

  /** Comparator ordering items by a text field; missing values sort as "". */
  const byText = (pick) => (a, b) =>
    String(pick(a) ?? "").localeCompare(String(pick(b) ?? ""));

  // Keys of the folders currently being rendered (root down to the current one).
  // Guards against endless recursion when a folder id collides with one of
  // its ancestors' keys (e.g. a nullish id, or duplicate ids).
  const activeKeys = new Set();

  /** Renders the `<DL>` list holding the children of folder `parentId`. */
  function renderFolder(parentId) {
    const key = parentId ?? ROOT_KEY;
    if (activeKeys.has(key)) return EMPTY_LIST;
    activeKeys.add(key);

    const subFolders = [...(folderChildren.get(key) ?? [])].sort(byText((f) => f.name));
    const subBookmarks = [...(bookmarkChildren.get(key) ?? [])].sort(byText((b) => b.title));

    let out = "<DL><p>\n";
    for (const f of subFolders) {
      out += `${INDENT}<DT><H3>${esc(f.name)}</H3>\n`;
      // Indent the nested list one level deeper; empty lines stay empty.
      out += renderFolder(f.id)
        .split("\n")
        .map((line) => (line ? `${INDENT}${line}` : line))
        .join("\n");
      out += "\n";
    }
    for (const b of subBookmarks) {
      out += `${INDENT}<DT><A HREF="${esc(b.url)}">${esc(b.title)}</A>\n`;
    }
    out += "</DL><p>";

    activeKeys.delete(key);
    return out;
  }

  const header = [
    "<!DOCTYPE NETSCAPE-Bookmark-file-1>",
    "<!-- This is an automatically generated file. -->",
    '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">',
    "<TITLE>Bookmarks</TITLE>",
    "<H1>Bookmarks</H1>",
    "",
  ].join("\n");

  return `${header}${renderFolder(null)}\n`;
}