93 lines
3.1 KiB
JavaScript
93 lines
3.1 KiB
JavaScript
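// Lightweight GBK/GB18030 decoder helper.
// Strategy (attempted in order):
// 1. Prefer the native TextDecoder('gb18030' / 'gbk') when it is available.
// 2. Otherwise try a global Encoding library that exposes Encoding.convert.
// 3. Otherwise fall back to a reverse table built from utils/ecUnicodeToGBK.js (if present).
// 4. Final fallback: byte-by-byte pass that keeps ASCII and replaces everything else with U+FFFD.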
|
|
|
|
/**
 * Normalize a byte source into a Uint8Array.
 *
 * @param {*} input - A Uint8Array (returned as-is), any other object exposing an
 *   ArrayBuffer through `.buffer` (typed array, DataView, ...), or anything the
 *   Uint8Array constructor accepts (ArrayBuffer, array of byte values, length).
 *   Falsy values yield an empty array.
 * @returns {Uint8Array} The bytes of `input`; shares memory with the source
 *   when the source is backed by an ArrayBuffer.
 */
function toUint8Array(input) {
  if (!input) return new Uint8Array(0)
  if (input instanceof Uint8Array) return input

  if (input.buffer instanceof ArrayBuffer) {
    const { buffer, byteOffset, byteLength } = input
    // A view may cover only a slice of its backing buffer; honour its window
    // instead of exposing unrelated bytes before/after it.
    if (typeof byteOffset === 'number' && typeof byteLength === 'number') {
      return new Uint8Array(buffer, byteOffset, byteLength)
    }
    // Plain wrapper object without offset information: use the whole buffer.
    return new Uint8Array(buffer)
  }

  return new Uint8Array(input)
}
|
|
|
|
// Cached reverse lookup (uppercase hex key -> table index).
// Stays null until decodeWithTable builds it on first use.
let reverseMap = null
|
|
/**
 * Invert an indexed table into a prototype-less dictionary.
 *
 * Every truthy entry is stringified and upper-cased to form a key whose value
 * is the entry's index. When two entries produce the same key, the higher
 * index wins because entries are visited in ascending order.
 *
 * @param {*} table - Indexable value with a `length` property.
 * @returns {Object|null} The inverted dictionary, or null if reading the
 *   table throws (for example when `table` is null).
 */
function buildReverseFromTable(table) {
  const inverted = Object.create(null)
  try {
    let index = 0
    while (index < table.length) {
      const entry = table[index]
      if (entry) {
        const key = String(entry).toUpperCase()
        inverted[key] = Number(index)
      }
      index += 1
    }
  } catch (e) {
    return null
  }
  return inverted
}
|
|
|
|
/**
 * Decode GBK-style bytes with a reverse lookup derived from `table`.
 *
 * Bytes <= 0x7F are emitted as-is. Any other byte is combined with the byte
 * after it into a four-digit uppercase hex key; the value stored under that
 * key is used as a Unicode code point. Anything that cannot be decoded
 * becomes U+FFFD.
 *
 * @param {Uint8Array} u8 - Bytes to decode.
 * @param {*} table - Table handed to buildReverseFromTable.
 * @returns {string|null} Decoded text, or null when no table is given or the
 *   reverse lookup could not be built.
 */
function decodeWithTable(u8, table) {
  if (!table) return null
  // The lookup is built once and cached at module level; later calls reuse it
  // even if a different table is passed.
  if (!reverseMap) reverseMap = buildReverseFromTable(table)
  if (!reverseMap) return null

  const toHex = (byte) => byte.toString(16).padStart(2, '0').toUpperCase()
  const isLeadRange = (byte) => byte >= 0x81 && byte <= 0xFE
  const isDigit = (byte) => byte >= 0x30 && byte <= 0x39

  let out = ''
  for (let i = 0; i < u8.length; i++) {
    const lead = u8[i]
    if (lead <= 0x7F) { out += String.fromCharCode(lead); continue }

    const trail = u8[i + 1]
    // Input ends in the middle of a multi-byte character.
    if (typeof trail !== 'number') { out += '\uFFFD'; continue }

    const cp = reverseMap[toHex(lead) + toHex(trail)]
    if (cp != null) {
      try { out += String.fromCodePoint(cp) } catch (e) { out += '\uFFFD' }
      i += 1
      continue
    }

    out += '\uFFFD'
    // Four-byte GB18030 form (lead, digit, lead, digit): the two-byte keys can
    // never match it, so consume it as one undecodable character.
    if (isLeadRange(lead) && isDigit(trail) && isLeadRange(u8[i + 2]) && isDigit(u8[i + 3])) {
      i += 3
      continue
    }
    // Only swallow the second byte when it is not ASCII. An ASCII byte after
    // an unmapped lead is real text (e.g. a newline) and is decoded on the
    // next iteration, matching the WHATWG gb18030 decoder.
    if (trail > 0x7F) i += 1
  }
  return out
}
|
|
|
|
/**
 * Decode GBK/GB18030 bytes into a string, trying progressively cruder
 * strategies until one produces a result.
 *
 * @param {*} u8in - Byte source; normalized through toUint8Array.
 * @returns {string} Decoded text. The last-resort path keeps ASCII bytes and
 *   emits U+FFFD for everything else; an unexpected failure there yields ''.
 */
function decode(u8in) {
  const bytes = toUint8Array(u8in)

  // 1) Native TextDecoder, trying each label the runtime might know.
  try {
    if (typeof TextDecoder === 'function') {
      for (const label of ['gb18030', 'gbk']) {
        try {
          return new TextDecoder(label).decode(bytes)
        } catch (e) { /* label unsupported, try the next one */ }
      }
    }
  } catch (e) { /* ignore */ }

  // 2) A bundled Encoding library exposing Encoding.convert, if one is around.
  try {
    let enc = null
    if (typeof Encoding !== 'undefined') {
      enc = Encoding
    } else if (typeof window !== 'undefined') {
      enc = window.Encoding
    }
    if (enc && typeof enc.convert === 'function') {
      for (const to of ['UNICODE', 'UTF8']) {
        try {
          return enc.convert(bytes, { to, from: 'GB18030', type: 'string' })
        } catch (e) { /* ignore */ }
      }
    }
  } catch (e) { /* ignore */ }

  // 3) Reverse lookup derived from the project's mapping module.
  try {
    const mod = require('../utils/ecUnicodeToGBK.js')
    let table = mod
    if (mod) table = mod.table || mod.t || mod
    if (table) {
      const decoded = decodeWithTable(bytes, table)
      // An empty or null result falls through to the last-resort pass.
      if (decoded && decoded.length) return decoded
    }
  } catch (e) { /* ignore */ }

  // 4) Last resort: keep ASCII, replace each non-ASCII byte (together with
  //    the byte after it, when there is one) by a single U+FFFD.
  try {
    const pieces = []
    let pos = 0
    while (pos < bytes.length) {
      const current = bytes[pos]
      if (current <= 0x7F) {
        pieces.push(String.fromCharCode(current))
        pos += 1
        continue
      }
      pieces.push('\uFFFD')
      pos += (typeof bytes[pos + 1] === 'number') ? 2 : 1
    }
    return pieces.join('')
  } catch (e) {
    return ''
  }
}
|
|
|
|
// Public API: only decode() is exported; the other functions in this file stay module-private.
module.exports = { decode }
|