[v1.3] 增强文本解码 by cyfung1031 · Pull Request #1166 · scriptscat/scriptcat

/**
 * Reads the raw bytes of a Blob / File / Response and decodes them to a string,
 * picking the text encoding automatically.
 *
 * Candidates are tried in this order; the first one that applies wins:
 *  1. the charset parsed from `contentType` (skipped if decoding with it throws),
 *  2. the encoding reported by `detectBOM`,
 *  3. the encoding reported by `guessByNullPattern` over the first 16 KB
 *     (only consulted for even-length inputs),
 *  4. UTF-8 if the first 16 KB validate strictly, otherwise windows-1252.
 *
 * @param blob - Source whose `arrayBuffer()` supplies the bytes to decode.
 * @param contentType - Content-Type header value, or `null` when there is none.
 * @returns The decoded text; `""` when the input has no bytes.
 */
export const readBlobContent = async (
  blob: Blob | File | Response,
  contentType: string | null
): Promise<string> => {
  const buffer = await blob.arrayBuffer();
  const uint8 = new Uint8Array(buffer);

  if (uint8.length === 0) {
    return "";
  }

  // Prefer the charset declared in the Content-Type header.
  const headerCharset = parseCharsetFromContentType(contentType);
  if (headerCharset) {
    try {
      // Decoding doubles as validation: if it throws, fall through to detection.
      return bytesDecode(headerCharset, uint8);
    } catch (e: unknown) {
      // Narrow before reading `.message`; a thrown null/undefined must not crash the handler.
      const reason = e instanceof Error ? e.message : String(e);
      console.warn(`Invalid charset from Content-Type header: ${headerCharset}, error: ${reason}`);
    }
  }

  // BOM detection (consulted only when the header charset is absent or unusable).
  const bomEncoding = detectBOM(uint8);
  if (bomEncoding) return bytesDecode(bomEncoding, uint8);

  const checkSize = Math.min(uint8.length, 16 * 1024);

  if (uint8.length % 2 === 0) {
    // Heuristic detection (first 16 KB).
    const heuristicEncoding = guessByNullPattern(uint8, checkSize);
    if (heuristicEncoding) return bytesDecode(heuristicEncoding, uint8);
  }

  // UTF-8 validation -> legacy fallback.
  let encoding = "utf-8";
  try {
    // When only a prefix is sampled, the cut may land inside a multi-byte character.
    // Streaming mode keeps such a trailing partial sequence pending instead of
    // rejecting it, so valid UTF-8 larger than the sample is not misdetected.
    // When the sample is the whole input, a dangling sequence is still an error.
    const sampleIsPrefix = checkSize < uint8.length;
    // Strict mode - throws on invalid sequences.
    new TextDecoder("utf-8", { fatal: true }).decode(uint8.subarray(0, checkSize), {
      stream: sampleIsPrefix,
    });
  } catch {
    // Invalid UTF-8 -> most common real-world fallback.
    encoding = "windows-1252"; // OR detectEncoding(uint8, null)
  }

  return bytesDecode(encoding, uint8);
};