微客导航 » 文章资讯 » js实现unicode码字符串与utf8字节数据互转详解

js实现unicode码字符串与utf8字节数据互转详解

2023-08-25 16:42:04 405

js的string变量使用unicode编码存储字符串（内部是UTF-16码元序列），传输或保存时必须先转换成某种字节编码，比如utf-8、utf-32等。数据以utf-8编码存储到数据库后，读取出来如何还原成正确的字符串就成了问题。下面给出解决方案，可以正确支持中文、emoji表情、英文混合的字符串与utf-8字节数据互转。

/**
 * Created by hdwang on 2019/1/28.
 *
 * Converts between JavaScript strings (sequences of UTF-16 code units) and
 * UTF-8 byte data. Exposes `toBytes(text)` and `fromBytes(utf8Bytes)`.
 *
 * Declared with `var` so that, when loaded as a classic script, the binding
 * is still attached to the global object as before.
 */
var convertUtf8 = (function () {
  /**
   * Lead-byte forms checked from longest to shortest. A byte matches a form
   * when all of the marker bits are set; `payloadMask` keeps the bits of the
   * lead byte that are combined with the continuation bytes.
   *
   * The 5- and 6-byte forms are not valid UTF-8 under RFC 3629. They are kept
   * so that such input is handled as before: values that String.fromCodePoint
   * rejects raise its RangeError.
   */
  const LEAD_BYTE_FORMS = [
    { marker: 0xfc, payloadMask: 0x03, length: 6 },
    { marker: 0xf8, payloadMask: 0x07, length: 5 },
    { marker: 0xf0, payloadMask: 0x0f, length: 4 },
    { marker: 0xe0, payloadMask: 0x1f, length: 3 },
    { marker: 0xc0, payloadMask: 0x3f, length: 2 },
  ];

  /**
   * Encode a string as UTF-8 bytes.
   *
   * @param {string} text - String to encode.
   * @returns {Uint8Array} The UTF-8 bytes of `text`.
   * @throws {URIError} Raised by encodeURI when `text` contains a lone surrogate.
   */
  function toBytes(text) {
    // encodeURI emits every non-ASCII character as %XX escapes of its UTF-8
    // bytes and escapes a literal '%' as %25, so the escaped form can be
    // read back unambiguously one byte at a time.
    const escaped = encodeURI(text);
    const result = [];
    let i = 0;
    while (i < escaped.length) {
      const code = escaped.charCodeAt(i);
      i += 1;
      if (code === 0x25) {
        // '%' is followed by two hex digits holding one byte.
        result.push(Number.parseInt(escaped.slice(i, i + 2), 16));
        i += 2;
      } else {
        // Characters left unescaped are ASCII: the char code is the byte.
        result.push(code);
      }
    }
    return coerceArray(result);
  }

  /**
   * Decode UTF-8 bytes into a string.
   *
   * @param {ArrayLike<number>} utf8Bytes - Byte values to decode.
   * @returns {string} The decoded string.
   * @throws {RangeError} Raised by String.fromCodePoint when a multi-byte
   *   sequence decodes to a value that is not a valid code point.
   */
  function utf8ByteToUnicodeStr(utf8Bytes) {
    let unicodeStr = '';
    let pos = 0;
    while (pos < utf8Bytes.length) {
      const flag = utf8Bytes[pos];
      const form = LEAD_BYTE_FORMS.find(({ marker }) => (flag & marker) === marker);

      if (form === undefined) {
        // Top bit clear (ASCII), or a byte of the form 10xxxxxx that does not
        // start a sequence: emit it as a single code unit.
        unicodeStr += String.fromCharCode(flag);
        pos += 1;
        continue;
      }

      // Each following byte contributes its low six bits.
      let codePoint = flag & form.payloadMask;
      for (let offset = 1; offset < form.length; offset += 1) {
        codePoint = (codePoint << 6) | (utf8Bytes[pos + offset] & 0x3f);
      }
      // fromCodePoint produces a surrogate pair for values above U+FFFF.
      unicodeStr += String.fromCodePoint(codePoint);
      pos += form.length;
    }
    return unicodeStr;
  }

  /**
   * @param {*} value
   * @returns {boolean} True when `value` is a number with no fractional part.
   */
  function checkInt(value) {
    return Number.isInteger(value);
  }

  /**
   * @param {ArrayLike<*>} arrayish
   * @returns {boolean} True when `arrayish` has an integer length and every
   *   element is an integer in the range 0..255.
   */
  function checkInts(arrayish) {
    if (!checkInt(arrayish.length)) {
      return false;
    }

    for (let i = 0; i < arrayish.length; i += 1) {
      const value = arrayish[i];
      if (!checkInt(value) || value < 0 || value > 255) {
        return false;
      }
    }

    return true;
  }

  /**
   * Normalize a byte container to a Uint8Array.
   *
   * @param {Uint8Array|number[]|ArrayLike<number>} arg - Bytes to coerce.
   * @param {boolean} [copy] - When `arg` is already a Uint8Array, return a
   *   copy instead of `arg` itself.
   * @returns {Uint8Array}
   * @throws {Error} When `arg` is an array holding a non-byte value, or is
   *   not array-like.
   */
  function coerceArray(arg, copy) {
    // Already a byte view: pass it through (optionally copied).
    if (arg instanceof Uint8Array) {
      return copy ? arg.slice() : arg;
    }

    // It's an array; check it is a valid representation of a byte
    if (Array.isArray(arg)) {
      if (!checkInts(arg)) {
        throw new Error('Array contains invalid value: ' + arg);
      }

      return new Uint8Array(arg);
    }

    // Something else, but behaves like an array (maybe a Buffer? Arguments?)
    if (checkInt(arg.length) && checkInts(arg)) {
      return new Uint8Array(arg);
    }

    throw new Error('unsupported array-like object');
  }

  return {
    toBytes: toBytes,
    fromBytes: utf8ByteToUnicodeStr,
  };
})();

emoji等辅助平面字符在js字符串中占两个UTF-16码元（代理对）。使用String.fromCharCode也可以实现，但需要调用两次fromCharCode，没有String.fromCodePoint方便。下面展示了把utf-8的4字节序列转换为unicode(utf-16)代理对的过程。

// Manual conversion of one 4-byte UTF-8 sequence at utf8Bytes[pos] into a
// UTF-16 surrogate pair appended to unicodeStr. Uses utf8Bytes, pos, unicode
// and unicodeStr from the surrounding decoder.

// Upper part of the code point (code point >> 10): 3 bits from the lead byte,
// 6 bits from the second byte and the top 2 payload bits of the third byte.
// The lead byte 11110xxx carries three payload bits, so it is masked with 0x7;
// masking with 0x3 drops the top bit and breaks code points >= U+100000.
unicode = ((utf8Bytes[pos] & 0x7) << 8) | ((utf8Bytes[pos + 1] & 0x3f) << 2) | ((utf8Bytes[pos + 2] >> 4) & 0x03);

// Surrogate pairs encode (code point - 0x10000). The low 10 bits live in the
// low surrogate, so here only 0x10000 >> 10 = 0x40 (binary 1000000) is removed,
// e.g. the leading 1 of 1F600.
unicode = unicode - 0x40;

// Add the UTF-16 high-surrogate base.
unicode = 0xD800 + unicode;
console.log(unicode);
unicodeStr += String.fromCharCode(unicode);

// Low surrogate: the low 10 bits of the code point
// (4 bits from the third byte + 6 bits from the fourth byte).
unicode = ((utf8Bytes[pos + 2] & 0x0F) << 6) | (utf8Bytes[pos + 3] & 0x3F);
// Add the UTF-16 low-surrogate base.
unicode = 0xDC00 + unicode;
console.log(unicode);
unicodeStr += String.fromCharCode(unicode);
pos += 4;

以上所述是小编给大家介绍的js实现unicode码字符串与utf8字节数据互转详解整合，希望对大家有所帮助，如果大家有任何疑问请给我留言，小编会及时回复大家的。在此也非常感谢大家对毛票票网站的支持！

返回顶部
3162201930
czq8825@qq.com

js实现unicode码字符串与utf8字节数据互转详解

热门推荐

随机推荐