utf-8이 아닌 문자열을 유용하게 읽을 수있는 방법을 찾았습니다.하지만 가장 좋은 방법인지 아니면 권장 된 방법인지 확실하지 않습니다.
function readAsChar8ThenAsChar16(stringPtr, known_len, jschar) {
// when reading as jschar it assumes max length of 500
// stringPtr is either char or jschar, if you know its jschar for sure, pass 2nd arg as true
// if known_len is passed, then assumption is not made, at the known_len position in array we will see a null char
// i tried getting known_len from stringPtr but its not possible, it has be known, i tried this:
//"stringPtr.contents.toString()" "95"
//"stringPtr.toString()" "ctypes.unsigned_char.ptr(ctypes.UInt64("0x7f73d5c87650"))"
// so as we see neither of these is 77, this is for the example of "_scratchpad/EnTeHandle.js at master · Noitidart/_scratchpad - Mozilla Firefox"
// tries to do read string on stringPtr, if it fails then it falls to read as jschar
var readJSCharString = function() {
var assumption_max_len = known_len ? known_len : 500;
var ptrAsArr = ctypes.cast(stringPtr, ctypes.unsigned_char.array(assumption_max_len).ptr).contents; // MUST cast to unsigned char (not ctypes.jschar, or ctypes.char) as otherwise i dont get foreign characters, as they are got as negative values, and i should read till i find a 0 which is null terminator which will have unsigned_char code of 0 // can test this by reading a string like this: "_scratchpad/EnTeHandle.js at master · Noitidart/_scratchpad - Mozilla Firefox" at js array position 36 (so 37 if count from 1), we see 183, and at 77 we see char code of 0 IF casted to unsigned_char, if casted to char we see -73 at pos 36 but pos 77 still 0, if casted to jschar we see chineese characters in all spots expect spaces even null terminator is a chineese character
console.info('ptrAsArr.length:', ptrAsArr.length);
//console.log('debug-msg :: dataCasted:', dataCasted, uneval(dataCasted), dataCasted.toString());
var charCode = [];
var fromCharCode = []
for (var i=0; i<ptrAsArr.length; i++) { //if known_len is correct, then will not hit null terminator so like in example of "_scratchpad/EnTeHandle.js at master · Noitidart/_scratchpad - Mozilla Firefox" if you pass length of 77, then null term will not get hit by this loop as null term is at pos 77 and we go till `< known_len`
var thisUnsignedCharCode = ptrAsArr.addressOfElement(i).contents;
if (thisUnsignedCharCode == 0) {
// reached null terminator, break
console.log('reached null terminator, at pos: ', i);
break;
}
charCode.push(thisUnsignedCharCode);
fromCharCode.push(String.fromCharCode(thisUnsignedCharCode));
}
console.info('charCode:', charCode);
console.info('fromCharCode:', fromCharCode);
var char16_val = fromCharCode.join('');
console.info('char16_val:', char16_val);
return char16_val;
}
if (!jschar) {
try {
var char8_val = stringPtr.readString();
console.info('stringPtr.readString():', char8_val);
return char8_val;
} catch (ex if ex.message.indexOf('malformed UTF-8 character sequence at offset ') == 0) {
console.warn('ex of offset utf8 read error when trying to do readString so using alternative method, ex:', ex);
return readJSCharString();
}
} else {
return readJSCharString();
}
}
당신이 XA_WM_NAME 원자를 사용하십니까 :이 (그렇게하지
char
또는jschar
,이 작업을해야합니다) 다음fromCharCode
을 내가unsigned_char
에 캐스팅해야하지만 작동? – paa'WS_NAME' 아톰을 사용했습니다. 또한'XGetWMName' 편의 함수에서 사용되는데, 나는'XGetWMName'과 함께 readString 문제를 실행합니다. – Noitidart