/***************************************************************************** (c) Cambridge Silicon Radio Limited 2010 All rights reserved and confidential information of CSR Refer to LICENSE.txt included with this source for details on the license terms. *****************************************************************************/ #include #include "csr_pmem.h" #include "csr_unicode.h" #include "csr_macro.h" #define UNI_SUR_HIGH_START ((u32) 0xD800) #define UNI_SUR_HIGH_END ((u32) 0xDBFF) #define UNI_SUR_LOW_START ((u32) 0xDC00) #define UNI_SUR_LOW_END ((u32) 0xDFFF) #define UNI_REPLACEMENT_CHAR ((u32) 0xFFFD) #define UNI_HALF_SHIFT ((u8) 10) /* used for shifting by 10 bits */ #define UNI_HALF_BASE ((u32) 0x00010000) #define UNI_BYTEMASK ((u32) 0xBF) #define UNI_BYTEMARK ((u32) 0x80) #define CAPITAL(x) ((x >= 'a') && (x <= 'z') ? ((x) & 0x00DF) : (x)) /* * Index into the table with the first byte to get the number of trailing bytes in a utf-8 character. * -1 if the byte has an invalid value. 
*
 * Legal sequences are:
 *
 *       byte 1st    2nd    3rd    4th
 *
 *       00-7F
 *       C2-DF       80-BF
 *       E0          A0-BF  80-BF
 *       E1-EC       80-BF  80-BF
 *       ED          80-9F  80-BF
 *       EE-EF       80-BF  80-BF
 *       F0          90-BF  80-BF  80-BF
 *       F1-F3       80-BF  80-BF  80-BF
 *       F4          80-8F  80-BF  80-BF
 */
static const s8 trailingBytesForUtf8[256] =
{
    /* 0x00 - 0x7F: single byte characters, no trailing bytes */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x00 - 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x20 - 0x3F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x40 - 0x5F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,                                 /* 0x60 - 0x7F */
    /* 0x80 - 0xBF: only valid as 2nd-4th byte, never as a first byte */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0x80 - 0x9F */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0xA0 - 0xBF */
    /* 0xC0 and 0xC1 are not legal first bytes (see table above); 0xC2 - 0xDF start a 2 byte sequence */
    -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,                               /* 0xC0 - 0xDF */
    /* 0xE0 - 0xEF start a 3 byte sequence, 0xF0 - 0xF4 a 4 byte sequence, 0xF5 - 0xFF are illegal */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,                      /* 0xE0 - 0xFF */
};

/*
 * Values to be subtracted from a u32 when converting from UTF8 to UTF16.
 * Indexed by the number of trailing bytes (0 - 3). The decoder in this file
 * sums the raw bytes of a sequence, shifting left by 6 between bytes, so the
 * marker bits of every byte end up in the sum; each entry is that accumulated
 * marker value:
 *   [1] 0x00003080 = (0xC0 << 6) + 0x80
 *   [2] 0x000E2080 = (0xE0 << 12) + (0x80 << 6) + 0x80
 *   [3] 0x03C82080 = (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
 */
static const u32 offsetsFromUtf8[4] =
{
    0x00000000, 0x00003080, 0x000E2080, 0x03C82080
};

/********************************************************************************
*
*   Name:           CsrUint32ToUtf16String
*
*   Description:    The function converts an 32 bit number to an UTF-16 string
*                   that is allocated and 0-terminated.
*
*   Input:          32 bit number.
*
*   Output:         A string of UTF-16 characters.
* *********************************************************************************/ u16 *CsrUint32ToUtf16String(u32 number) { u16 count, noOfDigits; u16 *output; u32 tempNumber; /* calculate the number of digits in the output */ tempNumber = number; noOfDigits = 1; while (tempNumber >= 10) { tempNumber = tempNumber / 10; noOfDigits++; } output = (u16 *) CsrPmemAlloc(sizeof(u16) * (noOfDigits + 1)); /*add space for 0-termination*/ tempNumber = number; for (count = noOfDigits; count > 0; count--) { output[count - 1] = (u16) ((tempNumber % 10) + '0'); tempNumber = tempNumber / 10; } output[noOfDigits] = '\0'; return output; } /******************************************************************************** * * Name: CsrUtf16StringToUint32 * * Description: The function converts an UTF-16 string that is * 0-terminated into a 32 bit number. * * Input: A string of UTF-16 characters containig a number. * * Output: 32 bit number. * *********************************************************************************/ u32 CsrUtf16StringToUint32(const u16 *unicodeString) { u16 numLen, count; u32 newNumber = 0; numLen = (u16) CsrUtf16StrLen(unicodeString); if ((numLen > 10) || (numLen == 0) || (unicodeString == NULL)) /*CSRMAX number is 4.294.967.295 */ { return 0; } for (count = 0; count < numLen; count++) { u16 input = unicodeString[count]; if ((input < 0x30) || (input > 0x39) || ((newNumber == 0x19999999) && (input > 0x35)) || (newNumber > 0x19999999)) /* chars are present or number is too large now causing number to get to large when *10 */ { return 0; } newNumber = (newNumber * 10) + (input - 0x30); } return newNumber; } /******************************************************************************** * * Name: CsrUtf16MemCpy * * Description: The function copies count number of 16 bit data elements * from srv to dest. * * Input: A pointer to an unicoded string. * * Output: A pointer to an unicoded string. 
* *********************************************************************************/ u16 *CsrUtf16MemCpy(u16 *dest, const u16 *src, u32 count) { return memcpy((u8 *) dest, (u8 *) src, count * sizeof(u16)); } /******************************************************************************** * * Name: CsrUtf16ConcatenateTexts * * Description: The function merge the contents of 4 unicoded input pointers * into a new string. * * Input: 4 unicoded input strings (UTF-16). * * Output: A new unicoded string (UTF-16) containing the combined strings. * *********************************************************************************/ u16 *CsrUtf16ConcatenateTexts(const u16 *inputText1, const u16 *inputText2, const u16 *inputText3, const u16 *inputText4) { u16 *outputText; u32 textLen, textLen1, textLen2, textLen3, textLen4; textLen1 = CsrUtf16StrLen(inputText1); textLen2 = CsrUtf16StrLen(inputText2); textLen3 = CsrUtf16StrLen(inputText3); textLen4 = CsrUtf16StrLen(inputText4); textLen = textLen1 + textLen2 + textLen3 + textLen4; if (textLen == 0) /*stop here is all lengths are 0*/ { return NULL; } outputText = (u16 *) CsrPmemAlloc((textLen + 1) * sizeof(u16)); /* add space for 0-termination*/ if (inputText1 != NULL) { CsrUtf16MemCpy(outputText, inputText1, textLen1); } if (inputText2 != NULL) { CsrUtf16MemCpy(&(outputText[textLen1]), inputText2, textLen2); } if (inputText3 != NULL) { CsrUtf16MemCpy(&(outputText[textLen1 + textLen2]), inputText3, textLen3); } if (inputText4 != NULL) { CsrUtf16MemCpy(&(outputText[textLen1 + textLen2 + textLen3]), inputText4, textLen4); } outputText[textLen] = '\0'; return outputText; } /******************************************************************************** * * Name: CsrUtf16StrLen * * Description: The function returns the number of 16 bit elements present * in the 0-terminated string. * * Input: 0-terminated string of 16 bit unicoded characters. * * Output: The number of 16 bit elements in the string. 
* *********************************************************************************/ u32 CsrUtf16StrLen(const u16 *unicodeString) { u32 length; length = 0; if (unicodeString != NULL) { while (*unicodeString) { length++; unicodeString++; } } return length; } /******************************************************************************** * * Name: CsrUtf16String2Utf8 * * Description: The function decodes an UTF-16 string into an UTF8 byte * oriented string. * * Input: 0-terminated UTF-16 string characters. * * Output: 0-terminated string of byte oriented UTF8 coded characters. * *********************************************************************************/ u8 *CsrUtf16String2Utf8(const u16 *source) { u8 *dest, *destStart = NULL; u32 i; u32 ch; u32 length; u32 sourceLength; u8 bytes; u8 appendNull = FALSE; u8 firstByteMark[5] = {0x00, 0x00, 0xC0, 0xE0, 0xF0}; if (!source) { return NULL; } length = 0; sourceLength = CsrUtf16StrLen(source) + 1; for (i = 0; i < sourceLength; i++) { ch = source[i]; if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */ { if (i + 1 < sourceLength) /* The low surrogate is in the source */ { u32 ch2 = source[++i]; if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate */ { length += 4; } else /* It is not a low surrogate, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { length += 3; i--; /* Substract 1 again as the conversion must continue after the ill-formed code unit */ } } else /* The low surrogate does not exist, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */ { length += 4; } } else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { length += 3; } else /* Figure out how many bytes that are required */ { if (ch < 0x0080) { length++; } else if (ch < 0x0800) { length += 2; } else 
{ length += 3; } } } dest = CsrPmemAlloc(length); destStart = dest; for (i = 0; i < sourceLength; i++) { ch = source[i]; if ((ch >= UNI_SUR_HIGH_START) && (ch <= UNI_SUR_HIGH_END)) /* This is a high surrogate */ { if (i + 1 < sourceLength) /* The low surrogate is in the source */ { u32 ch2 = source[++i]; if ((ch2 >= UNI_SUR_LOW_START) && (ch2 <= UNI_SUR_LOW_END)) /* And it is a legal low surrogate, convert to UTF-32 */ { ch = ((ch - UNI_SUR_HIGH_START) << UNI_HALF_SHIFT) + (ch2 - UNI_SUR_LOW_START) + UNI_HALF_BASE; } else /* It is not a low surrogate, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { ch = UNI_REPLACEMENT_CHAR; i--; /* Substract 1 again as the conversion must continue after the ill-formed code unit */ } } else /* The low surrogate does not exist, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */ { ch = UNI_REPLACEMENT_CHAR; appendNull = TRUE; } } else if ((ch >= UNI_SUR_LOW_START) && (ch <= UNI_SUR_LOW_END)) /* The value of UTF-16 is not allowed to be in this range, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { ch = UNI_REPLACEMENT_CHAR; } /* Figure out how many bytes that are required */ if (ch < (u32) 0x80) { bytes = 1; } else if (ch < (u32) 0x800) { bytes = 2; } else if (ch < (u32) 0x10000) { bytes = 3; } else if (ch < (u32) 0x110000) { bytes = 4; } else { bytes = 3; ch = UNI_REPLACEMENT_CHAR; } dest += bytes; switch (bytes) /* Convert character to UTF-8. Note: everything falls through. 
*/ { case 4: { *--dest = (u8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK); ch >>= 6; } /* FALLTHROUGH */ case 3: { *--dest = (u8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK); ch >>= 6; } /* FALLTHROUGH */ case 2: { *--dest = (u8) ((ch | UNI_BYTEMARK) & UNI_BYTEMASK); ch >>= 6; } /* FALLTHROUGH */ case 1: { *--dest = (u8) (ch | firstByteMark[bytes]); } /* FALLTHROUGH */ default: { break; } } dest += bytes; } if (appendNull) /* Append the \0 character */ { *dest = '\0'; } return destStart; } /***************************************************************************** NAME isLegalUtf8 DESCRIPTION Returns TRUE if the given UFT-8 code unit is legal as defined by the Unicode standard (see Chapter 3: Conformance, Section 3.9: Unicode Encoding Forms, UTF-8). This function assumes that the length parameter is unconditionally correct and that the first byte is already validated by looking it up in the trailingBytesForUtf8 array, which also reveals the number of trailing bytes. Legal code units are composed of one of the following byte sequences: 1st 2nd 3rd 4th -------------------------------- 00-7F C2-DF 80-BF E0 A0-BF 80-BF E1-EC 80-BF 80-BF ED 80-9F 80-BF EE-EF 80-BF 80-BF F0 90-BF 80-BF 80-BF F1-F3 80-BF 80-BF 80-BF F4 80-8F 80-BF 80-BF Please note that this function only checks whether the 2nd, 3rd and 4th bytes fall into the valid ranges. PARAMETERS codeUnit - pointer to the first byte of the byte sequence composing the code unit to test. length - the number of bytes in the code unit. Valid range is 1 to 4. RETURNS TRUE if the given code unit is legal. 
*****************************************************************************/ static u8 isLegalUtf8(const u8 *codeUnit, u32 length) { const u8 *srcPtr = codeUnit + length; u8 byte; switch (length) /* Everything falls through except case 1 */ { case 4: { byte = *--srcPtr; if ((byte < 0x80) || (byte > 0xBF)) { return FALSE; } } /* Fallthrough */ case 3: { byte = *--srcPtr; if ((byte < 0x80) || (byte > 0xBF)) { return FALSE; } } /* Fallthrough */ case 2: { byte = *--srcPtr; if (byte > 0xBF) { return FALSE; } switch (*codeUnit) /* No fallthrough */ { case 0xE0: { if (byte < 0xA0) { return FALSE; } break; } case 0xED: { if ((byte < 0x80) || (byte > 0x9F)) { return FALSE; } break; } case 0xF0: { if (byte < 0x90) { return FALSE; } break; } case 0xF4: { if ((byte < 0x80) || (byte > 0x8F)) { return FALSE; } break; } default: { if (byte < 0x80) { return FALSE; } break; } } } /* Fallthrough */ case 1: default: /* The 1st byte and length are assumed correct */ break; } return TRUE; } /******************************************************************************** * * Name: CsrUtf82Utf16String * * Description: The function decodes an UTF8 byte oriented string into a * UTF-16string. * * Input: 0-terminated string of byte oriented UTF8 coded characters. * * Output: 0-terminated string of UTF-16 characters. 
* *********************************************************************************/ u16 *CsrUtf82Utf16String(const u8 *utf8String) { size_t i, length = 0; size_t sourceLength; u16 *dest = NULL; u16 *destStart = NULL; s8 extraBytes2Read; if (!utf8String) { return NULL; } sourceLength = strlen((char *)utf8String); for (i = 0; i < sourceLength; i++) { extraBytes2Read = trailingBytesForUtf8[utf8String[i]]; if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { length += 1; } else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */ { length += 2; break; } else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { length += 1; } else { if (utf8String[i] > 0xEF) /* Needs a high and a low surrogate */ { length += 2; } else { length += 1; } i += extraBytes2Read; } } /* Create space for the null terminated character */ dest = (u16 *) CsrPmemAlloc((1 + length) * sizeof(u16)); destStart = dest; for (i = 0; i < sourceLength; i++) { extraBytes2Read = trailingBytesForUtf8[utf8String[i]]; if (extraBytes2Read == -1) /* Illegal byte value, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { *dest++ = UNI_REPLACEMENT_CHAR; } else if (i + extraBytes2Read > sourceLength) /* The extra bytes does not exist, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD), and the null terminated character */ { *dest++ = UNI_REPLACEMENT_CHAR; *dest++ = '\0'; break; } else if (isLegalUtf8(&utf8String[i], extraBytes2Read + 1) == FALSE) /* It is not a legal utf-8 character, instead put a Unicode 'REPLACEMENT CHARACTER' (U+FFFD) */ { *dest++ = UNI_REPLACEMENT_CHAR; } else /* It is legal, convert the character to an u32 */ { u32 ch = 0; switch (extraBytes2Read) /* Everything falls through */ { case 3: { ch += 
utf8String[i]; ch <<= 6; i++; } /* FALLTHROUGH */ case 2: { ch += utf8String[i]; ch <<= 6; i++; } /* FALLTHROUGH */ case 1: { ch += utf8String[i]; ch <<= 6; i++; } /* FALLTHROUGH */ case 0: { ch += utf8String[i]; } /* FALLTHROUGH */ default: { break; } } ch -= offsetsFromUtf8[extraBytes2Read]; if (ch <= 0xFFFF) /* Character can be encoded in one u16 */ { *dest++ = (u16) ch; } else /* The character needs two u16 */ { ch -= UNI_HALF_BASE; *dest++ = (u16) ((ch >> UNI_HALF_SHIFT) | UNI_SUR_HIGH_START); *dest++ = (u16) ((ch & 0x03FF) | UNI_SUR_LOW_START); } } } destStart[length] = 0x00; return destStart; } /******************************************************************************** * * Name: CsrUtf16StrCpy * * Description: The function copies the contents from one UTF-16 string * to another UTF-16 string. * * Input: 0-terminated UTF-16 string. * * Output: 0-terminated UTF-16 string. * *********************************************************************************/ u16 *CsrUtf16StrCpy(u16 *target, const u16 *source) { if (source) /* if source is not NULL*/ { memcpy(target, source, (CsrUtf16StrLen(source) + 1) * sizeof(u16)); return target; } else { return NULL; } } /******************************************************************************** * * Name: CsrUtf16StringDuplicate * * Description: The function allocates a new pointer and copies the input to * the new pointer. * * Input: 0-terminated UTF-16 string. * * Output: Allocated variable0-terminated UTF-16 string. 
* *********************************************************************************/ u16 *CsrUtf16StringDuplicate(const u16 *source) { u16 *target = NULL; u32 length; if (source) /* if source is not NULL*/ { length = (CsrUtf16StrLen(source) + 1) * sizeof(u16); target = (u16 *) CsrPmemAlloc(length); memcpy(target, source, length); } return target; } /******************************************************************************** * * Name: CsrUtf16StrICmp * * Description: The function compares two UTF-16 strings. * * Input: Two 0-terminated UTF-16 string. * * Output: 0: if the strings are identical. * *********************************************************************************/ u16 CsrUtf16StrICmp(const u16 *string1, const u16 *string2) { while (*string1 || *string2) { if (CAPITAL(*string1) != CAPITAL(*string2)) { return *string1 - *string2; } string1++; string2++; } return 0; } /******************************************************************************** * * Name: CsrUtf16StrNICmp * * Description: The function compares upto count number of elements in the * two UTF-16 string. * * Input: Two 0-terminated UTF-16 string and a maximum * number of elements to check. * * Output: 0: if the strings are identical. * *********************************************************************************/ u16 CsrUtf16StrNICmp(const u16 *string1, const u16 *string2, u32 count) { while ((*string1 || *string2) && count--) { if (CAPITAL(*string1) != CAPITAL(*string2)) { return *string1 - *string2; } string1++; string2++; } return 0; } /******************************************************************************** * * Name: CsrUtf16String2XML * * Description: The function converts an unicoded string (UTF-16) into an unicoded XML * string where some special characters are encoded according to * the XML spec. * * Input: A unicoded string (UTF-16) which is freed. * * Output: A new unicoded string (UTF-16) containing the converted output. 
* *********************************************************************************/ u16 *CsrUtf16String2XML(u16 *str) { u16 *scanString; u16 *outputString = NULL; u16 *resultString = str; u32 stringLength = 0; u8 encodeChars = FALSE; scanString = str; if (scanString) { while (*scanString) { if (*scanString == L'&') { stringLength += 5; encodeChars = TRUE; } else if ((*scanString == L'<') || (*scanString == L'>')) { stringLength += 4; encodeChars = TRUE; } else { stringLength++; } scanString++; } stringLength++; if (encodeChars) { resultString = outputString = CsrPmemAlloc(stringLength * sizeof(u16)); scanString = str; while (*scanString) { if (*scanString == L'&') { *outputString++ = '&'; *outputString++ = 'a'; *outputString++ = 'm'; *outputString++ = 'p'; *outputString++ = ';'; } else if (*scanString == L'<') { *outputString++ = '&'; *outputString++ = 'l'; *outputString++ = 't'; *outputString++ = ';'; } else if (*scanString == L'>') { *outputString++ = '&'; *outputString++ = 'g'; *outputString++ = 't'; *outputString++ = ';'; } else { *outputString++ = *scanString; } scanString++; } *outputString++ = 0; CsrPmemFree(str); } } return resultString; } /******************************************************************************** * * Name: CsrXML2Utf16String * * Description: The function converts an unicoded XML string into an unicoded * string (UTF-16) where some special XML characters are decoded according to * the XML spec. * * Input: A unicoded XML string which is freed. * * Output: A new unicoded pointer containing the decoded output. 
* *********************************************************************************/ u16 *CsrXML2Utf16String(u16 *str) { u16 *scanString; u16 *outputString = NULL; u16 *resultString = str; u32 stringLength = 0; u8 encodeChars = FALSE; scanString = str; if (scanString) { while (*scanString) { if (*scanString == (u16) L'&') { scanString++; if (!CsrUtf16StrNICmp(scanString, (u16 *) L"AMP;", 4)) { scanString += 3; encodeChars = TRUE; } else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"LT;", 3)) { scanString += 2; encodeChars = TRUE; } else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"GT;", 3)) { scanString += 2; encodeChars = TRUE; } if (!CsrUtf16StrNICmp(scanString, (u16 *) L"APOS;", 5)) { scanString += 4; encodeChars = TRUE; } if (!CsrUtf16StrNICmp(scanString, (u16 *) L"QUOT;", 5)) { scanString += 4; encodeChars = TRUE; } else { scanString--; } } stringLength++; scanString++; } stringLength++; if (encodeChars) { resultString = outputString = CsrPmemAlloc(stringLength * sizeof(u16)); scanString = str; while (*scanString) { if (*scanString == L'&') { scanString++; if (!CsrUtf16StrNICmp(scanString, (u16 *) L"AMP;", 4)) { *outputString++ = L'&'; scanString += 3; } else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"LT;", 3)) { *outputString++ = L'<'; scanString += 2; } else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"GT;", 3)) { *outputString++ = L'>'; scanString += 2; } else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"APOS;", 5)) { *outputString++ = L'\''; scanString += 4; } else if (!CsrUtf16StrNICmp(scanString, (u16 *) L"QUOT;", 5)) { *outputString++ = L'\"'; scanString += 4; } else { *outputString++ = L'&'; scanString--; } } else { *outputString++ = *scanString; } scanString++; } *outputString++ = 0; CsrPmemFree(str); } } return resultString; } u32 CsrUtf8StringLengthInBytes(const u8 *string) { size_t length = 0; if (string) { length = strlen((const char *)string); } return (u32) length; } u8 *CsrUtf8StrTruncate(u8 *target, size_t count) { size_t lastByte = count - 1; 
target[count] = '\0'; if (count && (target[lastByte] & 0x80)) { /* the last byte contains non-ascii char */ if (target[lastByte] & 0x40) { /* multi-byte char starting just before truncation */ target[lastByte] = '\0'; } else if ((target[lastByte - 1] & 0xE0) == 0xE0) { /* 3-byte char starting 2 bytes before truncation */ target[lastByte - 1] = '\0'; } else if ((target[lastByte - 2] & 0xF0) == 0xF0) { /* 4-byte char starting 3 bytes before truncation */ target[lastByte - 2] = '\0'; } } return target; }