Merge pull request #22212 from knn-k/xInlineUTF16StrIdxOfStr · eclipse-openj9/openj9@1665fb8
@@ -10209,25 +10209,28 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c
10209102091021010210/**
1021110211 * \brief
10212- * Generate inlined instructions equivalent to java/lang/StringLatin1.indexOf([BI[BII)I
10212+ * Generate inlined instructions equivalent to java/lang/StringLatin1.indexOf([BI[BII)I or java/lang/StringUTF16.indexOf([BI[BII)I
1021310213 *
1021410214 * \param node
1021510215 * The tree node
1021610216 *
1021710217 * \param cg
1021810218 * The Code Generator
1021910219 *
10220+ * \param isLatin1
10221+ * True to generate the Latin1 variant (1-byte characters), false to generate the UTF16 variant (2-byte characters)
10222+ *
1022010223 * Note that this version does not support discontiguous arrays
1022110224 */
10222-static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::CodeGenerator* cg)
10225+static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::CodeGenerator* cg, bool isLatin1)
1022310226 {
1022410227 static bool disableStrIdxOfStr = (feGetEnv("TR_disableStrIdxOfStr") != NULL);
1022510228 if (disableStrIdxOfStr) return NULL;
10226102291022710230 static bool verboseStrIdxOfStr = (feGetEnv("TR_verboseStrIdxOfStr") != NULL);
1022810231 if (verboseStrIdxOfStr)
1022910232 {
10230- fprintf(stderr, "*Latin1.indexOfString(): %s @%s\n", cg->comp()->signature(), cg->comp()->getHotnessName());
10233+ fprintf(stderr, "*%s.indexOfString: %s @%s\n", isLatin1 ? "Latin1" : "UTF16", cg->comp()->signature(), cg->comp()->getHotnessName());
1023110234 }
10232102351023310236 TR_ASSERT_FATAL(cg->comp()->target().is64Bit(), "Not supported on 32-bit platform");
@@ -10273,15 +10276,37 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1027310276 0x00, 0x00, 0x00, 0x00,
1027410277 0x00, 0x00, 0x00, 0x00,
1027510278 };
10279+ static uint8_t MASKOFSIZETWO[] =
10280+ {
10281+ 0x00, 0x01, 0x00, 0x01,
10282+ 0x00, 0x01, 0x00, 0x01,
10283+ 0x00, 0x01, 0x00, 0x01,
10284+ 0x00, 0x01, 0x00, 0x01,
10285+ };
10276102861027710287 const uint8_t width = 16;
10288+ uint8_t shift = 0;
10289+ uint8_t *shuffleMask = NULL;
10290+ TR::InstOpCode::Mnemonic compareOp = TR::InstOpCode::bad;
10291+ if (isLatin1)
10292+ {
10293+ shuffleMask = MASKOFSIZEONE;
10294+ compareOp = TR::InstOpCode::PCMPEQBRegReg;
10295+ shift = 0;
10296+ }
10297+ else
10298+ {
10299+ shuffleMask = MASKOFSIZETWO;
10300+ compareOp = TR::InstOpCode::PCMPEQWRegReg;
10301+ shift = 1;
10302+ }
10278103031027910304 TR::Register *ECX = cg->allocateRegister(TR_GPR);
1028010305 TR::Register *tmpReg = cg->allocateRegister(TR_GPR);
1028110306 TR::Register *xmmReg1 = cg->allocateRegister(TR_VRF);
1028210307 TR::Register *xmmReg2 = cg->allocateRegister(TR_VRF);
1028310308 TR::Register *xmmReg3 = cg->allocateRegister(TR_VRF);
10284- TR::Register *s1idxReg = cg->allocateRegister(TR_GPR);
10309+ TR::Register *s1addrReg = cg->allocateRegister(TR_GPR);
1028510310 TR::Register *s2idxReg = cg->allocateRegister(TR_GPR);
10286103111028710312 TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions((uint8_t)12, (uint8_t)12, cg);
@@ -10295,7 +10320,7 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1029510320 dependencies->addPreCondition(xmmReg1, TR::RealRegister::NoReg, cg);
1029610321 dependencies->addPreCondition(xmmReg2, TR::RealRegister::NoReg, cg);
1029710322 dependencies->addPreCondition(xmmReg3, TR::RealRegister::NoReg, cg);
10298- dependencies->addPreCondition(s1idxReg, TR::RealRegister::NoReg, cg);
10323+ dependencies->addPreCondition(s1addrReg, TR::RealRegister::NoReg, cg);
1029910324 dependencies->addPreCondition(s2idxReg, TR::RealRegister::NoReg, cg);
10300103251030110326 dependencies->addPostCondition(s1Reg, TR::RealRegister::NoReg, cg);
@@ -10308,7 +10333,7 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1030810333 dependencies->addPostCondition(xmmReg1, TR::RealRegister::NoReg, cg);
1030910334 dependencies->addPostCondition(xmmReg2, TR::RealRegister::NoReg, cg);
1031010335 dependencies->addPostCondition(xmmReg3, TR::RealRegister::NoReg, cg);
10311- dependencies->addPostCondition(s1idxReg, TR::RealRegister::NoReg, cg);
10336+ dependencies->addPostCondition(s1addrReg, TR::RealRegister::NoReg, cg);
1031210337 dependencies->addPostCondition(s2idxReg, TR::RealRegister::NoReg, cg);
10313103381031410339 TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
@@ -10330,9 +10355,9 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1033010355 int32_t hdrSize = static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
10331103561033210357 // load first char of s2
10333- generateRegMemInstruction(TR::InstOpCode::MOVZXReg4Mem1, node, tmpReg, generateX86MemoryReference(s2Reg, hdrSize, cg), cg);
10358+ generateRegMemInstruction(isLatin1 ? TR::InstOpCode::MOVZXReg4Mem1 : TR::InstOpCode::MOVZXReg4Mem2, node, tmpReg, generateX86MemoryReference(s2Reg, hdrSize, cg), cg);
1033410359 generateRegRegInstruction(TR::InstOpCode::MOVDRegReg4, node, xmmReg2, tmpReg, cg);
10335- generateRegMemInstruction(TR::InstOpCode::PSHUFBRegMem, node, xmmReg2, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, MASKOFSIZEONE), cg), cg);
10360+ generateRegMemInstruction(TR::InstOpCode::PSHUFBRegMem, node, xmmReg2, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, shuffleMask), cg), cg);
10336103611033710362 // calculate max
1033810363 generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, maxReg, s2lenReg, cg); // s1len - s2len
@@ -10342,31 +10367,35 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1034210367 generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);
1034310368 generateLabelInstruction(TR::InstOpCode::JG4, node, notFoundLabel, cg);
103441036910345- generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmpReg, generateX86MemoryReference(s1Reg, resultReg, 0, hdrSize, cg), cg);
10370+ generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmpReg, generateX86MemoryReference(s1Reg, resultReg, shift, hdrSize, cg), cg);
1034610371 generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, ECX, tmpReg, cg);
1034710372 generateRegImmInstruction(TR::InstOpCode::AND4RegImms, node, ECX, width - 1, cg);
1034810373 generateLabelInstruction(TR::InstOpCode::JE1, node, firstCharLoopLabel, cg);
10349103741035010375 generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, tmpReg, ~(width - 1), cg);
1035110376 generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(tmpReg, 0, cg), cg);
10352- generateRegRegInstruction(TR::InstOpCode::PCMPEQBRegReg, node, xmmReg1, xmmReg2, cg);
10377+ generateRegRegInstruction(compareOp, node, xmmReg1, xmmReg2, cg);
1035310378 generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, tmpReg, xmmReg1, cg);
1035410379 generateRegInstruction(TR::InstOpCode::SHR4RegCL, node, tmpReg, cg);
1035510380 generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, tmpReg, tmpReg, cg);
1035610381 generateLabelInstruction(TR::InstOpCode::JNE1, node, firstCharMatchedLabel, cg);
10357- generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width, cg);
10382+ if (!isLatin1)
10383+ {
10384+ generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, 1, cg);
10385+ }
10386+ generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width >> shift, cg);
1035810387 generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, resultReg, ECX, cg);
1035910388 generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);
1036010389 generateLabelInstruction(TR::InstOpCode::JG4, node, notFoundLabel, cg);
10361103901036210391 // loop for finding the first char
1036310392 generateLabelInstruction(TR::InstOpCode::label, node, firstCharLoopLabel, cg);
10364- generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1Reg, resultReg, 0, hdrSize, cg), cg);
10365- generateRegRegInstruction(TR::InstOpCode::PCMPEQBRegReg, node, xmmReg1, xmmReg2, cg);
10393+ generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1Reg, resultReg, shift, hdrSize, cg), cg);
10394+ generateRegRegInstruction(compareOp, node, xmmReg1, xmmReg2, cg);
1036610395 generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, tmpReg, xmmReg1, cg);
1036710396 generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, tmpReg, tmpReg, cg);
1036810397 generateLabelInstruction(TR::InstOpCode::JNE1, node, firstCharMatchedLabel, cg);
10369- generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width, cg);
10398+ generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width >> shift, cg);
1037010399 generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);
1037110400 generateLabelInstruction(TR::InstOpCode::JLE1, node, firstCharLoopLabel, cg);
1037210401 generateLabelInstruction(TR::InstOpCode::JMP4, node, notFoundLabel, cg);
@@ -10375,29 +10404,32 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1037510404 generateLabelInstruction(TR::InstOpCode::label, node, firstCharMatchedLabel, cg);
10376104051037710406 generateRegRegInstruction(TR::InstOpCode::BSF4RegReg, node, tmpReg, tmpReg, cg);
10407+ if (!isLatin1)
10408+ {
10409+ generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, tmpReg, 1, cg);
10410+ }
1037810411 generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, resultReg, tmpReg, cg);
10379104121038010413 generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);
1038110414 generateLabelInstruction(TR::InstOpCode::JG4, node, notFoundLabel, cg);
103821041510383- generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, s1idxReg, generateX86MemoryReference(resultReg, 1, cg), cg); // s1idx = offset + 1
10416+ generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, s1addrReg, generateX86MemoryReference(s1Reg, resultReg, shift, hdrSize, cg), cg); // s1addr = &(s1[resultReg << shift])
1038410417 generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, s2idxReg, 1, cg); // s2idx = 1
10385104181038610419 generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, ECX, generateX86MemoryReference(s2lenReg, -1, cg), cg); // ECX = s2len - 1: 1st char has already matched
10387- generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, 4, cg);
10420+ generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, 4 - shift, cg); // div by 16 or 8
1038810421 generateLabelInstruction(TR::InstOpCode::JE1, node, byteLoopLabel, cg);
10389104221039010423 // Compare by 16 bytes
1039110424 generateLabelInstruction(TR::InstOpCode::label, node, qwordLoopLabel, cg);
10392- generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1Reg, s1idxReg, 0, hdrSize, cg), cg);
10393- generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg3, generateX86MemoryReference(s2Reg, s2idxReg, 0, hdrSize, cg), cg);
10425+ generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1addrReg, s2idxReg, shift, 0, cg), cg);
10426+ generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg3, generateX86MemoryReference(s2Reg, s2idxReg, shift, hdrSize, cg), cg);
1039410427 generateRegRegInstruction(TR::InstOpCode::PCMPEQBRegReg, node, xmmReg1, xmmReg3, cg);
1039510428 generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, tmpReg, xmmReg1, cg);
1039610429 generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, tmpReg, 0xffff, cg);
1039710430 generateLabelInstruction(TR::InstOpCode::JNE1, node, unmatchedLabel, cg);
103981043110399- generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s1idxReg, width, cg);
10400- generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s2idxReg, width, cg);
10432+ generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s2idxReg, width >> shift, cg);
1040110433 generateRegImmInstruction(TR::InstOpCode::SUB4RegImms, node, ECX, 1, cg);
1040210434 generateLabelInstruction(TR::InstOpCode::JG1, node, qwordLoopLabel, cg);
1040310435@@ -10406,11 +10438,10 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1040610438 generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, s2lenReg, s2idxReg, cg);
1040710439 generateLabelInstruction(TR::InstOpCode::JLE1, node, doneLabel, cg); // resultReg has the result
104081044010409- generateRegMemInstruction(TR::InstOpCode::L1RegMem, node, tmpReg, generateX86MemoryReference(s2Reg, s2idxReg, 0, hdrSize, cg), cg);
10410- generateMemRegInstruction(TR::InstOpCode::CMP1MemReg, node, generateX86MemoryReference(s1Reg, s1idxReg, 0, hdrSize, cg), tmpReg, cg);
10441+ generateRegMemInstruction(isLatin1 ? TR::InstOpCode::L1RegMem : TR::InstOpCode::L2RegMem, node, tmpReg, generateX86MemoryReference(s2Reg, s2idxReg, shift, hdrSize, cg), cg);
10442+ generateMemRegInstruction(isLatin1 ? TR::InstOpCode::CMP1MemReg : TR::InstOpCode::CMP2MemReg, node, generateX86MemoryReference(s1addrReg, s2idxReg, shift, 0, cg), tmpReg, cg);
1041110443 generateLabelInstruction(TR::InstOpCode::JNE1, node, unmatchedLabel, cg);
104121044410413- generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s1idxReg, 1, cg);
1041410445 generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s2idxReg, 1, cg);
1041510446 generateLabelInstruction(TR::InstOpCode::JMP1, node, byteLoopLabel, cg);
1041610447@@ -10431,7 +10462,7 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code
1043110462 cg->stopUsingRegister(xmmReg1);
1043210463 cg->stopUsingRegister(xmmReg2);
1043310464 cg->stopUsingRegister(xmmReg3);
10434- cg->stopUsingRegister(s1idxReg);
10465+ cg->stopUsingRegister(s1addrReg);
1043510466 cg->stopUsingRegister(s2idxReg);
10436104671043710468 if (maxReg != s1lenReg)
@@ -12948,7 +12979,15 @@ J9::X86::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *c
1294812979 case TR::java_lang_StringLatin1_indexOf:
1294912980 case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringLatin1:
1295012981 if (cg->getSupportsInlineStringIndexOfString())
12951- returnRegister = inlineIntrinsicStringIndexOfString(node, cg);
12982+ returnRegister = inlineIntrinsicStringIndexOfString(node, cg, true);
12983+12984+ callInlined = (returnRegister != NULL);
12985+ break;
12986+12987+ case TR::java_lang_StringUTF16_indexOf:
12988+ case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringUTF16:
12989+ if (cg->getSupportsInlineStringIndexOfString())
12990+ returnRegister = inlineIntrinsicStringIndexOfString(node, cg, false);
12952129911295312992 callInlined = (returnRegister != NULL);
1295412993 break;