Merge pull request #22212 from knn-k/xInlineUTF16StrIdxOfStr · eclipse-openj9/openj9@1665fb8

@@ -10209,25 +10209,28 @@ static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* c

10209102091021010210

/**

1021110211

* \brief

10212-

* Generate inlined instructions equivalent to java/lang/StringLatin1.indexOf([BI[BII)I

10212+

* Generate inlined instructions equivalent to java/lang/StringLatin1.indexOf([BI[BII)I or java/lang/StringUTF16.indexOf([BI[BII)I

1021310213

*

1021410214

* \param node

1021510215

* The tree node

1021610216

*

1021710217

* \param cg

1021810218

* The Code Generator

1021910219

*

10220+

* \param isLatin1

10221+

* True when both strings are Latin1 (one byte per char), false when they are UTF16 (two bytes per char)

10222+

*

1022010223

* Note that this version does not support discontiguous arrays

1022110224

*/

10222-

static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::CodeGenerator* cg)

10225+

static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::CodeGenerator* cg, bool isLatin1)

1022310226

{

1022410227

static bool disableStrIdxOfStr = (feGetEnv("TR_disableStrIdxOfStr") != NULL);

1022510228

if (disableStrIdxOfStr) return NULL;

10226102291022710230

static bool verboseStrIdxOfStr = (feGetEnv("TR_verboseStrIdxOfStr") != NULL);

1022810231

if (verboseStrIdxOfStr)

1022910232

{

10230-

fprintf(stderr, "*Latin1.indexOfString(): %s @%s\n", cg->comp()->signature(), cg->comp()->getHotnessName());

10233+

fprintf(stderr, "*%s.indexOfString: %s @%s\n", isLatin1 ? "Latin1" : "UTF16", cg->comp()->signature(), cg->comp()->getHotnessName());

1023110234

}

10232102351023310236

TR_ASSERT_FATAL(cg->comp()->target().is64Bit(), "Not supported on 32-bit platform");

@@ -10273,15 +10276,37 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1027310276

0x00, 0x00, 0x00, 0x00,

1027410277

0x00, 0x00, 0x00, 0x00,

1027510278

};

10279+

static uint8_t MASKOFSIZETWO[] =

10280+

{

10281+

0x00, 0x01, 0x00, 0x01,

10282+

0x00, 0x01, 0x00, 0x01,

10283+

0x00, 0x01, 0x00, 0x01,

10284+

0x00, 0x01, 0x00, 0x01,

10285+

};

10276102861027710287

const uint8_t width = 16;

10288+

uint8_t shift = 0;

10289+

uint8_t *shuffleMask = NULL;

10290+

TR::InstOpCode::Mnemonic compareOp = TR::InstOpCode::bad;

10291+

if (isLatin1)

10292+

{

10293+

shuffleMask = MASKOFSIZEONE;

10294+

compareOp = TR::InstOpCode::PCMPEQBRegReg;

10295+

shift = 0;

10296+

}

10297+

else

10298+

{

10299+

shuffleMask = MASKOFSIZETWO;

10300+

compareOp = TR::InstOpCode::PCMPEQWRegReg;

10301+

shift = 1;

10302+

}

10278103031027910304

TR::Register *ECX = cg->allocateRegister(TR_GPR);

1028010305

TR::Register *tmpReg = cg->allocateRegister(TR_GPR);

1028110306

TR::Register *xmmReg1 = cg->allocateRegister(TR_VRF);

1028210307

TR::Register *xmmReg2 = cg->allocateRegister(TR_VRF);

1028310308

TR::Register *xmmReg3 = cg->allocateRegister(TR_VRF);

10284-

TR::Register *s1idxReg = cg->allocateRegister(TR_GPR);

10309+

TR::Register *s1addrReg = cg->allocateRegister(TR_GPR);

1028510310

TR::Register *s2idxReg = cg->allocateRegister(TR_GPR);

10286103111028710312

TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions((uint8_t)12, (uint8_t)12, cg);

@@ -10295,7 +10320,7 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1029510320

dependencies->addPreCondition(xmmReg1, TR::RealRegister::NoReg, cg);

1029610321

dependencies->addPreCondition(xmmReg2, TR::RealRegister::NoReg, cg);

1029710322

dependencies->addPreCondition(xmmReg3, TR::RealRegister::NoReg, cg);

10298-

dependencies->addPreCondition(s1idxReg, TR::RealRegister::NoReg, cg);

10323+

dependencies->addPreCondition(s1addrReg, TR::RealRegister::NoReg, cg);

1029910324

dependencies->addPreCondition(s2idxReg, TR::RealRegister::NoReg, cg);

10300103251030110326

dependencies->addPostCondition(s1Reg, TR::RealRegister::NoReg, cg);

@@ -10308,7 +10333,7 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1030810333

dependencies->addPostCondition(xmmReg1, TR::RealRegister::NoReg, cg);

1030910334

dependencies->addPostCondition(xmmReg2, TR::RealRegister::NoReg, cg);

1031010335

dependencies->addPostCondition(xmmReg3, TR::RealRegister::NoReg, cg);

10311-

dependencies->addPostCondition(s1idxReg, TR::RealRegister::NoReg, cg);

10336+

dependencies->addPostCondition(s1addrReg, TR::RealRegister::NoReg, cg);

1031210337

dependencies->addPostCondition(s2idxReg, TR::RealRegister::NoReg, cg);

10313103381031410339

TR::LabelSymbol *startLabel = generateLabelSymbol(cg);

@@ -10330,9 +10355,9 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1033010355

int32_t hdrSize = static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());

10331103561033210357

// load first char of s2

10333-

generateRegMemInstruction(TR::InstOpCode::MOVZXReg4Mem1, node, tmpReg, generateX86MemoryReference(s2Reg, hdrSize, cg), cg);

10358+

generateRegMemInstruction(isLatin1 ? TR::InstOpCode::MOVZXReg4Mem1 : TR::InstOpCode::MOVZXReg4Mem2, node, tmpReg, generateX86MemoryReference(s2Reg, hdrSize, cg), cg);

1033410359

generateRegRegInstruction(TR::InstOpCode::MOVDRegReg4, node, xmmReg2, tmpReg, cg);

10335-

generateRegMemInstruction(TR::InstOpCode::PSHUFBRegMem, node, xmmReg2, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, MASKOFSIZEONE), cg), cg);

10360+

generateRegMemInstruction(TR::InstOpCode::PSHUFBRegMem, node, xmmReg2, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, shuffleMask), cg), cg);

10336103611033710362

// calculate max

1033810363

generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, maxReg, s2lenReg, cg); // s1len - s2len

@@ -10342,31 +10367,35 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1034210367

generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);

1034310368

generateLabelInstruction(TR::InstOpCode::JG4, node, notFoundLabel, cg);

103441036910345-

generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmpReg, generateX86MemoryReference(s1Reg, resultReg, 0, hdrSize, cg), cg);

10370+

generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmpReg, generateX86MemoryReference(s1Reg, resultReg, shift, hdrSize, cg), cg);

1034610371

generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, ECX, tmpReg, cg);

1034710372

generateRegImmInstruction(TR::InstOpCode::AND4RegImms, node, ECX, width - 1, cg);

1034810373

generateLabelInstruction(TR::InstOpCode::JE1, node, firstCharLoopLabel, cg);

10349103741035010375

generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, tmpReg, ~(width - 1), cg);

1035110376

generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(tmpReg, 0, cg), cg);

10352-

generateRegRegInstruction(TR::InstOpCode::PCMPEQBRegReg, node, xmmReg1, xmmReg2, cg);

10377+

generateRegRegInstruction(compareOp, node, xmmReg1, xmmReg2, cg);

1035310378

generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, tmpReg, xmmReg1, cg);

1035410379

generateRegInstruction(TR::InstOpCode::SHR4RegCL, node, tmpReg, cg);

1035510380

generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, tmpReg, tmpReg, cg);

1035610381

generateLabelInstruction(TR::InstOpCode::JNE1, node, firstCharMatchedLabel, cg);

10357-

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width, cg);

10382+

if (!isLatin1)

10383+

{

10384+

generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, 1, cg);

10385+

}

10386+

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width >> shift, cg);

1035810387

generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, resultReg, ECX, cg);

1035910388

generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);

1036010389

generateLabelInstruction(TR::InstOpCode::JG4, node, notFoundLabel, cg);

10361103901036210391

// loop for finding the first char

1036310392

generateLabelInstruction(TR::InstOpCode::label, node, firstCharLoopLabel, cg);

10364-

generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1Reg, resultReg, 0, hdrSize, cg), cg);

10365-

generateRegRegInstruction(TR::InstOpCode::PCMPEQBRegReg, node, xmmReg1, xmmReg2, cg);

10393+

generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1Reg, resultReg, shift, hdrSize, cg), cg);

10394+

generateRegRegInstruction(compareOp, node, xmmReg1, xmmReg2, cg);

1036610395

generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, tmpReg, xmmReg1, cg);

1036710396

generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, tmpReg, tmpReg, cg);

1036810397

generateLabelInstruction(TR::InstOpCode::JNE1, node, firstCharMatchedLabel, cg);

10369-

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width, cg);

10398+

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, resultReg, width >> shift, cg);

1037010399

generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);

1037110400

generateLabelInstruction(TR::InstOpCode::JLE1, node, firstCharLoopLabel, cg);

1037210401

generateLabelInstruction(TR::InstOpCode::JMP4, node, notFoundLabel, cg);

@@ -10375,29 +10404,32 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1037510404

generateLabelInstruction(TR::InstOpCode::label, node, firstCharMatchedLabel, cg);

10376104051037710406

generateRegRegInstruction(TR::InstOpCode::BSF4RegReg, node, tmpReg, tmpReg, cg);

10407+

if (!isLatin1)

10408+

{

10409+

generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, tmpReg, 1, cg);

10410+

}

1037810411

generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, resultReg, tmpReg, cg);

10379104121038010413

generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, resultReg, maxReg, cg);

1038110414

generateLabelInstruction(TR::InstOpCode::JG4, node, notFoundLabel, cg);

103821041510383-

generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, s1idxReg, generateX86MemoryReference(resultReg, 1, cg), cg); // s1idx = offset + 1

10416+

generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, s1addrReg, generateX86MemoryReference(s1Reg, resultReg, shift, hdrSize, cg), cg); // s1addr = &(s1[resultReg << shift])

1038410417

generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, s2idxReg, 1, cg); // s2idx = 1

10385104181038610419

generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, ECX, generateX86MemoryReference(s2lenReg, -1, cg), cg); // ECX = s2len - 1: 1st char has already matched

10387-

generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, 4, cg);

10420+

generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, 4 - shift, cg); // div by 16 or 8

1038810421

generateLabelInstruction(TR::InstOpCode::JE1, node, byteLoopLabel, cg);

10389104221039010423

// Compare 16 bytes at a time (16 Latin1 chars or 8 UTF16 chars per iteration)

1039110424

generateLabelInstruction(TR::InstOpCode::label, node, qwordLoopLabel, cg);

10392-

generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1Reg, s1idxReg, 0, hdrSize, cg), cg);

10393-

generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg3, generateX86MemoryReference(s2Reg, s2idxReg, 0, hdrSize, cg), cg);

10425+

generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg1, generateX86MemoryReference(s1addrReg, s2idxReg, shift, 0, cg), cg);

10426+

generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmReg3, generateX86MemoryReference(s2Reg, s2idxReg, shift, hdrSize, cg), cg);

1039410427

generateRegRegInstruction(TR::InstOpCode::PCMPEQBRegReg, node, xmmReg1, xmmReg3, cg);

1039510428

generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, tmpReg, xmmReg1, cg);

1039610429

generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, tmpReg, 0xffff, cg);

1039710430

generateLabelInstruction(TR::InstOpCode::JNE1, node, unmatchedLabel, cg);

103981043110399-

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s1idxReg, width, cg);

10400-

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s2idxReg, width, cg);

10432+

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s2idxReg, width >> shift, cg);

1040110433

generateRegImmInstruction(TR::InstOpCode::SUB4RegImms, node, ECX, 1, cg);

1040210434

generateLabelInstruction(TR::InstOpCode::JG1, node, qwordLoopLabel, cg);

1040310435

@@ -10406,11 +10438,10 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1040610438

generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, s2lenReg, s2idxReg, cg);

1040710439

generateLabelInstruction(TR::InstOpCode::JLE1, node, doneLabel, cg); // resultReg has the result

104081044010409-

generateRegMemInstruction(TR::InstOpCode::L1RegMem, node, tmpReg, generateX86MemoryReference(s2Reg, s2idxReg, 0, hdrSize, cg), cg);

10410-

generateMemRegInstruction(TR::InstOpCode::CMP1MemReg, node, generateX86MemoryReference(s1Reg, s1idxReg, 0, hdrSize, cg), tmpReg, cg);

10441+

generateRegMemInstruction(isLatin1 ? TR::InstOpCode::L1RegMem : TR::InstOpCode::L2RegMem, node, tmpReg, generateX86MemoryReference(s2Reg, s2idxReg, shift, hdrSize, cg), cg);

10442+

generateMemRegInstruction(isLatin1 ? TR::InstOpCode::CMP1MemReg : TR::InstOpCode::CMP2MemReg, node, generateX86MemoryReference(s1addrReg, s2idxReg, shift, 0, cg), tmpReg, cg);

1041110443

generateLabelInstruction(TR::InstOpCode::JNE1, node, unmatchedLabel, cg);

104121044410413-

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s1idxReg, 1, cg);

1041410445

generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, s2idxReg, 1, cg);

1041510446

generateLabelInstruction(TR::InstOpCode::JMP1, node, byteLoopLabel, cg);

1041610447

@@ -10431,7 +10462,7 @@ static TR::Register* inlineIntrinsicStringIndexOfString(TR::Node* node, TR::Code

1043110462

cg->stopUsingRegister(xmmReg1);

1043210463

cg->stopUsingRegister(xmmReg2);

1043310464

cg->stopUsingRegister(xmmReg3);

10434-

cg->stopUsingRegister(s1idxReg);

10465+

cg->stopUsingRegister(s1addrReg);

1043510466

cg->stopUsingRegister(s2idxReg);

10436104671043710468

if (maxReg != s1lenReg)

@@ -12948,7 +12979,15 @@ J9::X86::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *c

1294812979

case TR::java_lang_StringLatin1_indexOf:

1294912980

case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringLatin1:

1295012981

if (cg->getSupportsInlineStringIndexOfString())

12951-

returnRegister = inlineIntrinsicStringIndexOfString(node, cg);

12982+

returnRegister = inlineIntrinsicStringIndexOfString(node, cg, true);

12983+12984+

callInlined = (returnRegister != NULL);

12985+

break;

12986+12987+

case TR::java_lang_StringUTF16_indexOf:

12988+

case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringUTF16:

12989+

if (cg->getSupportsInlineStringIndexOfString())

12990+

returnRegister = inlineIntrinsicStringIndexOfString(node, cg, false);

12952129911295312992

callInlined = (returnRegister != NULL);

1295412993

break;