Issue 76173
Summary Weird Pass By Reference Issue When Trying To Use the llvm-mc.cpp Assmbler Script in My Own Project
Labels new issue
Assignees
Reporter matinraayai
    My goal is to use LLVM MC to parse a simple assembly file in memory (without any directives, most likely a basic block) and generate machine code (preferably with only the .text section, I want to embed this in an ELF manually myself). I am targeting AMDGPU.

Naturally I looked at how it's done [here](https://github.com/llvm/llvm-project/blob/7bd17212ef23a72ea224a037126d33d3e02553fe/llvm/tools/llvm-mc/llvm-mc.cpp#L323) in [llvm-mc.cpp](https://github.com/llvm/llvm-project/blob/7bd17212ef23a72ea224a037126d33d3e02553fe/llvm/tools/llvm-mc/llvm-mc.cpp).

I created the following function to use with the AMDGPU target:
```c++
#include <string>
#include <iostream>
#include <memory>
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"

llvm::SmallVector<char> assemble(const std::string &instListStr) {

    LLVMInitializeAMDGPUTarget();
 LLVMInitializeAMDGPUTargetInfo();
    LLVMInitializeAMDGPUTargetMC();
 LLVMInitializeAMDGPUDisassembler();
 LLVMInitializeAMDGPUAsmParser();
    LLVMInitializeAMDGPUAsmPrinter();
 LLVMInitializeAMDGPUTargetMCA();

    std::string isaName{"amdgcn-amd-amdhsa--gfx908"};
    std::string Error;
 std::cout << "Target name: " << isaName << std::endl;

    const llvm::Target *TheTarget = llvm::TargetRegistry::lookupTarget(isaName, Error);
    assert(TheTarget);

    std::unique_ptr<const llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(llvm::StringRef(isaName)));
 assert(MRI);

    llvm::MCTargetOptions MCOptions;
 std::unique_ptr<const llvm::MCAsmInfo> MAI(
 TheTarget->createMCAsmInfo(*MRI, isaName, MCOptions));

 assert(MAI);

    std::unique_ptr<const llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo());
    assert(MII);

 std::unique_ptr<const llvm::MCSubtargetInfo> STI(
 TheTarget->createMCSubtargetInfo(isaName, "gfx908", "+sramecc-xnack"));
    assert(STI);

    // MatchAndEmitInstruction in MCTargetAsmParser.h

    // Now that GetTarget() has (potentially) replaced TripleName, it's safe to
    // construct the Triple object.
    llvm::Triple TheTriple(isaName);

//    std::unique_ptr<llvm::MemoryBuffer> BufferPtr = llvm::MemoryBuffer::getMemBuffer(instListStr, "", true);
//
//    llvm::MemoryBuffer *Buffer = BufferPtr.get();

// auto SrcMgr = std::make_unique<llvm::SourceMgr>();
    llvm::SourceMgr SrcMgr;
    // Package up features to be passed to target/subtarget
 std::string FeaturesStr;
    //    if (MAttrs.size()) {
    // SubtargetFeatures Features;
    //        for (unsigned i = 0; i != MAttrs.size(); ++i)
    //            Features.AddFeature(MAttrs[i]);
 //        FeaturesStr = Features.getString();
    //    }

    // std::unique_ptr<llvm::MCContext> Ctx(new (std::nothrow)
    // llvm::MCContext(llvm::Triple(isaName), MAI.get(), MRI.get(),
    // &SrcMgr,
    // &MCOptions,
    // STI.get()));
    //    assert(Ctx);

    // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and
 // MCObjectFileInfo needs a MCContext reference in order to initialize itself.
    llvm::MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr,
                        &MCOptions);
 std::unique_ptr<llvm::MCObjectFileInfo> MOFI(
 TheTarget->createMCObjectFileInfo(Ctx, /*PIC*/ true, /*large code model*/ false));
    Ctx.setObjectFileInfo(MOFI.get());

 Ctx.setAllowTemporaryLabels(false);

 Ctx.setGenDwarfForAssembly(false);

    llvm::SmallVector<char> out;

    llvm::raw_svector_ostream VOS(out);

 std::unique_ptr<llvm::buffer_ostream> BOS;



 std::unique_ptr<llvm::MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
 assert(MCII && "Unable to create instruction info!");

 llvm::MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
 llvm::MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
    std::unique_ptr<llvm::MCStreamer> Str(TheTarget->createMCObjectStreamer(
        TheTriple, Ctx, std::unique_ptr<llvm::MCAsmBackend>(MAB),
 MAB->createObjectWriter(VOS),
 std::unique_ptr<llvm::MCCodeEmitter>(CE), *STI, MCOptions.MCRelaxAll,
 MCOptions.MCIncrementalLinkerCompatible,
 /*DWARFMustBeAtTheEnd*/ false));

//    Str->initSections(true, *STI);

    // Use Assembler information for parsing.
 Str->setUseAssemblerInfoForParsing(false);

    // Tell SrcMgr about this buffer, which is what the parser will pick up.

 assert(llvm::MemoryBuffer::getMemBuffer(instListStr, "", true)->getBuffer() == llvm::StringRef(instListStr));
    unsigned srcId = SrcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBufferCopy(instListStr), llvm::SMLoc());
    SrcMgr.getBufferInfo(srcId);

 std::unique_ptr<llvm::MCAsmParser> Parser(
 llvm::createMCAsmParser(SrcMgr, Ctx, *Str, *MAI));
 std::unique_ptr<llvm::MCTargetAsmParser> TAP(
 TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));

 assert(TAP && "this target does not support assembly parsing.\n");

 //    int SymbolResult = fillCommandLineSymbols(*Parser);
    // if(SymbolResult)
    //        return SymbolResult;
 Parser->setShowParsedOperands(true);
 Parser->setTargetParser(*TAP);
 Parser->getLexer().setLexMasmIntegers(true);
 Parser->getLexer().setLexMasmHexFloats(true);
 Parser->getLexer().setLexMotorolaIntegers(true);

 Parser->Run(false);

    return out;
}
```
I ran this function with the following input: ```instListStr = "s_load_dword s0, s[4:5], 0x4"```, but I get a segfault when creating the ```Parser``` using the ```llvm::createMCAsmParser``` factory method. After running the code on LLVM with debug information, the issue is found to be an assertion failing, showing that there are no sources added to ```SrcMgr``` in the first place; However, I can confirm that, before calling the parser factory function, my src buffer has been added and can be accessed from the ```SrcMgr``` by checking if ```SrcMgr.getBufferInfo(srcId);``` fails.

It seems that when ```SrcMgr``` is passed by reference, the underlying ```Buffers``` is not passed correctly. Inspecting with GDB shows that both the ```SM``` inside the factory function and the ```SrcMgr``` inside my function point to the same region of memory, but the ```Buffers``` field of ```SM``` inside the factory function points to ```nullptr```. I'm not sure why this happens, and what is causing this. Any help with this would be greatly appreciated.

Thanks in Advance

_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to