reference, declaration → definition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
  273
  274
  275
  276
  277
  278
  279
  280
  281
  282
  283
  284
  285
  286
  287
  288
  289
  290
  291
  292
  293
  294
  295
  296
  297
  298
  299
  300
  301
  302
  303
  304
  305
  306
  307
  308
  309
  310
  311
  312
  313
  314
  315
  316
  317
  318
  319
  320
  321
  322
  323
  324
  325
  326
  327
  328
  329
  330
  331
  332
  333
  334
  335
  336
  337
  338
  339
  340
  341
  342
  343
  344
  345
  346
  347
  348
  349
  350
  351
  352
  353
  354
  355
  356
  357
  358
  359
  360
  361
  362
  363
  364
  365
  366
  367
  368
  369
  370
  371
  372
  373
  374
  375
  376
  377
  378
  379
  380
  381
  382
  383
  384
  385
  386
  387
  388
  389
  390
  391
//===- ELFObjHandler.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ELFObjHandler.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/TextAPI/ELF/ELFStub.h"

using llvm::MemoryBufferRef;
using llvm::object::ELFObjectFile;

using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;

namespace llvm {
namespace elfabi {

// Simple struct to hold relevant .dynamic entries.
// It is filled in by populateDynamic() from the tags found in the dynamic
// table; fields whose tag is optional are modelled with Optional.
struct DynamicEntries {
  // Value of the DT_STRTAB entry (location of the dynamic string table).
  uint64_t StrTabAddr = 0;
  // Value of the DT_STRSZ entry (size of the dynamic string table).
  uint64_t StrSize = 0;
  // Value of the DT_SONAME entry, if one exists. populateDynamic() checks it
  // against StrSize as an offset into the dynamic string table.
  Optional<uint64_t> SONameOffset;
  // Values of all DT_NEEDED entries. Despite the name these are string table
  // offsets, not the names themselves.
  std::vector<uint64_t> NeededLibNames;
  // Symbol table:
  // Value of the DT_SYMTAB entry (location of the dynamic symbol table).
  uint64_t DynSymAddr = 0;
  // Hash tables:
  // Value of the DT_HASH entry, if one exists.
  Optional<uint64_t> ElfHash;
  // Value of the DT_GNU_HASH entry, if one exists.
  Optional<uint64_t> GnuHash;
};

/// Extracts the null-terminated string that starts at a given position inside
/// a larger buffer. This works like StringRef::substr(), except that the
/// result ends right before the first null character found at or after
/// Offset. When no null character can be found, an error is returned instead.
///
/// @param Str Buffer to carve the substring out of.
/// @param Offset Index within Str at which the substring begins.
static Expected<StringRef> terminatedSubstr(StringRef Str, size_t Offset) {
  const size_t NullPos = Str.find('\0', Offset);
  if (NullPos != StringRef::npos)
    return Str.substr(Offset, NullPos - Offset);

  return createError(
      "String overran bounds of string table (no null terminator)");
}

/// Builds a new error whose message is the message of an existing error
/// followed by a space and some additional text. Errors cannot be extended in
/// place, so the incoming error is printed into a buffer, consumed, and
/// replaced by a fresh error created from the combined message.
///
/// @param Err Error whose message forms the start of the new message.
/// @param After Text placed after Err's message.
Error appendToError(Error Err, StringRef After) {
  std::string Combined;
  raw_string_ostream OS(Combined);
  OS << Err << " " << After;
  // The original error has to be marked as handled before it is dropped.
  consumeError(std::move(Err));
  return createError(OS.str().c_str());
}

/// Fills a DynamicEntries struct from the entries of an ELFT::DynRange and
/// then performs basic validation on the result: DT_STRTAB, DT_STRSZ and
/// DT_SYMTAB must all be present, and every DT_SONAME / DT_NEEDED string
/// offset must be smaller than the string table size.
///
/// @param Dyn Target DynamicEntries struct to populate.
/// @param DynTable Source dynamic table.
template <class ELFT>
static Error populateDynamic(DynamicEntries &Dyn,
                             typename ELFT::DynRange DynTable) {
  if (DynTable.empty())
    return createError("No .dynamic section found");

  // Remember which of the mandatory tags show up while scanning the table.
  bool SawStrTab = false;
  bool SawStrSz = false;
  bool SawSymTab = false;
  for (auto &DynEntry : DynTable) {
    switch (DynEntry.d_tag) {
    case DT_STRTAB:
      Dyn.StrTabAddr = DynEntry.d_un.d_ptr;
      SawStrTab = true;
      break;
    case DT_STRSZ:
      Dyn.StrSize = DynEntry.d_un.d_val;
      SawStrSz = true;
      break;
    case DT_SYMTAB:
      Dyn.DynSymAddr = DynEntry.d_un.d_ptr;
      SawSymTab = true;
      break;
    case DT_SONAME:
      Dyn.SONameOffset = DynEntry.d_un.d_val;
      break;
    case DT_NEEDED:
      Dyn.NeededLibNames.push_back(DynEntry.d_un.d_val);
      break;
    case DT_HASH:
      Dyn.ElfHash = DynEntry.d_un.d_ptr;
      break;
    case DT_GNU_HASH:
      Dyn.GnuHash = DynEntry.d_un.d_ptr;
      break;
    }
  }

  // Mandatory entries.
  if (!SawStrTab)
    return createError(
        "Couldn't locate dynamic string table (no DT_STRTAB entry)");
  if (!SawStrSz)
    return createError(
        "Couldn't determine dynamic string table size (no DT_STRSZ entry)");
  if (!SawSymTab)
    return createError(
        "Couldn't locate dynamic symbol table (no DT_SYMTAB entry)");

  // Every string offset collected above has to lie inside the string table.
  if (Dyn.SONameOffset.hasValue()) {
    const uint64_t SONameOff = *Dyn.SONameOffset;
    if (SONameOff >= Dyn.StrSize)
      return createStringError(
          object_error::parse_failed,
          "DT_SONAME string offset (0x%016" PRIx64
          ") outside of dynamic string table",
          SONameOff);
  }
  for (uint64_t NeededOff : Dyn.NeededLibNames) {
    if (NeededOff < Dyn.StrSize)
      continue;
    return createStringError(
        object_error::parse_failed,
        "DT_NEEDED string offset (0x%016" PRIx64
        ") outside of dynamic string table",
        NeededOff);
  }

  return Error::success();
}

/// Computes the number of dynamic symbols from a GNU hash table.
///
/// When the table has no buckets, the result is symndx + 1. Otherwise the
/// largest bucket value is taken as the index of the first symbol of the last
/// chain, and that chain is walked until an entry with its lowest bit set is
/// reached; the index of that entry plus one is the symbol count.
///
/// NOTE(review): the chain walk performs no bounds checking; it relies on the
/// table being well formed.
///
/// @param Table The GNU hash table for .dynsym.
template <class ELFT>
static uint64_t getDynSymtabSize(const typename ELFT::GnuHash &Table) {
  using Elf_Word = typename ELFT::Word;
  if (Table.nbuckets == 0)
    return Table.symndx + 1;

  // The largest bucket value is the index of the first symbol in the last
  // chain.
  uint64_t MaxBucket = 0;
  for (Elf_Word Bucket : Table.buckets())
    MaxBucket = std::max(MaxBucket, static_cast<uint64_t>(Bucket));

  // Step along the chain until the terminating entry (lowest bit set) to find
  // the last symbol index.
  uint64_t SymIdx = MaxBucket;
  for (const Elf_Word *ChainIt = reinterpret_cast<const Elf_Word *>(
           Table.values(MaxBucket).end());
       (*ChainIt & 1) == 0; ++ChainIt)
    ++SymIdx;

  return SymIdx + 1;
}

/// Determines how many dynamic symbols the file has.
/// Section headers are not consulted here, so the count is derived from the
/// dynamic hash tables: the GNU hash table is preferred, the SYSV hash table
/// (its nchain field) is used otherwise, and 0 is returned when neither table
/// was recorded in Dyn.
///
/// @param Dyn Entries with the locations of hash tables.
/// @param ElfFile The ElfFile that the section contents reside in.
template <class ELFT>
static Expected<uint64_t> getNumSyms(DynamicEntries &Dyn,
                                     const ELFFile<ELFT> &ElfFile) {
  using Elf_Hash = typename ELFT::Hash;
  using Elf_GnuHash = typename ELFT::GnuHash;

  // Preferred source: the GNU hash table.
  if (Dyn.GnuHash.hasValue()) {
    Expected<const uint8_t *> GnuTablePtr = ElfFile.toMappedAddr(*Dyn.GnuHash);
    if (!GnuTablePtr)
      return GnuTablePtr.takeError();
    const auto *GnuTable = reinterpret_cast<const Elf_GnuHash *>(*GnuTablePtr);
    return getDynSymtabSize<ELFT>(*GnuTable);
  }

  // Without any hash table there is nothing to derive a count from.
  if (!Dyn.ElfHash.hasValue())
    return 0;

  // Fallback: the SYSV hash table stores the count in nchain.
  Expected<const uint8_t *> SysvTablePtr = ElfFile.toMappedAddr(*Dyn.ElfHash);
  if (!SysvTablePtr)
    return SysvTablePtr.takeError();
  const auto *SysvTable = reinterpret_cast<const Elf_Hash *>(*SysvTablePtr);
  return SysvTable->nchain;
}

/// Maps the symbol type stored in a symbol's st_info member to the matching
/// ELFSymbolType enumerator. Only the low four bits of st_info are examined.
/// STT_NOTYPE, STT_OBJECT, STT_FUNC, and STT_TLS have dedicated enumerators;
/// every other type is reported as ELFSymbolType::Unknown.
///
/// @param Info Binary symbol st_info to extract symbol type from.
static ELFSymbolType convertInfoToType(uint8_t Info) {
  switch (Info & 0xf) {
  case ELF::STT_NOTYPE:
    return ELFSymbolType::NoType;
  case ELF::STT_OBJECT:
    return ELFSymbolType::Object;
  case ELF::STT_FUNC:
    return ELFSymbolType::Func;
  case ELF::STT_TLS:
    return ELFSymbolType::TLS;
  default:
    break;
  }
  return ELFSymbolType::Unknown;
}

/// Builds an ELFSymbol with the given name and fills in its Weak, Undefined,
/// Type and Size members from a binary ELFT::Sym. Symbols of function type
/// always get a size of 0; all other symbols take over st_size.
///
/// @param SymName The desired name of the ELFSymbol.
/// @param RawSym ELFT::Sym to extract symbol information from.
template <class ELFT>
static ELFSymbol createELFSym(StringRef SymName,
                              const typename ELFT::Sym &RawSym) {
  ELFSymbol Result(SymName);
  Result.Weak = RawSym.getBinding() == STB_WEAK;
  Result.Undefined = RawSym.isUndefined();
  Result.Type = convertInfoToType(RawSym.st_info);

  // The size is only carried over for non-function symbols.
  if (Result.Type != ELFSymbolType::Func)
    Result.Size = RawSym.st_size;
  else
    Result.Size = 0;

  return Result;
}

/// This function populates an ELFStub with symbols using information read
/// from an ELF binary. The first entry of the range is skipped (it is the NULL
/// symbol). Only symbols with global or weak binding and with default or
/// protected visibility are added; all others are ignored. An empty range
/// adds nothing and succeeds.
///
/// Returns an error if the name of an accepted symbol cannot be read from the
/// dynamic string table.
///
/// @param TargetStub ELFStub to add symbols to.
/// @param DynSym Range of dynamic symbols to add to TargetStub.
/// @param DynStr StringRef to the dynamic string table.
template <class ELFT>
static Error populateSymbols(ELFStub &TargetStub,
                            const typename ELFT::SymRange DynSym,
                            StringRef DynStr) {
  // Dropping the NULL symbol from an empty range would violate the
  // precondition of drop_front(), so bail out early.
  if (DynSym.empty())
    return Error::success();

  // Skips the first symbol since it's the NULL symbol. Entries are bound by
  // reference so that no symbol table entry is copied per iteration.
  for (const auto &RawSym : DynSym.drop_front(1)) {
    // If a symbol does not have global or weak binding, ignore it.
    const uint8_t Binding = RawSym.getBinding();
    if (Binding != STB_GLOBAL && Binding != STB_WEAK)
      continue;
    // If a symbol doesn't have default or protected visibility, ignore it.
    const uint8_t Visibility = RawSym.getVisibility();
    if (Visibility != STV_DEFAULT && Visibility != STV_PROTECTED)
      continue;
    // Create an ELFSymbol and populate it with information from the symbol
    // table entry.
    Expected<StringRef> SymName = terminatedSubstr(DynStr, RawSym.st_name);
    if (!SymName)
      return SymName.takeError();
    TargetStub.Symbols.insert(createELFSym<ELFT>(*SymName, RawSym));
    // TODO: Populate symbol warning.
  }
  return Error::success();
}

/// Creates an ELFStub and fills in its Arch, SoName, NeededLibs and Symbols
/// members from an ELFObjectFile. Any failure while reading the dynamic
/// table, the program headers, the dynamic string table or the dynamic symbol
/// table is returned as an error.
/// @param ElfObj Source ELFObjectFile.
template <class ELFT>
static Expected<std::unique_ptr<ELFStub>>
buildStub(const ELFObjectFile<ELFT> &ElfObj) {
  using Elf_Dyn_Range = typename ELFT::DynRange;
  using Elf_Phdr_Range = typename ELFT::PhdrRange;
  using Elf_Sym_Range = typename ELFT::SymRange;
  using Elf_Sym = typename ELFT::Sym;

  auto Stub = std::make_unique<ELFStub>();
  const ELFFile<ELFT> *File = ElfObj.getELFFile();

  // The .dynamic table is the starting point for everything below.
  Expected<Elf_Dyn_Range> DynamicOrErr = File->dynamicEntries();
  if (!DynamicOrErr)
    return DynamicOrErr.takeError();

  // The program headers must be readable as well.
  Expected<Elf_Phdr_Range> PhdrsOrErr = File->program_headers();
  if (!PhdrsOrErr)
    return PhdrsOrErr.takeError();

  // Gather and validate the .dynamic entries of interest.
  DynamicEntries Entries;
  if (Error Err = populateDynamic<ELFT>(Entries, *DynamicOrErr))
    return std::move(Err);

  // Resolve the in-memory location of the .dynstr section contents.
  Expected<const uint8_t *> StrTabPtr = File->toMappedAddr(Entries.StrTabAddr);
  if (!StrTabPtr)
    return appendToError(StrTabPtr.takeError(),
                         "when locating .dynstr section contents");
  StringRef DynStr(reinterpret_cast<const char *>(*StrTabPtr),
                   Entries.StrSize);

  // Arch comes straight from the ELF header.
  Stub->Arch = File->getHeader()->e_machine;

  // SoName: DT_SONAME offset looked up in the dynamic string table.
  if (Entries.SONameOffset.hasValue()) {
    Expected<StringRef> SoNameOrErr =
        terminatedSubstr(DynStr, *Entries.SONameOffset);
    if (!SoNameOrErr)
      return appendToError(SoNameOrErr.takeError(), "when reading DT_SONAME");
    Stub->SoName = *SoNameOrErr;
  }

  // NeededLibs: one string table lookup per DT_NEEDED offset.
  for (uint64_t NeededOff : Entries.NeededLibNames) {
    Expected<StringRef> NeededOrErr = terminatedSubstr(DynStr, NeededOff);
    if (!NeededOrErr)
      return appendToError(NeededOrErr.takeError(), "when reading DT_NEEDED");
    Stub->NeededLibs.push_back(*NeededOrErr);
  }

  // Symbols: read from .dynsym, with names from the dynamic string table.
  Expected<uint64_t> NumSyms = getNumSyms(Entries, *File);
  if (!NumSyms)
    return NumSyms.takeError();
  if (*NumSyms == 0)
    return std::move(Stub);

  // Resolve the in-memory location of the .dynsym section contents.
  Expected<const uint8_t *> SymTabPtr = File->toMappedAddr(Entries.DynSymAddr);
  if (!SymTabPtr)
    return appendToError(SymTabPtr.takeError(),
                         "when locating .dynsym section contents");
  Elf_Sym_Range Syms = ArrayRef<Elf_Sym>(
      reinterpret_cast<const Elf_Sym *>(*SymTabPtr), *NumSyms);
  if (Error SymErr = populateSymbols<ELFT>(*Stub, Syms, DynStr))
    return appendToError(std::move(SymErr), "when reading dynamic symbols");

  return std::move(Stub);
}

/// Parses a memory buffer as a binary and, when it turns out to be an ELF
/// object file (32- or 64-bit, little or big endian), builds an ELFStub from
/// it. Errors from creating the binary or from building the stub are passed
/// on; any other kind of binary yields a "not supported" error.
///
/// @param Buf Buffer holding the file contents to read.
Expected<std::unique_ptr<ELFStub>> readELFFile(MemoryBufferRef Buf) {
  Expected<std::unique_ptr<Binary>> BinaryOrErr = createBinary(Buf);
  if (!BinaryOrErr)
    return BinaryOrErr.takeError();

  // Dispatch on the concrete ELF flavour of the parsed binary.
  Binary *Bin = BinaryOrErr->get();
  if (auto *Obj32LE = dyn_cast<ELFObjectFile<ELF32LE>>(Bin))
    return buildStub(*Obj32LE);
  if (auto *Obj64LE = dyn_cast<ELFObjectFile<ELF64LE>>(Bin))
    return buildStub(*Obj64LE);
  if (auto *Obj32BE = dyn_cast<ELFObjectFile<ELF32BE>>(Bin))
    return buildStub(*Obj32BE);
  if (auto *Obj64BE = dyn_cast<ELFObjectFile<ELF64BE>>(Bin))
    return buildStub(*Obj64BE);

  return createStringError(errc::not_supported, "Unsupported binary format");
}

} // end namespace elfabi
} // end namespace llvm