reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
//===- TypeHashing.h ---------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H

#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"

#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"

#include "llvm/Support/FormatProviders.h"

#include <type_traits>

namespace llvm {
namespace codeview {

/// A locally hashed type represents a straightforward hash code of a serialized
/// record.  The record is simply serialized, and then the bytes are hashed by
/// a standard algorithm.  This is sufficient for the case of de-duplicating
/// records within a single sequence of types, because if two records both have
/// a back-reference to the same type in the same stream, they will both have
/// the same numeric value for the TypeIndex of the back reference.
struct LocallyHashedType {
  hash_code Hash;
  ArrayRef<uint8_t> RecordData;

  /// Given a type, compute its local hash.
  static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);

  /// Given a sequence of types, compute all of the local hashes.
  template <typename Range>
  static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
    std::vector<LocallyHashedType> Hashes;
    Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
    for (const auto &R : Records)
      Hashes.push_back(hashType(R));

    return Hashes;
  }

  static std::vector<LocallyHashedType>
  hashTypeCollection(TypeCollection &Types) {
    std::vector<LocallyHashedType> Hashes;
    Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
      Hashes.push_back(hashType(Type.RecordData));
    });
    return Hashes;
  }
};

enum class GlobalTypeHashAlg : uint16_t {
  SHA1 = 0, // standard 20-byte SHA1 hash
  SHA1_8    // last 8-bytes of standard SHA1 hash
};

/// A globally hashed type represents a hash value that is sufficient to
/// uniquely identify a record across multiple type streams or type sequences.
/// This works by, for any given record A which references B, replacing the
/// TypeIndex that refers to B with a previously-computed global hash for B.  As
/// this is a recursive algorithm (e.g. the global hash of B also depends on the
/// global hashes of the types that B refers to), a global hash can uniquely
/// identify identify that A occurs in another stream that has a completely
/// different graph structure.  Although the hash itself is slower to compute,
/// probing is much faster with a globally hashed type, because the hash itself
/// is considered "as good as" the original type.  Since type records can be
/// quite large, this makes the equality comparison of the hash much faster than
/// equality comparison of a full record.
struct GloballyHashedType {
  GloballyHashedType() = default;
  GloballyHashedType(StringRef H)
      : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
  GloballyHashedType(ArrayRef<uint8_t> H) {
    assert(H.size() == 8);
    ::memcpy(Hash.data(), H.data(), 8);
  }
  std::array<uint8_t, 8> Hash;

  bool empty() const { return *(const uint64_t*)Hash.data() == 0; }

  /// Given a sequence of bytes representing a record, compute a global hash for
  /// this record.  Due to the nature of global hashes incorporating the hashes
  /// of referenced records, this function requires a list of types and ids
  /// that RecordData might reference, indexable by TypeIndex.
  static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
                                     ArrayRef<GloballyHashedType> PreviousTypes,
                                     ArrayRef<GloballyHashedType> PreviousIds);

  /// Given a sequence of bytes representing a record, compute a global hash for
  /// this record.  Due to the nature of global hashes incorporating the hashes
  /// of referenced records, this function requires a list of types and ids
  /// that RecordData might reference, indexable by TypeIndex.
  static GloballyHashedType hashType(CVType Type,
                                     ArrayRef<GloballyHashedType> PreviousTypes,
                                     ArrayRef<GloballyHashedType> PreviousIds) {
    return hashType(Type.RecordData, PreviousTypes, PreviousIds);
  }

  /// Given a sequence of combined type and ID records, compute global hashes
  /// for each of them, returning the results in a vector of hashed types.
  template <typename Range>
  static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
    std::vector<GloballyHashedType> Hashes;
    bool UnresolvedRecords = false;
    for (const auto &R : Records) {
      GloballyHashedType H = hashType(R, Hashes, Hashes);
      if (H.empty())
        UnresolvedRecords = true;
      Hashes.push_back(H);
    }

    // In some rare cases, there might be records with forward references in the
    // stream. Several passes might be needed to fully hash each record in the
    // Type stream. However this occurs on very small OBJs generated by MASM,
    // with a dozen records at most. Therefore this codepath isn't
    // time-critical, as it isn't taken in 99% of cases.
    while (UnresolvedRecords) {
      UnresolvedRecords = false;
      auto HashIt = Hashes.begin();
      for (const auto &R : Records) {
        if (HashIt->empty()) {
          GloballyHashedType H = hashType(R, Hashes, Hashes);
          if (H.empty())
            UnresolvedRecords = true;
          else
            *HashIt = H;
        }
        ++HashIt;
      }
    }

    return Hashes;
  }

  /// Given a sequence of combined type and ID records, compute global hashes
  /// for each of them, returning the results in a vector of hashed types.
  template <typename Range>
  static std::vector<GloballyHashedType>
  hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
    std::vector<GloballyHashedType> IdHashes;
    for (const auto &R : Records)
      IdHashes.push_back(hashType(R, TypeHashes, IdHashes));

    return IdHashes;
  }

  static std::vector<GloballyHashedType>
  hashTypeCollection(TypeCollection &Types) {
    std::vector<GloballyHashedType> Hashes;
    Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
      Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
    });
    return Hashes;
  }
};
#if defined(_MSC_VER)
// is_trivially_copyable is not available in older versions of libc++, but it is
// available in all supported versions of MSVC, so at least this gives us some
// coverage.
static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
              "GloballyHashedType must be trivially copyable so that we can "
              "reinterpret_cast arrays of hash data to arrays of "
              "GloballyHashedType");
#endif
} // namespace codeview

template <> struct DenseMapInfo<codeview::LocallyHashedType> {
  static codeview::LocallyHashedType Empty;
  static codeview::LocallyHashedType Tombstone;

  static codeview::LocallyHashedType getEmptyKey() { return Empty; }

  static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }

  static unsigned getHashValue(codeview::LocallyHashedType Val) {
    return Val.Hash;
  }

  static bool isEqual(codeview::LocallyHashedType LHS,
                      codeview::LocallyHashedType RHS) {
    if (LHS.Hash != RHS.Hash)
      return false;
    return LHS.RecordData == RHS.RecordData;
  }
};

template <> struct DenseMapInfo<codeview::GloballyHashedType> {
  static codeview::GloballyHashedType Empty;
  static codeview::GloballyHashedType Tombstone;

  static codeview::GloballyHashedType getEmptyKey() { return Empty; }

  static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }

  static unsigned getHashValue(codeview::GloballyHashedType Val) {
    return *reinterpret_cast<const unsigned *>(Val.Hash.data());
  }

  static bool isEqual(codeview::GloballyHashedType LHS,
                      codeview::GloballyHashedType RHS) {
    return LHS.Hash == RHS.Hash;
  }
};

template <> struct format_provider<codeview::LocallyHashedType> {
public:
  static void format(const codeview::LocallyHashedType &V,
                     llvm::raw_ostream &Stream, StringRef Style) {
    write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
  }
};

template <> struct format_provider<codeview::GloballyHashedType> {
public:
  static void format(const codeview::GloballyHashedType &V,
                     llvm::raw_ostream &Stream, StringRef Style) {
    for (uint8_t B : V.Hash) {
      write_hex(Stream, B, HexPrintStyle::Upper, 2);
    }
  }
};

} // namespace llvm

#endif