1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
| //===- TypeHashing.h ---------------------------------------------*- C++-*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeCollection.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/Support/FormatProviders.h"
#include <type_traits>
namespace llvm {
namespace codeview {
/// A locally hashed type represents a straightforward hash code of a serialized
/// record. The record is simply serialized, and then the bytes are hashed by
/// a standard algorithm. This is sufficient for the case of de-duplicating
/// records within a single sequence of types, because if two records both have
/// a back-reference to the same type in the same stream, they will both have
/// the same numeric value for the TypeIndex of the back reference.
struct LocallyHashedType {
hash_code Hash;
ArrayRef<uint8_t> RecordData;
/// Given a type, compute its local hash.
static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
/// Given a sequence of types, compute all of the local hashes.
template <typename Range>
static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
std::vector<LocallyHashedType> Hashes;
Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
for (const auto &R : Records)
Hashes.push_back(hashType(R));
return Hashes;
}
static std::vector<LocallyHashedType>
hashTypeCollection(TypeCollection &Types) {
std::vector<LocallyHashedType> Hashes;
Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
Hashes.push_back(hashType(Type.RecordData));
});
return Hashes;
}
};
enum class GlobalTypeHashAlg : uint16_t {
SHA1 = 0, // standard 20-byte SHA1 hash
SHA1_8 // last 8-bytes of standard SHA1 hash
};
/// A globally hashed type represents a hash value that is sufficient to
/// uniquely identify a record across multiple type streams or type sequences.
/// This works by, for any given record A which references B, replacing the
/// TypeIndex that refers to B with a previously-computed global hash for B. As
/// this is a recursive algorithm (e.g. the global hash of B also depends on the
/// global hashes of the types that B refers to), a global hash can uniquely
/// identify identify that A occurs in another stream that has a completely
/// different graph structure. Although the hash itself is slower to compute,
/// probing is much faster with a globally hashed type, because the hash itself
/// is considered "as good as" the original type. Since type records can be
/// quite large, this makes the equality comparison of the hash much faster than
/// equality comparison of a full record.
struct GloballyHashedType {
GloballyHashedType() = default;
GloballyHashedType(StringRef H)
: GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
GloballyHashedType(ArrayRef<uint8_t> H) {
assert(H.size() == 8);
::memcpy(Hash.data(), H.data(), 8);
}
std::array<uint8_t, 8> Hash;
bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
/// Given a sequence of bytes representing a record, compute a global hash for
/// this record. Due to the nature of global hashes incorporating the hashes
/// of referenced records, this function requires a list of types and ids
/// that RecordData might reference, indexable by TypeIndex.
static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
ArrayRef<GloballyHashedType> PreviousTypes,
ArrayRef<GloballyHashedType> PreviousIds);
/// Given a sequence of bytes representing a record, compute a global hash for
/// this record. Due to the nature of global hashes incorporating the hashes
/// of referenced records, this function requires a list of types and ids
/// that RecordData might reference, indexable by TypeIndex.
static GloballyHashedType hashType(CVType Type,
ArrayRef<GloballyHashedType> PreviousTypes,
ArrayRef<GloballyHashedType> PreviousIds) {
return hashType(Type.RecordData, PreviousTypes, PreviousIds);
}
/// Given a sequence of combined type and ID records, compute global hashes
/// for each of them, returning the results in a vector of hashed types.
template <typename Range>
static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
std::vector<GloballyHashedType> Hashes;
bool UnresolvedRecords = false;
for (const auto &R : Records) {
GloballyHashedType H = hashType(R, Hashes, Hashes);
if (H.empty())
UnresolvedRecords = true;
Hashes.push_back(H);
}
// In some rare cases, there might be records with forward references in the
// stream. Several passes might be needed to fully hash each record in the
// Type stream. However this occurs on very small OBJs generated by MASM,
// with a dozen records at most. Therefore this codepath isn't
// time-critical, as it isn't taken in 99% of cases.
while (UnresolvedRecords) {
UnresolvedRecords = false;
auto HashIt = Hashes.begin();
for (const auto &R : Records) {
if (HashIt->empty()) {
GloballyHashedType H = hashType(R, Hashes, Hashes);
if (H.empty())
UnresolvedRecords = true;
else
*HashIt = H;
}
++HashIt;
}
}
return Hashes;
}
/// Given a sequence of combined type and ID records, compute global hashes
/// for each of them, returning the results in a vector of hashed types.
template <typename Range>
static std::vector<GloballyHashedType>
hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
std::vector<GloballyHashedType> IdHashes;
for (const auto &R : Records)
IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
return IdHashes;
}
static std::vector<GloballyHashedType>
hashTypeCollection(TypeCollection &Types) {
std::vector<GloballyHashedType> Hashes;
Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
});
return Hashes;
}
};
#if defined(_MSC_VER)
// is_trivially_copyable is not available in older versions of libc++, but it is
// available in all supported versions of MSVC, so at least this gives us some
// coverage.
static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
"GloballyHashedType must be trivially copyable so that we can "
"reinterpret_cast arrays of hash data to arrays of "
"GloballyHashedType");
#endif
} // namespace codeview
template <> struct DenseMapInfo<codeview::LocallyHashedType> {
static codeview::LocallyHashedType Empty;
static codeview::LocallyHashedType Tombstone;
static codeview::LocallyHashedType getEmptyKey() { return Empty; }
static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
static unsigned getHashValue(codeview::LocallyHashedType Val) {
return Val.Hash;
}
static bool isEqual(codeview::LocallyHashedType LHS,
codeview::LocallyHashedType RHS) {
if (LHS.Hash != RHS.Hash)
return false;
return LHS.RecordData == RHS.RecordData;
}
};
template <> struct DenseMapInfo<codeview::GloballyHashedType> {
static codeview::GloballyHashedType Empty;
static codeview::GloballyHashedType Tombstone;
static codeview::GloballyHashedType getEmptyKey() { return Empty; }
static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
static unsigned getHashValue(codeview::GloballyHashedType Val) {
return *reinterpret_cast<const unsigned *>(Val.Hash.data());
}
static bool isEqual(codeview::GloballyHashedType LHS,
codeview::GloballyHashedType RHS) {
return LHS.Hash == RHS.Hash;
}
};
template <> struct format_provider<codeview::LocallyHashedType> {
public:
static void format(const codeview::LocallyHashedType &V,
llvm::raw_ostream &Stream, StringRef Style) {
write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
}
};
template <> struct format_provider<codeview::GloballyHashedType> {
public:
static void format(const codeview::GloballyHashedType &V,
llvm::raw_ostream &Stream, StringRef Style) {
for (uint8_t B : V.Hash) {
write_hex(Stream, B, HexPrintStyle::Upper, 2);
}
}
};
} // namespace llvm
#endif
|