aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Collingbourne <peter@pcc.me.uk>2016-11-29 20:43:47 +0000
committerPeter Collingbourne <peter@pcc.me.uk>2016-11-29 20:43:47 +0000
commite6480e2add92580bb9016e26e3c35919849476a6 (patch)
treeca12a250f83de9868098129f7c623ff1767aaf10
parentc83371f0ebba44b9a11e3e694a4425a0a83f1170 (diff)
Bitcode: Introduce BitcodeWriter interface.
This interface allows clients to write multiple modules to a single bitcode file. Also introduce the llvm-cat utility which can be used to create a bitcode file containing multiple modules. Differential Revision: https://reviews.llvm.org/D26179 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288195 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Bitcode/BitcodeReader.h2
-rw-r--r--include/llvm/Bitcode/BitcodeWriter.h27
-rw-r--r--include/llvm/Bitcode/BitstreamWriter.h1
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp169
-rw-r--r--test/Bitcode/Inputs/multi-module.ll3
-rw-r--r--test/Bitcode/multi-module.ll39
-rw-r--r--test/CMakeLists.txt1
-rw-r--r--tools/LLVMBuild.txt1
-rw-r--r--tools/llvm-cat/CMakeLists.txt10
-rw-r--r--tools/llvm-cat/LLVMBuild.txt22
-rw-r--r--tools/llvm-cat/llvm-cat.cpp73
11 files changed, 254 insertions, 94 deletions
diff --git a/include/llvm/Bitcode/BitcodeReader.h b/include/llvm/Bitcode/BitcodeReader.h
index 2fd19ffdbab..30ae4abfa96 100644
--- a/include/llvm/Bitcode/BitcodeReader.h
+++ b/include/llvm/Bitcode/BitcodeReader.h
@@ -66,6 +66,8 @@ namespace llvm {
bool ShouldLazyLoadMetadata);
public:
+ ArrayRef<uint8_t> getBuffer() const { return Buffer; }
+
/// Read the bitcode module and prepare for lazy deserialization of function
/// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
Expected<std::unique_ptr<Module>>
diff --git a/include/llvm/Bitcode/BitcodeWriter.h b/include/llvm/Bitcode/BitcodeWriter.h
index 57955dfcef0..4f72f98bbf9 100644
--- a/include/llvm/Bitcode/BitcodeWriter.h
+++ b/include/llvm/Bitcode/BitcodeWriter.h
@@ -18,9 +18,36 @@
#include <string>
namespace llvm {
+ class BitstreamWriter;
class Module;
class raw_ostream;
+ class BitcodeWriter {
+ SmallVectorImpl<char> &Buffer;
+ std::unique_ptr<BitstreamWriter> Stream;
+
+ public:
+ /// Create a BitcodeWriter that writes to Buffer.
+ BitcodeWriter(SmallVectorImpl<char> &Buffer);
+
+ ~BitcodeWriter();
+
+ /// Write the specified module to the buffer specified at construction time.
+ ///
+ /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a
+ /// Value in \c M. These will be reconstructed exactly when \a M is
+ /// deserialized.
+ ///
+ /// If \c Index is supplied, the bitcode will contain the summary index
+ /// (currently for use in ThinLTO optimization).
+ ///
+ /// \p GenerateHash enables hashing the Module and including the hash in the
+ /// bitcode (currently for use in ThinLTO incremental build).
+ void writeModule(const Module *M, bool ShouldPreserveUseListOrder = false,
+ const ModuleSummaryIndex *Index = nullptr,
+ bool GenerateHash = false);
+ };
+
/// \brief Write the specified module to the specified raw output stream.
///
/// For streams where it matters, the given stream should be in "binary"
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 707b2105c16..b79e88d2c05 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -509,6 +509,7 @@ public:
void EnterBlockInfoBlock() {
EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2);
BlockInfoCurBID = ~0U;
+ BlockInfoRecords.clear();
}
private:
/// SwitchToBlockID - If we aren't already talking about the specified block
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index c62cd1f2042..c324100e1b7 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -65,36 +65,20 @@ enum {
};
/// Abstract class to manage the bitcode writing, subclassed for each bitcode
-/// file type. Owns the BitstreamWriter, and includes the main entry point for
-/// writing.
-class BitcodeWriter {
+/// file type.
+class BitcodeWriterBase {
protected:
- /// Pointer to the buffer allocated by caller for bitcode writing.
- const SmallVectorImpl<char> &Buffer;
-
- /// The stream created and owned by the BitodeWriter.
- BitstreamWriter Stream;
+ /// The stream created and owned by the client.
+ BitstreamWriter &Stream;
/// Saves the offset of the VSTOffset record that must eventually be
/// backpatched with the offset of the actual VST.
uint64_t VSTOffsetPlaceholder = 0;
public:
- /// Constructs a BitcodeWriter object, and initializes a BitstreamRecord,
- /// writing to the provided \p Buffer.
- BitcodeWriter(SmallVectorImpl<char> &Buffer)
- : Buffer(Buffer), Stream(Buffer) {}
-
- virtual ~BitcodeWriter() = default;
-
- /// Main entry point to write the bitcode file, which writes the bitcode
- /// header and will then invoke the virtual writeBlocks() method.
- void write();
-
-private:
- /// Derived classes must implement this to write the corresponding blocks for
- /// that bitcode file type.
- virtual void writeBlocks() = 0;
+ /// Constructs a BitcodeWriterBase object that writes to the provided
+ /// \p Stream.
+ BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {}
protected:
bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; }
@@ -103,7 +87,10 @@ protected:
};
/// Class to manage the bitcode writing for a module.
-class ModuleBitcodeWriter : public BitcodeWriter {
+class ModuleBitcodeWriter : public BitcodeWriterBase {
+ /// Pointer to the buffer allocated by caller for bitcode writing.
+ const SmallVectorImpl<char> &Buffer;
+
/// The Module to write to bitcode.
const Module &M;
@@ -116,8 +103,8 @@ class ModuleBitcodeWriter : public BitcodeWriter {
/// True if a module hash record should be written.
bool GenerateHash;
- /// The start bit of the module block, for use in generating a module hash
- uint64_t BitcodeStartBit = 0;
+ /// The start bit of the identification block.
+ uint64_t BitcodeStartBit;
/// Map that holds the correspondence between GUIDs in the summary index,
/// that came from indirect call profiles, and a value id generated by this
@@ -131,16 +118,11 @@ public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
- bool ShouldPreserveUseListOrder,
+ BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash)
- : BitcodeWriter(Buffer), M(*M), VE(*M, ShouldPreserveUseListOrder),
- Index(Index), GenerateHash(GenerateHash) {
- // Save the start bit of the actual bitcode, in case there is space
- // saved at the start for the darwin header above. The reader stream
- // will start at the bitcode, and we need the offset of the VST
- // to line up.
- BitcodeStartBit = Stream.GetCurrentBitNo();
-
+ : BitcodeWriterBase(Stream), Buffer(Buffer), M(*M),
+ VE(*M, ShouldPreserveUseListOrder), Index(Index),
+ GenerateHash(GenerateHash), BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
@@ -162,21 +144,12 @@ public:
assignValueId(CallEdge.first.getGUID());
}
-private:
- /// Main entry point for writing a module to bitcode, invoked by
- /// BitcodeWriter::write() after it writes the header.
- void writeBlocks() override;
-
- /// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
- /// current llvm version, and a record for the epoch number.
- void writeIdentificationBlock();
-
/// Emit the current module to the bitstream.
- void writeModule();
+ void write();
+private:
uint64_t bitcodeStartBit() { return BitcodeStartBit; }
- void writeStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse);
void writeAttributeGroupTable();
void writeAttributeTable();
void writeTypeTable();
@@ -310,7 +283,7 @@ private:
};
/// Class to manage the bitcode writing for a combined index.
-class IndexBitcodeWriter : public BitcodeWriter {
+class IndexBitcodeWriter : public BitcodeWriterBase {
/// The combined index to write to bitcode.
const ModuleSummaryIndex &Index;
@@ -329,11 +302,10 @@ public:
/// Constructs a IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Buffer. When writing a subset of the index
/// for a distributed backend, provide a \p ModuleToSummariesForIndex map.
- IndexBitcodeWriter(SmallVectorImpl<char> &Buffer,
- const ModuleSummaryIndex &Index,
+ IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index,
const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr)
- : BitcodeWriter(Buffer), Index(Index),
+ : BitcodeWriterBase(Stream), Index(Index),
ModuleToSummariesForIndex(ModuleToSummariesForIndex) {
// Assign unique value ids to all summaries to be written, for use
// in writing out the call graph edges. Save the mapping from GUID
@@ -480,11 +452,10 @@ public:
/// Obtain the end iterator over the summaries to be written.
iterator end() { return iterator(*this, /*IsAtEnd=*/true); }
-private:
- /// Main entry point for writing a combined index to bitcode, invoked by
- /// BitcodeWriter::write() after it writes the header.
- void writeBlocks() override;
+ /// Main entry point for writing a combined index to bitcode.
+ void write();
+private:
void writeIndex();
void writeModStrings();
void writeCombinedValueSymbolTable();
@@ -597,8 +568,8 @@ static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) {
llvm_unreachable("Invalid synch scope");
}
-void ModuleBitcodeWriter::writeStringRecord(unsigned Code, StringRef Str,
- unsigned AbbrevToUse) {
+static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
+ StringRef Str, unsigned AbbrevToUse) {
SmallVector<unsigned, 64> Vals;
// Code: [strchar x N]
@@ -922,7 +893,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
// Emit the name if it is present.
if (!ST->getName().empty())
- writeStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+ writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
StructNameAbbrev);
}
break;
@@ -1073,7 +1044,7 @@ void ModuleBitcodeWriter::writeComdats() {
/// Write a record that will eventually hold the word offset of the
/// module-level VST. For now the offset is 0, which will be backpatched
/// after the real VST is written. Saves the bit offset to backpatch.
-void BitcodeWriter::writeValueSymbolTableForwardDecl() {
+void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
// Write a placeholder value in for the offset of the real VST,
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
@@ -1120,13 +1091,13 @@ static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
void ModuleBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
if (!M.getTargetTriple().empty())
- writeStringRecord(bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
0 /*TODO*/);
const std::string &DL = M.getDataLayoutStr();
if (!DL.empty())
- writeStringRecord(bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
if (!M.getModuleInlineAsm().empty())
- writeStringRecord(bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
// Emit information about sections and GC, computing how many there are. Also
@@ -1142,7 +1113,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[GV.getSection()];
if (!Entry) {
- writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@@ -1154,7 +1125,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[F.getSection()];
if (!Entry) {
- writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@@ -1163,7 +1134,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Same for GC names.
unsigned &Entry = GCMap[F.getGC()];
if (!Entry) {
- writeStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
+ 0 /*TODO*/);
Entry = GCMap.size();
}
}
@@ -2761,11 +2733,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// Get the offset of the VST we are writing, and backpatch it into
// the VST forward declaration record.
uint64_t VSTOffset = Stream.GetCurrentBitNo();
- // The BitcodeStartBit was the stream offset of the actual bitcode
- // (e.g. excluding any initial darwin header).
+ // The BitcodeStartBit was the stream offset of the identification block.
VSTOffset -= bitcodeStartBit();
assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
- Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
+ // Note that we add 1 here because the offset is relative to one word
+ // before the start of the identification block, which was historically
+ // always the start of the regular bitcode header.
+ Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
}
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
@@ -2853,7 +2827,10 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// actual bitcode written to the stream).
uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit();
assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
- NameVals.push_back(BitcodeIndex / 32);
+ // Note that we add 1 here because the offset is relative to one word
+ // before the start of the identification block, which was historically
+ // always the start of the regular bitcode header.
+ NameVals.push_back(BitcodeIndex / 32 + 1);
Code = bitc::VST_CODE_FNENTRY;
AbbrevToUse = FnEntry8BitAbbrev;
@@ -3617,7 +3594,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.ExitBlock();
}
-void ModuleBitcodeWriter::writeIdentificationBlock() {
+/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
+/// current llvm version, and a record for the epoch number.
+void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
@@ -3626,7 +3605,7 @@ void ModuleBitcodeWriter::writeIdentificationBlock() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
auto StringAbbrev = Stream.EmitAbbrev(Abbv);
- writeStringRecord(bitc::IDENTIFICATION_CODE_STRING,
+ writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING,
"LLVM" LLVM_VERSION_STRING, StringAbbrev);
// Write the epoch version
@@ -3655,24 +3634,9 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
}
-void BitcodeWriter::write() {
- // Emit the file header first.
- writeBitcodeHeader();
-
- writeBlocks();
-}
-
-void ModuleBitcodeWriter::writeBlocks() {
- writeIdentificationBlock();
- writeModule();
-}
-
-void IndexBitcodeWriter::writeBlocks() {
- // Index contains only a single outer (module) block.
- writeIndex();
-}
+void ModuleBitcodeWriter::write() {
+ writeIdentificationBlock(Stream);
-void ModuleBitcodeWriter::writeModule() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
@@ -3801,7 +3765,7 @@ static void emitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
}
/// Helper to write the header common to all bitcode files.
-void BitcodeWriter::writeBitcodeHeader() {
+static void writeBitcodeHeader(BitstreamWriter &Stream) {
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
Stream.Emit((unsigned)'C', 8);
@@ -3811,6 +3775,22 @@ void BitcodeWriter::writeBitcodeHeader() {
Stream.Emit(0xD, 4);
}
+BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
+ : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
+ writeBitcodeHeader(*Stream);
+}
+
+BitcodeWriter::~BitcodeWriter() = default;
+
+void BitcodeWriter::writeModule(const Module *M,
+ bool ShouldPreserveUseListOrder,
+ const ModuleSummaryIndex *Index,
+ bool GenerateHash) {
+ ModuleBitcodeWriter ModuleWriter(
+ M, Buffer, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash);
+ ModuleWriter.write();
+}
+
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
@@ -3826,10 +3806,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
- // Emit the module into the buffer.
- ModuleBitcodeWriter ModuleWriter(M, Buffer, ShouldPreserveUseListOrder, Index,
- GenerateHash);
- ModuleWriter.write();
+ BitcodeWriter Writer(Buffer);
+ Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash);
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
emitDarwinBCHeaderAndTrailer(Buffer, TT);
@@ -3838,7 +3816,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
Out.write((char*)&Buffer.front(), Buffer.size());
}
-void IndexBitcodeWriter::writeIndex() {
+void IndexBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
@@ -3872,7 +3850,10 @@ void llvm::WriteIndexToFile(
SmallVector<char, 0> Buffer;
Buffer.reserve(256 * 1024);
- IndexBitcodeWriter IndexWriter(Buffer, Index, ModuleToSummariesForIndex);
+ BitstreamWriter Stream(Buffer);
+ writeBitcodeHeader(Stream);
+
+ IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex);
IndexWriter.write();
Out.write((char *)&Buffer.front(), Buffer.size());
diff --git a/test/Bitcode/Inputs/multi-module.ll b/test/Bitcode/Inputs/multi-module.ll
new file mode 100644
index 00000000000..e4e9b82be64
--- /dev/null
+++ b/test/Bitcode/Inputs/multi-module.ll
@@ -0,0 +1,3 @@
+define void @f2() {
+ ret void
+}
diff --git a/test/Bitcode/multi-module.ll b/test/Bitcode/multi-module.ll
new file mode 100644
index 00000000000..a6c25c7cdbe
--- /dev/null
+++ b/test/Bitcode/multi-module.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-cat -o %t %s %S/Inputs/multi-module.ll
+; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
+; ERROR: Expected a single module
+
+; FIXME: Introduce a tool for extracting modules from bitcode and use it here.
+; For now we can at least check that the bitcode contains multiple modules.
+; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
+
+; RUN: llvm-as -o %t1 %s
+; RUN: llvm-as -o %t2 %S/Inputs/multi-module.ll
+; RUN: llvm-cat -o %t %t1 %t2
+; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
+; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
+
+; RUN: llvm-cat -b -o %t %t1 %t2
+; RUN: not llvm-dis -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s
+; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s
+
+; RUN: llvm-cat -b -o %t3 %t %t
+; RUN: not llvm-dis -o - %t3 2>&1 | FileCheck --check-prefix=ERROR %s
+; RUN: llvm-bcanalyzer -dump %t3 | FileCheck --check-prefix=BCA4 %s
+
+; BCA: <IDENTIFICATION_BLOCK
+; BCA: <MODULE_BLOCK
+; BCA: <IDENTIFICATION_BLOCK
+; BCA: <MODULE_BLOCK
+
+; BCA4: <IDENTIFICATION_BLOCK
+; BCA4: <MODULE_BLOCK
+; BCA4: <IDENTIFICATION_BLOCK
+; BCA4: <MODULE_BLOCK
+; BCA4: <IDENTIFICATION_BLOCK
+; BCA4: <MODULE_BLOCK
+; BCA4: <IDENTIFICATION_BLOCK
+; BCA4: <MODULE_BLOCK
+
+define void @f1() {
+ ret void
+}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c2ba73a5a67..43e5b823d8d 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -32,6 +32,7 @@ set(LLVM_TEST_DEPENDS
llvm-as
llvm-bcanalyzer
llvm-c-test
+ llvm-cat
llvm-cxxfilt
llvm-config
llvm-cov
diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt
index d4b01477185..d8cf22e5051 100644
--- a/tools/LLVMBuild.txt
+++ b/tools/LLVMBuild.txt
@@ -24,6 +24,7 @@ subdirectories =
llvm-ar
llvm-as
llvm-bcanalyzer
+ llvm-cat
llvm-cov
llvm-diff
llvm-dis
diff --git a/tools/llvm-cat/CMakeLists.txt b/tools/llvm-cat/CMakeLists.txt
new file mode 100644
index 00000000000..3d503c351e8
--- /dev/null
+++ b/tools/llvm-cat/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS
+ IRReader
+ BitWriter
+ Core
+ Support
+ )
+
+add_llvm_tool(llvm-cat
+ llvm-cat.cpp
+ )
diff --git a/tools/llvm-cat/LLVMBuild.txt b/tools/llvm-cat/LLVMBuild.txt
new file mode 100644
index 00000000000..c3e0c6feeae
--- /dev/null
+++ b/tools/llvm-cat/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-cat/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-cat
+parent = Tools
+required_libraries = AsmParser BitWriter
diff --git a/tools/llvm-cat/llvm-cat.cpp b/tools/llvm-cat/llvm-cat.cpp
new file mode 100644
index 00000000000..439709311d4
--- /dev/null
+++ b/tools/llvm-cat/llvm-cat.cpp
@@ -0,0 +1,73 @@
+//===-- llvm-cat.cpp - LLVM module concatenation utility ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is for testing features that rely on multi-module bitcode files.
+// It takes a list of input modules and uses them to create a multi-module
+// bitcode file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ BinaryCat("b", cl::desc("Whether to perform binary concatenation"));
+
+static cl::opt<std::string> OutputFilename("o", cl::Required,
+ cl::desc("Output filename"),
+ cl::value_desc("filename"));
+
+static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
+ cl::desc("<input files>"));
+
+int main(int argc, char **argv) {
+ cl::ParseCommandLineOptions(argc, argv, "Module concatenation");
+
+ ExitOnError ExitOnErr("llvm-cat: ");
+ LLVMContext Context;
+
+ SmallVector<char, 0> Buffer;
+ BitcodeWriter Writer(Buffer);
+ if (BinaryCat) {
+ for (std::string InputFilename : InputFilenames) {
+ std::unique_ptr<MemoryBuffer> MB = ExitOnErr(
+ errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename)));
+ std::vector<BitcodeModule> Mods = ExitOnErr(getBitcodeModuleList(*MB));
+ for (auto &BitcodeMod : Mods)
+ Buffer.insert(Buffer.end(), BitcodeMod.getBuffer().begin(),
+ BitcodeMod.getBuffer().end());
+ }
+ } else {
+ for (std::string InputFilename : InputFilenames) {
+ SMDiagnostic Err;
+ std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
+ if (!M) {
+ Err.print(argv[0], errs());
+ return 1;
+ }
+ Writer.writeModule(M.get());
+ }
+ }
+
+ std::error_code EC;
+ raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
+ if (EC) {
+ llvm::errs() << argv[0] << ": cannot open " << OutputFilename
+ << " for writing: " << EC.message();
+ return 1;
+ }
+
+ OS.write(Buffer.data(), Buffer.size());
+ return 0;
+}