Module 8: Full Chain, Writing BOFs & Security
End-to-end loader flow, writing production BOFs, compilation, and detection considerations.
Why This Module?
This final module ties everything together. We trace the complete execution flow from reading the COFF file to calling go() and retrieving output. Then we cover practical BOF authoring, compilation for both MSVC and MinGW, security/OPSEC considerations for both the loader and BOFs, and how COFFLoader compares to alternative loaders like bof-launcher by The-Z-Labs.
The Complete RunCOFF Flow
Here is the entire execution pipeline in one view, referencing every module:
TEXTRunCOFF(functionname="go", coff_data, filesize, argumentdata, argumentSize)
|
+-- [Module 2] Parse COFF header
| coff_header = (coff_file_header_t*)coff_data
| Validate: Machine == 0x8664, SizeOfOptionalHeader == 0
|
+-- [Module 2] Locate tables
| sections = coff_data + sizeof(coff_file_header_t)
| symbols = coff_data + PointerToSymbolTable
| string_table = symbols + (NumberOfSymbols * 18)
|
+-- [Module 4] Load sections
| for each section i:
| sectionMapping[i] = VirtualAlloc(NULL, SizeOfRawData, RWX)
| memcpy(sectionMapping[i], coff_data + PointerToRawData, SizeOfRawData)
|
+-- [Module 4] Allocate function pointer table
| relocationCount = sum of all sections' NumberOfRelocations
| functionMapping = VirtualAlloc(NULL, relocationCount * 8, RWX)
|
+-- [Module 7] Populate InternalFunctions[30]
| InternalFunctions[0] = {"BeaconDataParse", &BeaconDataParse}
| InternalFunctions[1] = {"BeaconDataInt", &BeaconDataInt}
| ... (all 24 entries)
|
+-- [Module 5+6] Process relocations (per section)
| for each section secIdx:
| for each relocation relIdx:
| symIdx = relocs[relIdx].SymbolTableIndex
| symName = get_symbol_name(symbols[symIdx])
| |
| +-- [Module 5] Resolve symbol
| | if defined(symIdx): addr = sectionMapping[sec-1] + Value
| | elif __imp_: addr = functionMapping slot (filled by process_symbol)
| | else: addr = process_symbol(symName) direct
| |
| +-- [Module 6] Apply relocation fixup
| ADDR64: *(uint64_t*)fixup = symbolAddr
| REL32: *(int32_t*)fixup = symbolAddr - (fixup + 4)
| REL32_N: *(int32_t*)fixup = symbolAddr - (fixup + 4 + N)
| ADDR32NB: *(uint32_t*)fixup = symbolAddr - imageBase
|
+-- Find entry point
| Scan symbol table for "go" (x64) or "_go" (x86)
| entryAddr = sectionMapping[sym.SectionNumber - 1] + sym.Value
|
+-- Call the BOF
| typedef void (*entry_t)(char*, int);
| entry_t entry = (entry_t)entryAddr;
| entry(argumentdata, argumentSize);
|
+-- Retrieve output
| char* output = BeaconGetOutputData(&outsize);
|
+-- Cleanup
for each section: VirtualFree(sectionMapping[i])
VirtualFree(functionMapping)
free(sectionMapping)
Entry Point Discovery
COFFLoader finds the entry function by scanning the symbol table for a symbol whose name matches the requested function name. On x64, it looks for the exact name (e.g., go). On x86, the C calling convention prepends an underscore, so it looks for _go:
C// Find the entry point symbol
void* entryPoint = NULL;
for (int i = 0; i < coff_header->NumberOfSymbols; i++) {
char* name = get_symbol_name(&symbols[i], string_table);
#ifdef _WIN64
if (strcmp(name, functionname) == 0) {
#else
// x86: prepend underscore
char decorated[256];
snprintf(decorated, sizeof(decorated), "_%s", functionname);
if (strcmp(name, decorated) == 0) {
#endif
int secIdx = symbols[i].SectionNumber - 1;
entryPoint = sectionMapping[secIdx] + symbols[i].Value;
break;
}
// Skip auxiliary symbols
i += symbols[i].NumberOfAuxSymbols;
}
// Cast and call
if (entryPoint) {
typedef void (*entry_fn)(char*, int);
entry_fn go = (entry_fn)entryPoint;
go(argumentdata, argumentSize);
}
Writing a Complete BOF
Here is a practical BOF that demonstrates proper API usage, error handling, and DLL import conventions:
C// whoami_bof.c -- BOF that displays current user and privilege info
#include <windows.h>
#include "beacon.h"
// Declare DLL imports using LIBRARY$Function convention
DECLSPEC_IMPORT BOOL WINAPI ADVAPI32$OpenProcessToken(HANDLE, DWORD, PHANDLE);
DECLSPEC_IMPORT BOOL WINAPI ADVAPI32$GetTokenInformation(HANDLE, TOKEN_INFORMATION_CLASS,
LPVOID, DWORD, PDWORD);
DECLSPEC_IMPORT BOOL WINAPI ADVAPI32$LookupAccountSidA(LPCSTR, PSID, LPSTR, LPDWORD,
LPSTR, LPDWORD, PSID_NAME_USE);
DECLSPEC_IMPORT HANDLE WINAPI KERNEL32$GetCurrentProcess(void);
DECLSPEC_IMPORT BOOL WINAPI KERNEL32$CloseHandle(HANDLE);
DECLSPEC_IMPORT DWORD WINAPI KERNEL32$GetCurrentProcessId(void);
DECLSPEC_IMPORT DWORD WINAPI KERNEL32$GetLastError(void);
/*
 * go -- BOF entry point. Prints the current process ID and the account
 * (DOMAIN\user) that owns the current process token.
 *
 * args, len: packed argument buffer passed in by the loader; not used here.
 *
 * Every failure is reported through BeaconPrintf(CALLBACK_ERROR, ...) together
 * with the GetLastError() code, and the token handle is closed on every path
 * once it has been opened.
 */
void go(char* args, int len) {
    HANDLE hToken = NULL;
    DWORD pid = KERNEL32$GetCurrentProcessId();
    // DWORD is an unsigned long, so it is printed with %lu rather than %d.
    BeaconPrintf(CALLBACK_OUTPUT, "[*] PID: %lu\n", pid);

    // Open our process token
    if (!ADVAPI32$OpenProcessToken(
            KERNEL32$GetCurrentProcess(),
            TOKEN_QUERY,
            &hToken))
    {
        BeaconPrintf(CALLBACK_ERROR, "[!] OpenProcessToken failed: %lu\n",
                     KERNEL32$GetLastError());
        return;
    }

    // Get the token user
    BYTE tokenInfo[256];
    DWORD returnLength = 0;
    if (!ADVAPI32$GetTokenInformation(
            hToken, TokenUser, tokenInfo, sizeof(tokenInfo), &returnLength))
    {
        // GetLastError() is evaluated as an argument, i.e. before CloseHandle
        // below gets a chance to overwrite the error code.
        BeaconPrintf(CALLBACK_ERROR, "[!] GetTokenInformation failed: %lu\n",
                     KERNEL32$GetLastError());
        KERNEL32$CloseHandle(hToken);
        return;
    }

    // Look up the account name from the SID
    TOKEN_USER* pUser = (TOKEN_USER*)tokenInfo;
    char username[128] = {0};
    char domain[128] = {0};
    DWORD userLen = sizeof(username);
    DWORD domainLen = sizeof(domain);
    SID_NAME_USE sidType;
    if (ADVAPI32$LookupAccountSidA(
            NULL, pUser->User.Sid,
            username, &userLen,
            domain, &domainLen,
            &sidType))
    {
        BeaconPrintf(CALLBACK_OUTPUT, "[+] User: %s\\%s\n", domain, username);
    }
    else {
        BeaconPrintf(CALLBACK_ERROR, "[!] LookupAccountSid failed: %lu\n",
                     KERNEL32$GetLastError());
    }

    KERNEL32$CloseHandle(hToken);
}
Compilation Guide
BOFs must be compiled to object files (not linked) and must avoid CRT dependencies:
BASH# ============================
# MinGW Cross-Compilation (Linux -> Windows x64)
# ============================
x86_64-w64-mingw32-gcc -c whoami_bof.c -o whoami_bof.o
# For x86 BOFs:
i686-w64-mingw32-gcc -c whoami_bof.c -o whoami_bof_x86.o
# ============================
# MSVC (Windows, x64)
# ============================
cl.exe /c /GS- /O2 whoami_bof.c /Fowhoami_bof.obj
# Flag explanations:
# /c - compile only, do not link
# /GS- - disable stack buffer security checks (no CRT cookie)
# /O2 - optimize for speed (optional but recommended)
# /Fo<file> - name of the output object file (no space between /Fo and the name)
# ============================
# Running with COFFLoader
# ============================
COFFLoader.exe go whoami_bof.o
# With arguments (hex-encoded):
COFFLoader.exe go enumerate_bof.o 0c0000000800000068006f0073007400
Critical Compilation Rules
| Rule | Reason |
|---|---|
| Always use -c / /c | Produces an object file, not an executable. Linking would create a PE with an import table. |
| Use /GS- with MSVC | Disables __security_check_cookie. The CRT initializes this cookie; BOFs have no CRT. |
| Never use CRT functions | No printf, malloc, free, strlen, etc. Use Beacon API or declare Windows API imports. |
| No global constructors | C++ static initializers require CRT. Use plain C only. |
| No static linking | Do not link any .lib files. All external references must use LIBRARY$ convention. |
Security Considerations
Both the COFFLoader itself and BOFs have OPSEC characteristics that defenders can detect:
COFFLoader Detection Indicators
| Indicator | Detail | Mitigation |
|---|---|---|
| RWX Memory | VirtualAlloc with PAGE_EXECUTE_READWRITE for every section | Allocate RW, apply relocations, then VirtualProtect to proper permissions |
| LoadLibraryA calls | Loading DLLs during symbol resolution (KERNEL32, ADVAPI32, NTDLL, etc.) | Many of these DLLs are already loaded; use GetModuleHandle first |
| GetProcAddress chains | Rapid sequential GetProcAddress calls during relocation processing | Use manual PE parsing (export table walking) instead |
| Unbacked executable memory | Code executing from VirtualAlloc'd regions (no backing file on disk) | Map a file-backed section or use module stomping |
| No exception handler registration | .pdata/.xdata are loaded but not registered with RtlAddFunctionTable | Call RtlAddFunctionTable after loading for proper SEH support |
BOF-Specific OPSEC
BOF OPSEC Checklist
1. Clean up memory: BOFs should zero sensitive buffers before returning. The loader will VirtualFree the sections, but sensitive data could remain in unfreed heap allocations.
2. Handle errors gracefully: A crash in the BOF crashes the entire host process. Always check return values and use __try/__except if available.
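3. Minimize API calls: Each new DLL mapped by LoadLibraryA produces image-load telemetry (for example via ETW), and LoadLibraryA/GetProcAddress calls can be observed by user-mode API hooks. Prefer DLLs already loaded in the process.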
4. Avoid AMSI-triggering strings: String literals in .rdata can be scanned by AMSI if the process has AMSI loaded. Encrypt or obfuscate sensitive strings.
5. Keep BOFs small: The smaller the memory allocation, the less likely it is to attract attention from memory scanners.
COFFLoader vs. bof-launcher (The-Z-Labs)
bof-launcher is an alternative COFF loader written in Zig with several improvements over COFFLoader:
| Feature | COFFLoader (TrustedSec) | bof-launcher (The-Z-Labs) |
|---|---|---|
| Language | C | Zig (with C API) |
| Memory permissions | RWX for all sections | Proper per-section permissions (RX, RW, R) |
| Cross-platform | Windows only | Windows (COFF) + Linux (ELF object files) |
| Exception handling | Not registered | Registered via RtlAddFunctionTable |
| Contiguous allocation | Separate VirtualAlloc per section | Single contiguous block, partitioned |
| Beacon API | Full compatibility layer | Extended API with additional functions |
| Thread safety | Global output buffer (not thread-safe) | Per-invocation context |
| API resolution | LoadLibraryA + GetProcAddress | Manual PE export table parsing |
When to Use Which
COFFLoader is ideal for learning (its source is straightforward and well-commented), for quick BOF testing from the command line, and for integration into C-based tools. bof-launcher is better suited for production C2 frameworks where OPSEC matters: proper memory permissions, registered exception handlers, contiguous section layout, and cross-platform support. Many frameworks (Sliver, Havoc, Mythic agents) have their own COFF loaders inspired by both projects.
Building COFFLoader from Source
BASH# Clone the repository
git clone https://github.com/trustedsec/COFFLoader.git
cd COFFLoader
# Build with MSVC (Visual Studio Developer Command Prompt)
# /Fe: sets the name of the produced executable
cl.exe /Fe:COFFLoader.exe COFFLoader.c beacon_compatibility.c
# Build with MinGW (on Linux or Windows)
x86_64-w64-mingw32-gcc COFFLoader.c beacon_compatibility.c -o COFFLoader.exe -luser32
# Test with a simple BOF
# Arguments: <entry function name> <object file> [hex-encoded arguments]
COFFLoader.exe go test_bof.o
Advanced: Adding Custom Internal Functions
If you are integrating COFFLoader into a custom framework, you can register additional internal functions that BOFs can call:
C// Custom function that BOFs can call
// Example internal function exposed to BOFs: forwards the caller's buffer to
// send_to_c2_server(). The `type` argument is accepted but not used here.
void MyCustomOutput(int type, char* data, int len) {
// Send data over your custom C2 channel
send_to_c2_server(data, len);
}
// Register it in the InternalFunctions table (before calling RunCOFF)
// Column [0] holds the symbol name and column [1] the function address; both
// are stored as unsigned char*, hence the casts.
// NOTE(review): index 24 is presumably the first free slot after the built-in
// entries -- confirm against the table's current contents and size.
InternalFunctions[24][0] = (unsigned char*)"MyCustomOutput";
InternalFunctions[24][1] = (unsigned char*)&MyCustomOutput;
// In the BOF:
// DECLSPEC_IMPORT void MyCustomOutput(int type, char* data, int len);
// void go(char* args, int len) {
// MyCustomOutput(0, "hello from BOF", 14);
// }
Common BOF Patterns
Here are patterns frequently used in production BOFs:
C// Pattern 1: Argument parsing with multiple types
// Entry point: unpacks an int, a short and a length-prefixed buffer from the
// argument blob, in that order.
// NOTE(review): the extraction calls presumably have to follow the same order
// in which the caller packed the values -- confirm against the packing side.
void go(char* args, int len) {
datap parser;
// Initialize the parser over the raw argument buffer
BeaconDataParse(&parser, args, len);
int targetPid = BeaconDataInt(&parser);
short someFlag = BeaconDataShort(&parser);
// strLen receives the size of the extracted buffer
int strLen;
char* targetName = BeaconDataExtract(&parser, &strLen);
// ... use parsed arguments ...
}
// Pattern 2: Structured output with formatp
// Entry point: accumulates a small three-column table in a format buffer and
// sends it as one CALLBACK_OUTPUT message.
void go(char* args, int len) {
    formatp table;
    int textLen;
    char* text;

    BeaconFormatAlloc(&table, 4096);

    // Header row first, then one row per process
    BeaconFormatPrintf(&table, "%-20s %-10s %-8s\n", "PROCESS", "PID", "ARCH");
    BeaconFormatPrintf(&table, "%-20s %-10d %-8s\n", "explorer.exe", 1234, "x64");
    BeaconFormatPrintf(&table, "%-20s %-10d %-8s\n", "svchost.exe", 5678, "x64");

    // Emit everything accumulated so far in a single call, then release the buffer
    text = BeaconFormatToString(&table, &textLen);
    BeaconOutput(CALLBACK_OUTPUT, text, textLen);
    BeaconFormatFree(&table);
}
// Pattern 3: Dynamic API resolution within the BOF
// (for APIs you do not want in the symbol table)
DECLSPEC_IMPORT HMODULE WINAPI KERNEL32$LoadLibraryA(LPCSTR);
DECLSPEC_IMPORT FARPROC WINAPI KERNEL32$GetProcAddress(HMODULE, LPCSTR);
void go(char* args, int len) {
// Resolve at runtime -- the symbol table only shows LoadLibrary/GetProcAddress
HMODULE hLib = KERNEL32$LoadLibraryA("amsi.dll");
if (hLib) {
typedef HRESULT (WINAPI *pAmsiInit)(LPCWSTR, HAMSICONTEXT*);
pAmsiInit AmsiInitialize = (pAmsiInit)KERNEL32$GetProcAddress(hLib, "AmsiInitialize");
// ... use resolved function ...
}
}
Pop Quiz: Full Chain & Security
Q1: Why must BOFs compiled with MSVC use the /GS- flag?
Q2: What is the most significant OPSEC weakness of COFFLoader's memory allocation strategy?
Q3: How does bof-launcher improve on COFFLoader's section allocation?