Difficulty: Advanced

Module 8: Full Chain, Writing BOFs & Security

End-to-end loader flow, writing production BOFs, compilation, and detection considerations.

Why This Module?

This final module ties everything together. We trace the complete execution flow from reading the COFF file to calling go() and retrieving output. Then we cover practical BOF authoring, compilation for both MSVC and MinGW, security/OPSEC considerations for both the loader and BOFs, and how COFFLoader compares to alternative loaders like bof-launcher by The-Z-Labs.

The Complete RunCOFF Flow

Here is the entire execution pipeline in one view, referencing every module:

RunCOFF(functionname="go", coff_data, filesize, argumentdata, argumentSize)
  |
  +-- [Module 2] Parse COFF header
  |     coff_header = (coff_file_header_t*)coff_data
  |     Validate: Machine == 0x8664, SizeOfOptionalHeader == 0
  |
  +-- [Module 2] Locate tables
  |     sections     = coff_data + sizeof(coff_file_header_t)
  |     symbols      = coff_data + PointerToSymbolTable
  |     string_table = symbols + (NumberOfSymbols * 18)
  |
  +-- [Module 4] Load sections
  |     for each section i:
  |       sectionMapping[i] = VirtualAlloc(NULL, SizeOfRawData, RWX)
  |       memcpy(sectionMapping[i], coff_data + PointerToRawData, SizeOfRawData)
  |
  +-- [Module 4] Allocate function pointer table
  |     relocationCount = sum of all sections' NumberOfRelocations
  |     functionMapping = VirtualAlloc(NULL, relocationCount * 8, RWX)
  |
  +-- [Module 7] Populate InternalFunctions[30]
  |     InternalFunctions[0] = {"BeaconDataParse", &BeaconDataParse}
  |     InternalFunctions[1] = {"BeaconDataInt",   &BeaconDataInt}
  |     ... (all 24 entries)
  |
  +-- [Module 5+6] Process relocations (per section)
  |     for each section secIdx:
  |       for each relocation relIdx:
  |         symIdx    = relocs[relIdx].SymbolTableIndex
  |         symName   = get_symbol_name(symbols[symIdx])
  |         |
  |         +-- [Module 5] Resolve symbol
  |         |     if defined(symIdx): addr = sectionMapping[sec-1] + Value
  |         |     elif __imp_: addr = functionMapping slot (filled by process_symbol)
  |         |     else: addr = process_symbol(symName) direct
  |         |
  |         +-- [Module 6] Apply relocation fixup
  |               ADDR64:   *(uint64_t*)fixup = symbolAddr
  |               REL32:    *(int32_t*)fixup  = symbolAddr - (fixup + 4)
  |               REL32_N:  *(int32_t*)fixup  = symbolAddr - (fixup + 4 + N)
  |               ADDR32NB: *(uint32_t*)fixup = symbolAddr - imageBase
  |
  +-- Find entry point
  |     Scan symbol table for "go" (x64) or "_go" (x86)
  |     entryAddr = sectionMapping[sym.SectionNumber - 1] + sym.Value
  |
  +-- Call the BOF
  |     typedef void (*entry_t)(char*, int);
  |     entry_t entry = (entry_t)entryAddr;
  |     entry(argumentdata, argumentSize);
  |
  +-- Retrieve output
  |     char* output = BeaconGetOutputData(&outsize);
  |
  +-- Cleanup
        for each section: VirtualFree(sectionMapping[i])
        VirtualFree(functionMapping)
        free(sectionMapping)

Entry Point Discovery

COFFLoader finds the entry function by scanning the symbol table for a symbol whose name matches the requested function name. On x64, it looks for the exact name (e.g., go). On x86, C name decoration for __cdecl functions prepends an underscore to the symbol name, so it looks for _go:

// Find the entry point symbol
void* entryPoint = NULL;
for (int i = 0; i < coff_header->NumberOfSymbols; i++) {
    char* name = get_symbol_name(&symbols[i], string_table);

#ifdef _WIN64
    if (strcmp(name, functionname) == 0) {
#else
    // x86: prepend underscore
    char decorated[256];
    snprintf(decorated, sizeof(decorated), "_%s", functionname);
    if (strcmp(name, decorated) == 0) {
#endif
        int secIdx = symbols[i].SectionNumber - 1;
        entryPoint = sectionMapping[secIdx] + symbols[i].Value;
        break;
    }
    // Skip auxiliary symbols
    i += symbols[i].NumberOfAuxSymbols;
}

// Cast and call
if (entryPoint) {
    typedef void (*entry_fn)(char*, int);
    entry_fn go = (entry_fn)entryPoint;
    go(argumentdata, argumentSize);
}

Writing a Complete BOF

Here is a practical BOF that demonstrates proper API usage, error handling, and DLL import conventions:

// whoami_bof.c -- BOF that displays current user and privilege info
#include <windows.h>
#include "beacon.h"

// Declare DLL imports using LIBRARY$Function convention
// Each prototype names the DLL before the '$' and the exported function
// after it (ADVAPI32 for the token/SID calls, KERNEL32 for the rest).
// Every Windows API that go() calls below is declared here.
DECLSPEC_IMPORT BOOL    WINAPI ADVAPI32$OpenProcessToken(HANDLE, DWORD, PHANDLE);
DECLSPEC_IMPORT BOOL    WINAPI ADVAPI32$GetTokenInformation(HANDLE, TOKEN_INFORMATION_CLASS,
                                                             LPVOID, DWORD, PDWORD);
DECLSPEC_IMPORT BOOL    WINAPI ADVAPI32$LookupAccountSidA(LPCSTR, PSID, LPSTR, LPDWORD,
                                                           LPSTR, LPDWORD, PSID_NAME_USE);
DECLSPEC_IMPORT HANDLE  WINAPI KERNEL32$GetCurrentProcess(void);
DECLSPEC_IMPORT BOOL    WINAPI KERNEL32$CloseHandle(HANDLE);
DECLSPEC_IMPORT DWORD   WINAPI KERNEL32$GetCurrentProcessId(void);
DECLSPEC_IMPORT DWORD   WINAPI KERNEL32$GetLastError(void);

/*
 * BOF entry point: prints the current process ID and the DOMAIN\user
 * name that owns the process token.
 *
 * args / len: packed argument buffer handed over by the loader; this
 *             BOF takes no arguments and ignores both.
 *
 * All results and errors are reported through BeaconPrintf. The token
 * handle is closed on every path that opened it.
 */
void go(char* args, int len) {
    HANDLE hToken = NULL;
    DWORD  pid = KERNEL32$GetCurrentProcessId();

    /* DWORD is an unsigned long, so it is printed with %lu, not %d. */
    BeaconPrintf(CALLBACK_OUTPUT, "[*] PID: %lu\n", pid);

    // Open our process token
    if (!ADVAPI32$OpenProcessToken(
            KERNEL32$GetCurrentProcess(),
            TOKEN_QUERY,
            &hToken))
    {
        BeaconPrintf(CALLBACK_ERROR, "[!] OpenProcessToken failed: %lu\n",
                     KERNEL32$GetLastError());
        return;
    }

    // Get the token user.
    // The union keeps the 256-byte capacity of a raw byte buffer while
    // guaranteeing the alignment TOKEN_USER needs, so the result can be
    // read without casting an arbitrarily aligned BYTE array.
    union {
        TOKEN_USER user;
        BYTE       raw[256];
    } tokenInfo;
    DWORD returnLength = 0;

    if (!ADVAPI32$GetTokenInformation(
            hToken, TokenUser, &tokenInfo, sizeof(tokenInfo), &returnLength))
    {
        BeaconPrintf(CALLBACK_ERROR, "[!] GetTokenInformation failed: %lu\n",
                     KERNEL32$GetLastError());
        KERNEL32$CloseHandle(hToken);
        return;
    }

    // Look up the account name from the SID
    char username[128] = {0};
    char domain[128]   = {0};
    DWORD userLen   = sizeof(username);
    DWORD domainLen = sizeof(domain);
    SID_NAME_USE sidType;

    if (ADVAPI32$LookupAccountSidA(
            NULL, tokenInfo.user.User.Sid,
            username, &userLen,
            domain, &domainLen,
            &sidType))
    {
        BeaconPrintf(CALLBACK_OUTPUT, "[+] User: %s\\%s\n", domain, username);
    }
    else {
        BeaconPrintf(CALLBACK_ERROR, "[!] LookupAccountSid failed: %lu\n",
                     KERNEL32$GetLastError());
    }

    KERNEL32$CloseHandle(hToken);
}

Compilation Guide

BOFs must be compiled to object files (not linked) and must avoid CRT dependencies:

# ============================
# MinGW Cross-Compilation (Linux -> Windows x64)
# ============================
x86_64-w64-mingw32-gcc -c whoami_bof.c -o whoami_bof.o

# For x86 BOFs:
i686-w64-mingw32-gcc -c whoami_bof.c -o whoami_bof_x86.o

# ============================
# MSVC (Windows, x64)
# ============================
# Note: /Fo takes its file name with no space in between
# (a space is only accepted after the /Fo: form).
cl.exe /c /GS- /O2 whoami_bof.c /Fowhoami_bof.obj

# Flag explanations:
#   /c     - compile only, do not link
#   /GS-   - disable stack buffer security checks (no CRT cookie)
#   /O2    - optimize for speed (optional but recommended)
#   /Fo    - name of the object file to produce

# ============================
# Running with COFFLoader
# ============================
COFFLoader.exe go whoami_bof.o

# With arguments (hex-encoded):
COFFLoader.exe go enumerate_bof.o 0c0000000800000068006f0073007400

Critical Compilation Rules

| Rule | Reason |
|---|---|
| Always use -c / /c | Produces an object file, not an executable. Linking would create a PE with an import table. |
| Use /GS- with MSVC | Disables the stack-cookie check. Both __security_cookie and __security_check_cookie are supplied by the CRT; BOFs have no CRT. |
| Never use CRT functions | No printf, malloc, free, strlen, etc. Use Beacon API or declare Windows API imports. |
| No global constructors | C++ static initializers require CRT. Use plain C only. |
| No static linking | Do not link any .lib files. All external references other than the Beacon API must use the LIBRARY$ convention. |

Security Considerations

Both the COFFLoader itself and BOFs have OPSEC characteristics that defenders can detect:

COFFLoader Detection Indicators

| Indicator | Detail | Mitigation |
|---|---|---|
| RWX Memory | VirtualAlloc with PAGE_EXECUTE_READWRITE for every section | Allocate RW, apply relocations, then VirtualProtect to proper permissions |
| LoadLibraryA calls | Loading DLLs during symbol resolution (KERNEL32, ADVAPI32, NTDLL, etc.) | Many of these DLLs are already loaded; use GetModuleHandle first |
| GetProcAddress chains | Rapid sequential GetProcAddress calls during relocation processing | Use manual PE parsing (export table walking) instead |
| Unbacked executable memory | Code executing from VirtualAlloc'd regions (no backing file on disk) | Map a file-backed section or use module stomping |
| No exception handler registration | .pdata/.xdata are loaded but not registered with RtlAddFunctionTable | Call RtlAddFunctionTable after loading for proper SEH support |

BOF-Specific OPSEC

BOF OPSEC Checklist

1. Clean up memory: BOFs should zero sensitive buffers before returning. The loader will VirtualFree the sections, but sensitive data could remain in unfreed heap allocations.

2. Handle errors gracefully: A crash in the BOF crashes the entire host process. Always check return values and use __try/__except if available.

3. Minimize API calls: Every LoadLibraryA and GetProcAddress is logged by ETW. Prefer DLLs already loaded in the process.

4. Avoid AMSI-triggering strings: String literals in .rdata can be scanned by AMSI if the process has AMSI loaded. Encrypt or obfuscate sensitive strings.

5. Keep BOFs small: The smaller the memory allocation, the less likely it is to attract attention from memory scanners.

COFFLoader vs. bof-launcher (The-Z-Labs)

bof-launcher is an alternative COFF loader written in Zig with several improvements over COFFLoader:

| Feature | COFFLoader (TrustedSec) | bof-launcher (The-Z-Labs) |
|---|---|---|
| Language | C | Zig (with C API) |
| Memory permissions | RWX for all sections | Proper per-section permissions (RX, RW, R) |
| Cross-platform | Windows only | Windows + Linux (ELF object file support) |
| Exception handling | Not registered | Registered via RtlAddFunctionTable |
| Contiguous allocation | Separate VirtualAlloc per section | Single contiguous block, partitioned |
| Beacon API | Full compatibility layer | Extended API with additional functions |
| Thread safety | Global output buffer (not thread-safe) | Per-invocation context |
| API resolution | LoadLibraryA + GetProcAddress | Manual PE export table parsing |

When to Use Which

COFFLoader is ideal for learning (its source is straightforward and well-commented), for quick BOF testing from the command line, and for integration into C-based tools. bof-launcher is better suited for production C2 frameworks where OPSEC matters: proper memory permissions, registered exception handlers, contiguous section layout, and cross-platform support. Many frameworks (Sliver, Havoc, Mythic agents) have their own COFF loaders inspired by both projects.

Building COFFLoader from Source

# Clone the repository
git clone https://github.com/trustedsec/COFFLoader.git
cd COFFLoader

# Build with MSVC (Visual Studio Developer Command Prompt)
#   /Fe: sets the name of the executable; both .c files are compiled and linked together
cl.exe /Fe:COFFLoader.exe COFFLoader.c beacon_compatibility.c

# Build with MinGW (on Linux or Windows)
#   -o sets the output name; -luser32 links against user32
x86_64-w64-mingw32-gcc COFFLoader.c beacon_compatibility.c -o COFFLoader.exe -luser32

# Test with a simple BOF
#   arguments: entry function name, then the object file to load
COFFLoader.exe go test_bof.o

Advanced: Adding Custom Internal Functions

If you are integrating COFFLoader into a custom framework, you can register additional internal functions that BOFs can call:

// Custom function that BOFs can call
// Forwards a BOF-supplied buffer to the host framework's transport.
//   type: accepted for the caller's benefit but not used by this body
//   data: pointer to the bytes to send
//   len:  number of bytes at data
// send_to_c2_server is not defined in this snippet; it stands for whatever
// send routine the embedding framework provides.
void MyCustomOutput(int type, char* data, int len) {
    // Send data over your custom C2 channel
    send_to_c2_server(data, len);
}

// Register it in the InternalFunctions table (before calling RunCOFF)
// Column 0 of an entry holds the name string and column 1 the function
// address, both stored as unsigned char*.
// NOTE(review): index 24 assumes entries 0-23 are the only ones in use and
// that the table has a free slot there -- confirm against the table definition.
// NOTE(review): confirm that the loader's symbol resolution consults this
// table for names that do not start with "Beacon".
InternalFunctions[24][0] = (unsigned char*)"MyCustomOutput";
InternalFunctions[24][1] = (unsigned char*)&MyCustomOutput;

// In the BOF:
// The import is declared without a LIBRARY$ prefix, like the Beacon API calls.
// DECLSPEC_IMPORT void MyCustomOutput(int type, char* data, int len);
// void go(char* args, int len) {
//     MyCustomOutput(0, "hello from BOF", 14);
// }

Common BOF Patterns

Here are patterns frequently used in production BOFs:

// Pattern 1: Argument parsing with multiple types
void go(char* args, int len) {
    datap argReader;
    int   nameLen;

    // Attach the reader to the argument buffer passed in by the loader.
    BeaconDataParse(&argReader, args, len);

    // Pull the fields out one after another: an int, a short, then a
    // buffer whose length is reported through nameLen.
    int   pid  = BeaconDataInt(&argReader);
    short flag = BeaconDataShort(&argReader);
    char* name = BeaconDataExtract(&argReader, &nameLen);

    // ... use parsed arguments ...
}

// Pattern 2: Structured output with formatp
// Accumulates several formatted lines in one buffer and emits them with
// a single BeaconOutput call.
void go(char* args, int len) {
    formatp table;
    char*   text;
    int     textLen;

    BeaconFormatAlloc(&table, 4096);

    // Header line, then two sample rows using the same column widths
    BeaconFormatPrintf(&table, "%-20s %-10s %-8s\n", "PROCESS", "PID", "ARCH");
    BeaconFormatPrintf(&table, "%-20s %-10d %-8s\n", "explorer.exe", 1234, "x64");
    BeaconFormatPrintf(&table, "%-20s %-10d %-8s\n", "svchost.exe",  5678, "x64");

    // Fetch the accumulated text and its length, then send it
    text = BeaconFormatToString(&table, &textLen);
    BeaconOutput(CALLBACK_OUTPUT, text, textLen);

    // Release the buffer once the output has been handed over
    BeaconFormatFree(&table);
}

// Pattern 3: Dynamic API resolution within the BOF
//   (for APIs you do not want in the symbol table)
// The only imports declared for this pattern are the two KERNEL32
// functions below; the target function is looked up by name at run time.
DECLSPEC_IMPORT HMODULE WINAPI KERNEL32$LoadLibraryA(LPCSTR);
DECLSPEC_IMPORT FARPROC WINAPI KERNEL32$GetProcAddress(HMODULE, LPCSTR);

void go(char* args, int len) {
    // Resolve at runtime -- the symbol table only shows LoadLibrary/GetProcAddress
    HMODULE hLib = KERNEL32$LoadLibraryA("amsi.dll");
    if (hLib) {
        // NOTE(review): HAMSICONTEXT is not declared anywhere in this snippet;
        // presumably <amsi.h> has to be included -- confirm.
        typedef HRESULT (WINAPI *pAmsiInit)(LPCWSTR, HAMSICONTEXT*);
        // The FARPROC returned by the lookup is cast to the typed pointer.
        // NOTE(review): the result is not checked for NULL here; code that
        // calls through it should test it first.
        pAmsiInit AmsiInitialize = (pAmsiInit)KERNEL32$GetProcAddress(hLib, "AmsiInitialize");
        // ... use resolved function ...
    }
    // NOTE(review): hLib is not released anywhere in this snippet.
}

Pop Quiz: Full Chain & Security

Q1: Why must BOFs compiled with MSVC use the /GS- flag?

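The /GS flag enables stack buffer overrun detection via a security cookie: the function prologue stores a copy of the global __security_cookie on the stack, and the epilogue passes it to __security_check_cookie for verification. Both symbols are provided by the CRT, whose startup code (mainCRTStartup or the DLL entry point) also initializes the cookie with a random value. A BOF is never linked against the CRT and bypasses CRT initialization entirely, so these references remain unresolved external symbols that the loader cannot satisfy, and the cookie is never set up; the BOF either fails to load or crashes. /GS- disables this mechanism so the compiler does not emit the cookie code at all.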

Q2: What is the most significant OPSEC weakness of COFFLoader's memory allocation strategy?

COFFLoader allocates every section (code, data, and read-only) with PAGE_EXECUTE_READWRITE. RWX memory regions are rare in legitimate applications and are a strong indicator of shellcode or runtime code generation. EDR products and memory scanners specifically flag RWX allocations. A better approach is to allocate with RW, apply relocations, then VirtualProtect to the correct per-section permissions (RX for .text, RW for .data, R for .rdata).

Q3: How does bof-launcher improve on COFFLoader's section allocation?

bof-launcher allocates a single contiguous memory region for all sections, partitions it, copies section data, applies relocations, and then uses VirtualProtect to set proper per-section permissions (RX for code, RW for data, R for read-only). The contiguous allocation also ensures REL32 relocations always succeed (sections are close together), and proper permissions avoid the RWX detection signature.