Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a MLIR dialect for ghidra pcode. #2

Closed
wants to merge 36 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
79f9169
ghidra: Moves the JSON deserializer to `tools`.
surovic May 15, 2024
e7cc65c
pc: Adds basics of the `pc` pcode dialect.
surovic May 15, 2024
1a9d9e8
treewide: Relax compiler warnings in MLIR and LLVM headers.
surovic May 15, 2024
f6d1e51
cmake: Formatting with `cmake-format`.
surovic May 30, 2024
ca4e646
ghidra_scripts: Fix `patchestry` executable path.
surovic May 30, 2024
35f4783
ghidra: Removes `kind` from `varnode` as it is arbitrary.
surovic Jun 6, 2024
c4fde5b
ghidra: Move deserializer to `include` and `lib`.
surovic Jun 10, 2024
8f61143
pc: Add structural operations.
surovic Jun 10, 2024
2566d36
pc: Add a varnode operation.
surovic Jun 10, 2024
988bbf2
pc: Add a few unary operations to start.
surovic Jun 10, 2024
ef9db4a
pc: Add a few binary operations to start.
surovic Jun 10, 2024
0f32d58
pc: Add (conditional) branches, call and return.
surovic Jun 10, 2024
0046e0c
pc: Add store and load operations.
surovic Jun 10, 2024
2292f09
ghidra: Add mlir codegen from deserialized json pcode.
surovic Jun 10, 2024
b8150b8
patchestry: Integrates MLIR codegen.
surovic Jun 11, 2024
2638a69
pc: Minor cosmetic renaming.
surovic Jun 13, 2024
fcc7feb
pc: Reworks varnode handling.
surovic Jun 13, 2024
82c9024
cmake: Update required LLVM to 18
surovic Jun 17, 2024
171ef35
pc: Makes registers follow SSA.
surovic Jun 17, 2024
bbac847
cmake: Treat MLIR headers as system headers.
surovic Jun 17, 2024
8f2a105
pc: Prefix `name` and `mnemonic` to avoid shadowing.
surovic Jun 17, 2024
d831abf
pc: Resolve missing includes.
surovic Jun 17, 2024
2f4b376
pc: Resolve namespace comment linter warning.
surovic Jun 17, 2024
2c544f2
pc: Replace `Builtin_Integer` w/ `AnySignlessInteger`.
surovic Jun 17, 2024
a87d783
patchestry: Enable pretty printing of output mlir.
surovic Jun 17, 2024
6f881a2
patchestry: Enable MLIR multithreading.
surovic Jun 17, 2024
2dd77f6
pc: Add missing insertion guard.
surovic Jun 17, 2024
f15f877
pc: Return `{}` instead of `UnitAttr` in `fold`.
surovic Jun 17, 2024
e5b6158
pc: Renames confusing type alias.
surovic Jun 17, 2024
c4647da
pc: Replaces `StringSwitch` with `if` cascade.
surovic Jun 17, 2024
4338e5f
cmake: Ties MLIR version to LLVM version.
surovic Jun 17, 2024
8a57eee
pc: Unifies address space maps into a scoped hash table.
surovic Jun 17, 2024
35ff453
pc: Replace custom hashing w/ `llvm::hash_value`.
surovic Jun 18, 2024
d06fdd6
pc: Add `pc.const` reuse inside `pc.instruction`.
surovic Jun 18, 2024
c4d8435
ghidra_scripts: Look for `patchestry` in PATH.
surovic Jun 18, 2024
1d2d20e
readme: Add a short guide to run `patchestry` via ghidra GUI.
surovic Jun 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 27 additions & 38 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,52 +1,43 @@
# Copyright (c) 2024, Trail of Bits, Inc. All rights reserved. This source code
# is licensed in accordance with the terms specified in the LICENSE file found
# in the root directory of this source tree.

cmake_minimum_required(VERSION 3.14)

include(cmake/prelude.cmake)

project(
patchestry
VERSION 0.1.0
DESCRIPTION "Patchestry is a binary patching framework built with MLIR and Ghidra."
HOMEPAGE_URL "https://github.com/lifting-bits/patchestry"
LANGUAGES C CXX
)
patchestry
VERSION 0.1.0
DESCRIPTION
"Patchestry is a binary patching framework built with MLIR and Ghidra."
HOMEPAGE_URL "https://github.com/lifting-bits/patchestry"
LANGUAGES C CXX)

include(cmake/project-is-top-level.cmake)
include(cmake/variables.cmake)

# ---- Add LLVM ----

find_package(LLVM 17 REQUIRED CONFIG)
find_package(LLVM 18.1 REQUIRED CONFIG)
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(TableGen)
include(AddLLVM)
include(HandleLLVMOptions)

# ---- Declare library ----

add_library(
patchestry_lib OBJECT
source/ghidra.cpp
)

target_include_directories(
patchestry_lib ${warning_guard}
PUBLIC
"\$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/source>"
"\$<BUILD_INTERFACE:${LLVM_INCLUDE_DIRS}>"
)

target_compile_features(patchestry_lib PUBLIC cxx_std_20)

target_link_libraries(patchestry_lib PUBLIC LLVMSupport)

# ---- Declare executable ----

add_executable(patchestry_exe source/main.cpp)
add_executable(patchestry::exe ALIAS patchestry_exe)

set_property(TARGET patchestry_exe PROPERTY OUTPUT_NAME patchestry)
# ---- Add MLIR ----

# Request MLIR at the version reported by the LLVM package located earlier in
# this file, report what was found, and put MLIR's CMake module directory on
# the module path so that the AddMLIR helpers can be included.
find_package(MLIR ${LLVM_PACKAGE_VERSION} REQUIRED CONFIG)
message(STATUS "Found MLIR ${MLIR_PACKAGE_VERSION}")
message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}")
list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
include(AddMLIR)

target_compile_features(patchestry_exe PRIVATE cxx_std_20)
include_directories(SYSTEM ${MLIR_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/include/)
include_directories(${PROJECT_BINARY_DIR}/include/)

target_link_libraries(patchestry_exe PRIVATE patchestry_lib)
add_subdirectory(include)
add_subdirectory(lib)
add_subdirectory(tools)

# ---- Install rules ----

Expand All @@ -60,9 +51,7 @@ if(NOT patchestry_DEVELOPER_MODE)
return()
elseif(NOT PROJECT_IS_TOP_LEVEL)
message(
AUTHOR_WARNING
"Developer mode is intended for developers of patchestry"
)
AUTHOR_WARNING "Developer mode is intended for developers of patchestry")
endif()

include(cmake/dev-mode.cmake)
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,47 @@ This is the patchestry project.

See the [BUILDING](BUILDING.md) document.

# Getting Ghidra

Get Java JDK (x64)

```shell
wget -c https://download.oracle.com/java/22/latest/jdk-22_linux-x64_bin.tar.gz -O jdk.tar.gz
tar xvf jdk.tar.gz
mv jdk-22.0.1 ~/jdk
echo "export PATH=\$PATH:~/jdk/bin" >> ~/.bashrc
```

Get Ghidra
```shell
wget -c https://github.com/NationalSecurityAgency/ghidra/releases/download/Ghidra_11.1.1_build/ghidra_11.1.1_PUBLIC_20240614.zip -O ghidra.zip
unzip ghidra.zip
mv ghidra_11.1.1_PUBLIC ~/ghidra
```

# Installing Ghidra Scripts

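Link the `ghidra_scripts` directory into `$HOME`. We assume that `./patchestry` contains the cloned repository and that the command is run from `$HOME`: the link stores the relative path exactly as given, so it only resolves when created from there.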
```shell
ln -s patchestry/ghidra_scripts ~
```

# Running patchestry via Ghidra GUI

Make sure patchestry is available via PATH
```shell
patchestry
```

Start Ghidra GUI
```shell
~/ghidra/ghidraRun
```

Create a project and import a binary file.

Run `PatchestryScript.java`.

# Contributing

See the [CONTRIBUTING](CONTRIBUTING.md) document.
Expand Down
2 changes: 1 addition & 1 deletion ghidra_scripts/PatchestryScript.java
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public void run() throws Exception {
final var curFunction = getFunctionContaining(currentAddress);
final var pInputPath = Files.createTempFile(curFunction.getName() + '.', ".patchestry.json");
final var pOutputPath = Files.createTempFile(curFunction.getName() + '.', ".patchestry.out");
final var pBinaryPath = "/home/surovic/git/patchestry/build/dev/patchestry";
final var pBinaryPath = "patchestry";

final var serializer = new PcodeSerializer(Files.newBufferedWriter(pInputPath));
serializer.serialize(curFunction).close();
Expand Down
5 changes: 5 additions & 0 deletions include/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2024, Trail of Bits, Inc. All rights reserved. This source code
# is licensed in accordance with the terms specified in the LICENSE file found
# in the root directory of this source tree.

add_subdirectory(patchestry)
6 changes: 6 additions & 0 deletions include/patchestry/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (c) 2024, Trail of Bits, Inc. All rights reserved. This source code
# is licensed in accordance with the terms specified in the LICENSE file found
# in the root directory of this source tree.

add_subdirectory(Dialect)
add_subdirectory(Ghidra)
5 changes: 5 additions & 0 deletions include/patchestry/Dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2024, Trail of Bits, Inc. All rights reserved. This source code
# is licensed in accordance with the terms specified in the LICENSE file found
# in the root directory of this source tree.

add_subdirectory(Pcode)
5 changes: 5 additions & 0 deletions include/patchestry/Dialect/Pcode/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Copyright (c) 2024, Trail of Bits, Inc. All rights reserved. This source code
# is licensed in accordance with the terms specified in the LICENSE file found
# in the root directory of this source tree.

# Run TableGen for the dialect. `Pcode` is the base name of the .td file in
# this directory and of the generated .inc headers; `pc` is the dialect
# namespace passed to the dialect generators.
add_mlir_dialect(Pcode pc)
33 changes: 33 additions & 0 deletions include/patchestry/Dialect/Pcode/Pcode.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright (c) 2024, Trail of Bits, Inc.
* All rights reserved.
*
* This source code is licensed in accordance with the terms specified in
* the LICENSE file found in the root directory of this source tree.
*/

#ifndef PCODE_DIALECT
#define PCODE_DIALECT

include "mlir/IR/OpBase.td"

// Definition of the Pcode dialect: it is registered under the name `pc` and
// its generated C++ code lives in the `::patchestry::pc` namespace.
def Pcode_Dialect : Dialect {
let name = "pc";
let cppNamespace = "::patchestry::pc";

let summary = "Dialect for representing ghidra pcode operations.";

// Extra member declared on the generated dialect class; its definition is
// not part of this file.
let extraClassDeclaration = [{
void registerTypes();
}];

// Use the TableGen-generated default type printer/parser hooks.
let useDefaultTypePrinterParser = 1;
}

// Base class for operations of the Pcode dialect: binds the op to
// `Pcode_Dialect` and forwards the mnemonic and the (optional) trait list to
// the generic `Op` class.
class PcodeOp< string op_mnemonic, list< Trait > traits = [] >
: Op< Pcode_Dialect, op_mnemonic, traits >;

include "PcodeOps.td"
include "PcodeTypes.td"

#endif // PCODE_DIALECT
18 changes: 18 additions & 0 deletions include/patchestry/Dialect/Pcode/PcodeDialect.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* Copyright (c) 2024, Trail of Bits, Inc.
* All rights reserved.
*
* This source code is licensed in accordance with the terms specified in
* the LICENSE file found in the root directory of this source tree.
*/

#pragma once

#include "patchestry/Util/Warnings.hpp"

PATCHESTRY_RELAX_WARNINGS
#include <mlir/IR/Dialect.h>
PATCHESTRY_UNRELAX_WARNINGS

// Pull in the dialect definition.
#include "patchestry/Dialect/Pcode/PcodeDialect.h.inc"
22 changes: 22 additions & 0 deletions include/patchestry/Dialect/Pcode/PcodeOps.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright (c) 2024, Trail of Bits, Inc.
* All rights reserved.
*
* This source code is licensed in accordance with the terms specified in
* the LICENSE file found in the root directory of this source tree.
*/

#pragma once

#include "patchestry/Util/Warnings.hpp"

PATCHESTRY_RELAX_WARNINGS
#include <mlir/IR/BuiltinOps.h>
#include <mlir/IR/RegionKindInterface.h>
#include <mlir/Interfaces/InferTypeOpInterface.h>
PATCHESTRY_UNRELAX_WARNINGS

#include "patchestry/Dialect/Pcode/PcodeDialect.hpp"

#define GET_OP_CLASSES
#include "patchestry/Dialect/Pcode/Pcode.h.inc"
146 changes: 146 additions & 0 deletions include/patchestry/Dialect/Pcode/PcodeOps.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright (c) 2024, Trail of Bits, Inc.
* All rights reserved.
*
* This source code is licensed in accordance with the terms specified in
* the LICENSE file found in the root directory of this source tree.
*/

#ifndef PCODE_DIALECT_OPS
#define PCODE_DIALECT_OPS

include "mlir/Interfaces/InferTypeOpInterface.td"
include "mlir/IR/BuiltinAttributes.td"
include "mlir/IR/BuiltinTypes.td"

include "patchestry/Dialect/Pcode/PcodeTypes.td"

// `function` op: carries a string attribute `func_name` and a single region
// named `blocks`. It has no operands and no results. The textual form is the
// attribute dictionary, a `:` and then the region.
// NOTE(review): the region is `AnyRegion` and no terminator-related traits are
// declared here - confirm what the verifier expects of the region's contents.
def FuncOp
: PcodeOp< "function" >
, Arguments<( ins StrAttr:$func_name )>
{
let regions = (region AnyRegion:$blocks);
let assemblyFormat = [{ attr-dict `:` $blocks }];
}

// `block` op: carries a string attribute `block_label` and a single region
// named `instructions`. It has no operands and no results. The textual form is
// the attribute dictionary, a `:` and then the region.
def BlockOp
: PcodeOp< "block" >
, Arguments<( ins StrAttr:$block_label )>
{
let regions = (region AnyRegion:$instructions);
let assemblyFormat = [{ attr-dict `:` $instructions }];
}

// `instruction` op: carries a string attribute `inst_mnemonic` and a single
// region named `semantics`. It has no operands and no results. The textual
// form is the attribute dictionary, a `:` and then the region.
def InstOp
: PcodeOp< "instruction" >
, Arguments<( ins StrAttr:$inst_mnemonic )>
{
let regions = (region AnyRegion:$semantics);
let assemblyFormat = [{ attr-dict `:` $semantics }];
}

// `const` op: a constant-like operation holding a typed attribute `value` and
// producing one result. `AllTypesMatch` requires the type of `value` and the
// type of `result` to be the same.
def ConstOp
: PcodeOp< "const", [ConstantLike, AllTypesMatch< ["value", "result"] >] >
, Arguments<( ins TypedAttrInterface:$value )>
, Results<( outs AnyType:$result )>
{
// A `fold` hook is declared for this op; its implementation is in C++ and
// not part of this file.
let hasFolder = 1;
// `value` is not named in the format, so it is carried by `attr-dict`.
let assemblyFormat = [{ attr-dict `:` type($result) }];
}

// Shared base for the varnode ops defined below. Each takes three attributes
// (`addr_space` as a string, `addr` as a 64-bit integer, `size` as an 8-bit
// integer), has no operands, and produces one result of any type. The textual
// form is the attribute dictionary followed by `:` and the result type.
class VarnodeTypeOp< string varnode_mnemonic, list< Trait > traits = [] >
: PcodeOp< varnode_mnemonic, traits >
, Arguments<( ins StrAttr:$addr_space, I64Attr:$addr, I8Attr:$size )>
, Results<( outs AnyType:$result )>
{
let assemblyFormat = [{ attr-dict `:` type($result) }];
}

// Concrete varnode ops; they differ only in mnemonic.
def RegOp : VarnodeTypeOp< "reg" >;
def MemOp : VarnodeTypeOp< "mem" >;
def VarOp : VarnodeTypeOp< "var" >;

// Shared base for single-operand ops: one signless-integer operand `op` and
// one signless-integer result. The textual form is the operand, the attribute
// dictionary, `:` and the functional type `(operand types) -> result types`.
class UnaryOp< string op_mnemonic, list< Trait > traits = [] >
: PcodeOp< op_mnemonic, traits >
, Arguments<( ins AnySignlessInteger:$op )>
, Results<( outs AnySignlessInteger:$result )>
{
let assemblyFormat = [{ $op attr-dict `:` functional-type(operands, results) }];
}

// `copy` and `bool_negate` additionally require the operand and result types
// to be identical; `popcount` places no such constraint.
def CopyOp : UnaryOp < "copy", [SameOperandsAndResultType] >;
def PopcountOp : UnaryOp < "popcount" >;
def BoolNegateOp : UnaryOp < "bool_negate", [SameOperandsAndResultType] >;

// Shared base for two-operand ops: signless-integer operands `lhs` and `rhs`
// and one signless-integer result. The textual form is `lhs, rhs`, the
// attribute dictionary, `:` and the functional type of operands and results.
class BinOp< string op_mnemonic, list< Trait > traits = [] >
: PcodeOp< op_mnemonic, traits >
, Arguments<( ins AnySignlessInteger:$lhs, AnySignlessInteger:$rhs )>
, Results<( outs AnySignlessInteger:$result )>
{
let assemblyFormat = [{ $lhs `,` $rhs attr-dict `:` functional-type(operands, results) }];
}

// Concrete binary ops. None of them passes extra traits, so operand and
// result types are only constrained to be signless integers.
def IntAddOp : BinOp< "int_add" >;
def IntSubOp : BinOp< "int_sub" >;
def IntLessOp : BinOp< "int_less" >;
def IntEqualOp : BinOp< "int_equal" >;
def IntSBorrowOp : BinOp< "int_sborrow" >;
def IntSLessOp : BinOp< "int_sless" >;
def IntAndOp : BinOp< "int_and" >;

// `branch` op: takes one signless-integer operand `addr` and produces no
// results. The textual form is the operand, the attribute dictionary, `:` and
// the operand type.
def BranchOp
: PcodeOp< "branch" >
, Arguments<( ins AnySignlessInteger:$addr )>
{
let summary = "Pcode BRANCH operation";
let description = "TODO(surovic)";
let assemblyFormat = [{ $addr attr-dict `:` type(operands) }];
}

// `cbranch` op: takes two signless-integer operands, `addr` then `cond`, and
// produces no results. The textual form is `addr, cond`, the attribute
// dictionary, `:` and the operand types.
def CBranchOp
: PcodeOp< "cbranch" >
, Arguments<( ins AnySignlessInteger:$addr, AnySignlessInteger:$cond )>
{
let summary = "Pcode CBRANCH operation";
let description = "TODO(surovic)";
let assemblyFormat = [{ $addr `,` $cond attr-dict `:` type(operands) }];
}

// `call` op: takes one signless-integer operand `addr` and produces no
// results. The textual form is the operand, the attribute dictionary, `:` and
// the operand type.
def CallOp
: PcodeOp< "call" >
, Arguments<( ins AnySignlessInteger:$addr )>
{
let summary = "Pcode CALL operation";
let description = "TODO(surovic)";
let assemblyFormat = [{ $addr attr-dict `:` type(operands) }];
}

// `return` op: takes one signless-integer operand `varnode` and produces no
// results. The textual form is the operand, the attribute dictionary, `:` and
// the operand type.
def ReturnOp
: PcodeOp< "return" >
, Arguments<( ins AnySignlessInteger:$varnode )>
{
let summary = "Pcode RETURN operation";
let description = "TODO(surovic)";
let assemblyFormat = [{ $varnode attr-dict `:` type(operands) }];
}

// `store` op: takes three signless-integer operands, in order `addr_space`,
// `addr` and `data`, and produces no results. The textual form is the three
// operands separated by commas, the attribute dictionary, `:` and the operand
// types.
def StoreOp
: PcodeOp< "store" >
, Arguments<( ins AnySignlessInteger:$addr_space, AnySignlessInteger:$addr, AnySignlessInteger:$data )>
{
let summary = "Pcode STORE operation";
let description = "TODO(surovic)";
let assemblyFormat = [{ $addr_space `,` $addr `,` $data attr-dict `:` type(operands) }];
}

// `load` op: takes two signless-integer operands, `addr_space` then `addr`,
// and produces one signless-integer result. The textual form is the two
// operands separated by a comma, the attribute dictionary, `:` and the
// functional type of operands and results.
def LoadOp
: PcodeOp< "load" >
, Arguments<( ins AnySignlessInteger:$addr_space, AnySignlessInteger:$addr )>
, Results<( outs AnySignlessInteger:$result )>
{
let summary = "Pcode LOAD operation";
let description = "TODO(surovic)";
let assemblyFormat = [{ $addr_space `,` $addr attr-dict `:` functional-type(operands, results) }];
}

#endif // PCODE_DIALECT_OPS
Loading