Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorrect result when decompiling pass-by-value struct arguments via WebAssembly #195

Closed
frabert opened this issue Oct 27, 2021 · 2 comments · Fixed by #197
Closed

Incorrect result when decompiling pass-by-value struct arguments via WebAssembly #195

frabert opened this issue Oct 27, 2021 · 2 comments · Fixed by #197
Labels
bug Something isn't working decomp Related to LLVM IR to C decompiler

Comments

@frabert
Copy link
Collaborator

frabert commented Oct 27, 2021

Compiling the following program for a WebAssembly target (no codegen, only LLVM IR, using `clang --target=wasm32-unknown-wasi -S -emit-llvm ../test_struct_split.c`)

extern int printf(const char *fmt, ...);
extern int atoi(const char* s);

struct foo {
  int x;
  int y;
};

static int get_x(struct foo f) {
  f.x *= 2;
  return f.x;
}

static int get_y(struct foo f) {
  f.y *= 3;
  return f.y;
}

int main() {
  struct foo f = { atoi("1"), atoi("2") };
  int x = get_x(f);
  int y = get_y(f);
  printf("%d %d %d %d\n", x, y, f.x, f.y);
}

produces

; ModuleID = '../test_struct_split.c'
source_filename = "../test_struct_split.c"
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-wasi"

%struct.foo = type { i32, i32 }

@.str = private unnamed_addr constant [2 x i8] c"1\00", align 1
@.str.1 = private unnamed_addr constant [2 x i8] c"2\00", align 1
@.str.2 = private unnamed_addr constant [13 x i8] c"%d %d %d %d\0A\00", align 1
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @__main_void to i8*)], section "llvm.metadata"

@__main_void = alias i32 (), i32 ()* @main

; Function Attrs: noinline nounwind optnone
define hidden i32 @main() #0 {
entry:
  %f = alloca %struct.foo, align 4
  %x2 = alloca i32, align 4
  %y4 = alloca i32, align 4
  %x = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 0
  %call = call i32 @atoi(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0))
  store i32 %call, i32* %x, align 4
  %y = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 1
  %call1 = call i32 @atoi(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
  store i32 %call1, i32* %y, align 4
  %call3 = call i32 @get_x(%struct.foo* byval(%struct.foo) align 4 %f)
  store i32 %call3, i32* %x2, align 4
  %call5 = call i32 @get_y(%struct.foo* byval(%struct.foo) align 4 %f)
  store i32 %call5, i32* %y4, align 4
  %0 = load i32, i32* %x2, align 4
  %1 = load i32, i32* %y4, align 4
  %x6 = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 0
  %2 = load i32, i32* %x6, align 4
  %y7 = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 1
  %3 = load i32, i32* %y7, align 4
  %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.2, i32 0, i32 0), i32 %0, i32 %1, i32 %2, i32 %3)
  ret i32 0
}

declare i32 @atoi(i8*) #1

; Function Attrs: noinline nounwind optnone
define internal i32 @get_x(%struct.foo* byval(%struct.foo) align 4 %f) #0 {
entry:
  %x = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 0
  %0 = load i32, i32* %x, align 4
  %mul = mul nsw i32 %0, 2
  store i32 %mul, i32* %x, align 4
  %x1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 0
  %1 = load i32, i32* %x1, align 4
  ret i32 %1
}

; Function Attrs: noinline nounwind optnone
define internal i32 @get_y(%struct.foo* byval(%struct.foo) align 4 %f) #0 {
entry:
  %y = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 1
  %0 = load i32, i32* %y, align 4
  %mul = mul nsw i32 %0, 3
  store i32 %mul, i32* %y, align 4
  %y1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 1
  %1 = load i32, i32* %y1, align 4
  ret i32 %1
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind optnone "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 12.0.1 (https://github.com/microsoft/vcpkg.git 2a31089e777fc187f1cc05338250b8e1810cfb52)"}

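whose expected output is 2 6 1 2: get_x and get_y each modify only their own by-value copy of f, so the f.x and f.y printed by main are unchanged.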
Decompiling it via Rellic produces

unsigned char _str[2] = "1\000";
unsigned char _str_1[2] = "2\000";
unsigned char _str_2[13] = "%d %d %d %d\n\000";
unsigned int main();
unsigned int atoi(unsigned char *arg0);
struct struct_foo {
    unsigned int field0;
    unsigned int field1;
};
unsigned int get_x(struct struct_foo *f);
unsigned int get_y(struct struct_foo *f);
unsigned int printf(unsigned char *arg0, ...);
unsigned int main() {
    struct struct_foo var0;
    unsigned int var1;
    unsigned int var2;
    unsigned int val3;
    unsigned int val4;
    unsigned int val5;
    unsigned int val6;
    unsigned int val7;
    val3 = atoi(_str);
    var0.field0 = val3;
    val4 = atoi(_str_1);
    var0.field1 = val4;
    val5 = get_x(&var0);
    var1 = val5;
    val6 = get_y(&var0);
    var2 = val6;
    val7 = printf(_str_2, var1, var2, var0.field0, var0.field1);
    return 0U;
}
unsigned int get_x(struct struct_foo *f) {
    f->field0 = f->field0 * 2U;
    return f->field0;
}
unsigned int get_y(struct struct_foo *f) {
    f->field1 = f->field1 * 3U;
    return f->field1;
}

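whose output (after a bit of massaging to correct the type inaccuracies) is 2 6 2 6. This is incorrect: the byval arguments are decompiled as plain pointers, so the structs are passed by reference instead of by value and the callees' writes modify the caller's struct.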

@frabert frabert added bug Something isn't working decomp Related to LLVM IR to C decompiler labels Oct 27, 2021
@pgoodman
Copy link
Collaborator

Good find.

@frabert
Copy link
Collaborator Author

frabert commented Nov 1, 2021

This issue also shows up in some of the AnghaBench tests, for example in amd64/linux/drivers/gpu/drm/gma500/extr_psb_intel_sdvo.c_psb_intel_sdvo_create_enhance_property_tv.bc, where a %struct.psb_intel_sdvo_enhancements_reply is passed byval.
Debug info for such arguments is produced as if the argument were a struct value, not a pointer value, so it currently crashes (see #191).

Possible solution: when a byval argument is detected, produce

int foo(struct foo arg0_byval) {
  struct foo *arg0 = &arg0_byval;
  // ...
}

instead of

int foo(struct foo *arg0) {
  // ...
}

to minimize the number of changes to the backend

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working decomp Related to LLVM IR to C decompiler
Projects
None yet
Development

Successfully merging a pull request may close this issue.

2 participants