Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix BOF in the last channel handling logic of SSE ShuffleChannel #5735

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 48 additions & 3 deletions src/layer/arm/shufflechannel_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
float32x4_t _p0 = vld1q_f32(ptr0);
float32x4_t _p1 = vld1q_f32(ptr1);
Expand All @@ -130,6 +130,21 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
Comment on lines +133 to +147
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unroll this

}

return 0;
Expand Down Expand Up @@ -364,7 +379,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -378,6 +393,21 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 8;
outptr0 += 8;
}

for (int i = 0; i < 8; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
Comment on lines +396 to +410
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is a risk of out of range reading in some of the arm pack8 codes.

}

return 0;
Expand Down Expand Up @@ -598,7 +628,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -611,6 +641,21 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr0 = *ptr1;
ptr1 += 1;
}
else
{
*outptr0 = *ptr0;
ptr0 += 1;
}
outptr0 += 1;
}
Comment on lines +645 to +658
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unroll this

}

return 0;
Expand Down
51 changes: 48 additions & 3 deletions src/layer/x86/shufflechannel_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 8;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m256 _p0 = _mm256_loadu_ps(ptr0);
__m256 _p1 = _mm256_loadu_ps(ptr1);
Expand All @@ -134,6 +134,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 16;
outptr += 16;
}

for (int i = 0; i < 16; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
Comment on lines +137 to +151
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is a risk of out of range reading in some of the pack8/pack16 codes.

}

return 0;
Expand Down Expand Up @@ -372,7 +387,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -387,6 +402,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 8;
outptr += 8;
}

for (int i = 0; i < 8; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is a risk of out of range reading in some of the pack8/pack16 codes.

}

return 0;
Expand Down Expand Up @@ -607,7 +637,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -620,6 +650,21 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr += 4;
}

for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
Comment on lines +653 to +667
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unroll the tail part

Suggested change
for (int i = 0; i < 4; i++)
{
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
{
outptr[0] = ptr0[0];
outptr[1] = ptr1[0];
outptr[2] = ptr0[1];
outptr[3] = ptr1[1];
ptr0 += 2;
ptr1 += 2;
outptr += 4;
}

}

return 0;
Expand Down
Loading