Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Diskscan hangs on Fedora 25 #58

Open
deamen opened this issue Apr 12, 2017 · 7 comments
Open

Diskscan hangs on Fedora 25 #58

deamen opened this issue Apr 12, 2017 · 7 comments

Comments

@deamen
Copy link

deamen commented Apr 12, 2017

When I try to run diskscan /dev/sdc, it hangs at "I: Validating path /dev/sdc"

The strace log as below:

execve("./diskscan", ["./diskscan", "/dev/sdc"], [/* 20 vars */]) = 0
brk(NULL)                               = 0xd3c000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f42c9ad3000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=29307, ...}) = 0
mmap(NULL, 29307, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f42c9acb000
close(3)                                = 0
open("/lib64/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 W\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1169408, ...}) = 0
mmap(NULL, 3178728, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f42c95a7000
mprotect(0x7f42c96af000, 2093056, PROT_NONE) = 0
mmap(0x7f42c98ae000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x107000) = 0x7f42c98ae000
close(3)                                = 0
open("/lib64/libtinfo.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240\331\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=180200, ...}) = 0
mmap(NULL, 2275584, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f42c937b000
mprotect(0x7f42c93a2000, 2097152, PROT_NONE) = 0
mmap(0x7f42c95a2000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x27000) = 0x7f42c95a2000
close(3)                                = 0
open("/lib64/libz.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0` \0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=89520, ...}) = 0
mmap(NULL, 2183272, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f42c9165000
mprotect(0x7f42c917a000, 2093056, PROT_NONE) = 0
mmap(0x7f42c9379000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14000) = 0x7f42c9379000
close(3)                                = 0
open("/lib64/librt.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340 \0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=44040, ...}) = 0
mmap(NULL, 2128800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f42c8f5d000
mprotect(0x7f42c8f64000, 2093056, PROT_NONE) = 0
mmap(0x7f42c9163000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f42c9163000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \5\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2115824, ...}) = 0
mmap(NULL, 3955040, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f42c8b97000
mprotect(0x7f42c8d54000, 2093056, PROT_NONE) = 0
mmap(0x7f42c8f53000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bc000) = 0x7f42c8f53000
mmap(0x7f42c8f59000, 14688, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f42c8f59000
close(3)                                = 0
open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240`\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=151104, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f42c9ac9000
mmap(NULL, 2217000, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f42c8979000
mprotect(0x7f42c8991000, 2097152, PROT_NONE) = 0
mmap(0x7f42c8b91000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18000) = 0x7f42c8b91000
mmap(0x7f42c8b93000, 13352, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f42c8b93000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f42c9ac7000
arch_prctl(ARCH_SET_FS, 0x7f42c9ac7700) = 0
mprotect(0x7f42c8f53000, 16384, PROT_READ) = 0
mprotect(0x7f42c8b91000, 4096, PROT_READ) = 0
mprotect(0x7f42c9163000, 4096, PROT_READ) = 0
mprotect(0x7f42c9379000, 4096, PROT_READ) = 0
mprotect(0x7f42c95a2000, 16384, PROT_READ) = 0
mprotect(0x7f42c98ae000, 4096, PROT_READ) = 0
mprotect(0x617000, 4096, PROT_READ)     = 0
mprotect(0x7f42c9ad5000, 4096, PROT_READ) = 0
munmap(0x7f42c9acb000, 29307)           = 0
set_tid_address(0x7f42c9ac79d0)         = 4494
set_robust_list(0x7f42c9ac79e0, 24)     = 0
rt_sigaction(SIGRTMIN, {sa_handler=0x7f42c897eb40, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x7f42c898a5c0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f42c897ebd0, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f42c898a5c0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
brk(NULL)                               = 0xd3c000
brk(0xd5d000)                           = 0xd5d000
brk(NULL)                               = 0xd5d000
write(1, "diskscan version 0.19\n\n", 23) = 23
rt_sigaction(SIGINT, {sa_handler=0x404033, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f42c8bcc990}, NULL, 8) = 0
rt_sigaction(SIGTERM, {sa_handler=0x404033, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f42c8bcc990}, NULL, 8) = 0
write(1, "I: Validating path /dev/sdc\n", 28) = 28
access("/dev/sdc", F_OK)                = 0
access("/dev/sdc", R_OK)                = 0
open("/dev/sdc", O_RDWR|O_DIRECT)       = 3
ioctl(3, SG_IO, {interface_id='S', dxfer_direction=SG_DXFER_FROM_DEV, cmd_len=10, cmdp="\x25\x00\x00\x00\x00\x00\x00\x00\x00\x00", mx_sb_len=128, iovec_count=0, dxfer_len=512, timeout=600000, flags=SG_FLAG_UNUSED_LUN_INHIBIT, dxferp="\x2e\x93\x90\xaf\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"..., status=0, masked_status=0, msg_status=0, sb_len_wr=0, sbp="", host_status=0, driver_status=0, resid=0, duration=0, info=0}) = 0
ioctl(3, SG_IO, {interface_id='S', dxfer_direction=SG_DXFER_FROM_DEV, cmd_len=6, cmdp="\x12\x00\x00\x00\x60\x00", mx_sb_len=128, iovec_count=0, dxfer_len=512, timeout=600000, flags=SG_FLAG_UNUSED_LUN_INHIBIT, dxferp="\x00\x00\x06\x12\x45\x00\x00\x02\x41\x54\x41\x20\x20\x20\x20\x20\x53\x54\x33\x34\x30\x30\x36\x32\x30\x4e\x53\x20\x20\x20\x20\x20"..., status=0, masked_status=0, msg_status=0, sb_len_wr=0, sbp="", host_status=0, driver_status=0, resid=438, duration=1, info=0}) = 0
ioctl(3, SG_IO, {interface_id='S', dxfer_direction=SG_DXFER_FROM_DEV, cmd_len=12, cmdp="\xa1\x0c\x0e\x00\x01\x00\x00\x00\x00\xec\x00\x00", mx_sb_len=128, iovec_count=0, dxfer_len=512, timeout=600000, flags=SG_FLAG_UNUSED_LUN_INHIBIT
@baruch
Copy link
Owner

baruch commented Apr 12, 2017

What drive is this?
Can you show the output of smartctl -a /dev/sdc?

It looks like some diagnostics command didn't return from the drive.

@baruch
Copy link
Owner

baruch commented Apr 12, 2017

No idea why it got hanged like that but I probably should set the timeout to a lower value in any case to avoid it hanging.

baruch added a commit that referenced this issue Apr 12, 2017
Use 5 seconds for diagnostic IOs (read cap, ata identify, etc.)

Use 1 minute for data IOs, should be long enough to be safe for media
errors and not too long to bore the user when there are many errors.

Related to #58
@baruch
Copy link
Owner

baruch commented Apr 12, 2017

In addition to the requested information from above, can you please try the head of the master branch? I reduced the timeout for the stuck IO to 5 seconds but I have no way to test it locally.

@deamen
Copy link
Author

deamen commented Apr 12, 2017

Hi Baruch,
The HDD is a Seagate SATA HDD, ST3400620NS.

The hanging issue only happens when the drive is connected to a LSI SAS HBA, when I connect the drive to an on-board SATA port diskscan works fine.
The LSI SAS HBA information:
02:00.0 Serial Attached SCSI controller: LSI Logic / Symbios Logic SAS2008 PCI-Express Fusion-MPT SAS-2 [Falcon] (rev 03)

I will try your timeout change later today and report back to you.

The HDD information as below:
hdd.txt
Thanks

@deamen
Copy link
Author

deamen commented Apr 12, 2017

Hi Baruch,
I confirm the timeout commit (cd596a1) works, I can cancel diskscan with ctrl + c now:

diskscan version 0.19

I: Validating path /dev/sdc
E: Failed to read SMART attributes from device
I: Opened disk /dev/sdc sector size 512 num bytes 5000981077504
I: Scanning disk /dev/sdc in 65536 byte steps
I: Scan started at: Thu Apr 13 09:01:22 2017

Disk scan |                                                                                                                  | ETA: 0h00m00s
E: Error when reading at offset 0 size 65536 read -1, errno=0: Success

E: Details: error=need_retry data=none 06/29/00

E: Error when reading at offset 40501248 size 65536 read -1, errno=0: Success

E: Details: error=need_retry data=none 06/29/00
^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C^C
E: Failed to read SMART attributes from device
I: Disk scan interrupted

@MasterCATZ
Copy link

can we have a setting to set our own time out
as 2ms seems to be my max timeout at the moment
still takes a long time to try and cancel

even pkill -9 did not do much to finish it

@baruch
Copy link
Owner

baruch commented Aug 26, 2017

I can add a command line option to set the timeout but a too low timeout will mean that it will be impossible to scan bad disks. I'll also try to think about what else can be done in such cases.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants