fq2fa 尝试使用 fast_zlib
起因
看到“适用于绝大部分临床NGS数据分析的底层高度性能优化方案”,想测试一下效果
1. 查看 fq2fa.c 文件
cat fq2fa.c
C |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40 | #include <stdio.h>
#include <zlib.h>
#include "klib/kseq.h"
KSEQ_INIT(gzFile, gzread)
int main(int argc, char *argv[])
{
gzFile fp;
gzFile fo;
if (argc < 2 ){
return -1;
}
if ( argc == 3 ){
fo = gzopen (argv[2], "wb");
}
kseq_t *seq;
int l;
if (argc == 1){
fprintf(stderr, "Usage: %s <in.fasta|in.fasta.gz>\n", argv[0]);
return 1;
}
fp = gzopen(argv[1], "r");
seq = kseq_init(fp); // 分配内存给seq
while( (l = kseq_read(seq)) >= 0){ //读取数据到seq中
gzprintf(fo, "%s", seq->name.s);
gzprintf(fo, "%s", seq->seq.s);
}
kseq_destroy(seq); //释放内存
gzclose(fp);
if (argc == 3) gzclose(fo);
return 0;
}
|
2. 系统安装zlib
Bash |
---|
| yum install -y zlib zlib-devel
|
Text Only |
---|
| apt-get install -y zlib1g zlib1g-dev
|
3. 下载fast_zlib, 下载zlib
- 下载 klib
Text Only |
---|
| git clone https://github.com/attractivechaos/klib.git
|
移动到 fq2fa
文件夹
Bash |
---|
| git clone https://github.com/gildor2/fast_zlib.git
|
- http://www.zlib.net/
- 下载 http://www.zlib.net/zlib-1.2.12.tar.gz
Bash |
---|
| $ sha256sum zlib-1.2.12.tar.gz
91844808532e5ce316b3c010929493c0244f3d37593afd6de04f71821d5136d9 zlib-1.2.12.tar.gz
|
4. 修改zlib代码
- 复制
fast_zlib/Sources/match.h
到 zlib-1.2.12
cd zlib-1.2.12
mv deflate.c deflate.old.c
vim deflate.c
- 写入:
C |
---|
| #define ASMV
#include "deflate.old.c"
#undef local
#define local
#include "match.h"
/* Intentionally empty: this build has no initialisation work to do here.
 * NOTE(review): presumably needed because compiling with ASMV makes
 * deflate.old.c expect an externally supplied match_init() alongside the
 * longest_match() taken from match.h -- confirm against deflate.c. */
void match_init()
{
}
|
5. 编译安装 zlib-1.2.12
Bash |
---|
| ./configure --prefix=/home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build --shared --static
make && make install
|
6. 编译链接 fq2fa.c
下载klib
Text Only |
---|
| git clone git@github.com:attractivechaos/klib.git
|
Text Only |
---|
| gcc -o fq2fa_zlib fq2fa.c -lz -Lzlib
gcc -o fq2fa_fast_zlib fq2fa.c -I/home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build/include -L/home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build/lib -lz
|
查看 MD5:
Text Only |
---|
| $ md5sum fq2fa_zlib fq2fa_fast_zlib
4c03dc0377470f6a589e1bb4a9ffb7b0 fq2fa_zlib
e237f08440a7db5821aa902c5a8cfc1a fq2fa_fast_zlib
|
7. 测试 zlib 和 fast_zlib 版本各自的速度
生成测试文件, in.fq.gz 太小,体现不出速度差异
Bash |
---|
| for i in $(seq 1 4000);do cat in.fq.gz >> test.fq.gz ;done
$ zcat in.fq.gz |wc -l
4000000
|
fq2fa_zlib:
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /bin/time -v ./fq2fa_zlib test.fq.gz test_zlib.fa.gz
Command being timed: "./fq2fa_zlib test.fq.gz test_zlib.fa.gz"
User time (seconds): 35.82
System time (seconds): 0.17
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:36.29
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 980
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 1
Minor (reclaiming a frame) page faults: 289
Voluntary context switches: 14
Involuntary context switches: 813
Swaps: 0
File system inputs: 205752
File system outputs: 93184
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
fq2fa_fast_zlib:
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /bin/time -v ./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz
Command being timed: "./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz"
User time (seconds): 34.85
System time (seconds): 0.11
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:35.31
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 980
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 290
Voluntary context switches: 5
Involuntary context switches: 730
Swaps: 0
File system inputs: 8
File system outputs: 93184
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
查看输出结果一致性:
Bash |
---|
| $ md5sum test_zlib.fa.gz test_fast_zlib.fa.gz
5bd3de8cf81c78962aa7100da6ab2719 test_zlib.fa.gz
5bd3de8cf81c78962aa7100da6ab2719 test_fast_zlib.fa.gz
|
8. 疑问?
- fast_zlib 对 zlib的优化是否成功? 如果成功了,为什么两个版本的程序速度没有差异
9. 听从大佬建议,使用静态库
或者 直接用
Bash |
---|
| gcc -o fq2fa_fast_zlib fq2fa.c /home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build/lib/libz.a
$ md5sum fq2fa_zlib fq2fa_fast_zlib
4c03dc0377470f6a589e1bb4a9ffb7b0 fq2fa_zlib
262db896e101b93ca1f2b0b7b6ee8ddd fq2fa_fast_zlib
|
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /bin/time -v ./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz
Command being timed: "./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz"
User time (seconds): 24.68
System time (seconds): 0.12
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.99
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1012
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 292
Voluntary context switches: 6
Involuntary context switches: 535
Swaps: 0
File system inputs: 0
File system outputs: 93240
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
可以看到,速度明显快了⅓
10. 发现异常
- 上述测试是在Centos 7.9, 2 CPUs, 4G MEM 环境下测试
- 切换至 Ubuntu 18.04, 36 CPUs, 128G MEM / Ubuntu 20.04, 32 CPUs, 128G MEM后,发现 优化后的速度还不如不优化
Bash |
---|
| for i in $(seq 1 10);do printf "test.fq.gz ";done
cat test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz > aa.fq.gz
|
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /usr/bin/time -v ./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz
Command being timed: "./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz"
User time (seconds): 19.61
System time (seconds): 0.02
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:19.63
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1888
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 152
Voluntary context switches: 1
Involuntary context switches: 25
Swaps: 0
File system inputs: 0
File system outputs: 93128
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /usr/bin/time -v ./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz
Command being timed: "./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz"
User time (seconds): 18.20
System time (seconds): 0.03
Percent of CPU this job got: 100%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.24
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 2040
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 160
Voluntary context switches: 1
Involuntary context switches: 23
Swaps: 0
File system inputs: 0
File system outputs: 93064
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
- 推测:
- Ubuntu 系统上
fast_zlib
对 longest_match
函数的实现 与 CentOS 系统上的不同,所以相同的修改效果不显著,甚至是无用的
- 更换的两个Ubuntu系统均为多核心CPU, 高内存服务器,使得
fast_zlib
对 longest_match
函数的优化仅能在较少 CPU 和较少内存时体现优势
11. 解决 10 提出的异常
11.1 重新编译,保持单一变量
- 上述两个程序的编译命令不同,不符合单一变量原则
- 解决:
- 解压 zlib-1.2.12.tar.gz
cp -r zlib-1.2.12 fast_zlib-1.2.12
- 不修改 zlib 代码,直接编译
zlib-1.2.12
- 按 4 5 步骤,修改zlib代码,编译
fast_zlib-1.2.12
- 分别编译链接
fq2fa
gcc -o fq2fa_fast_zlib fq2fa.c /home/lixy/myproject/fast_zlib_test/fast_zlib-1.2.12/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/fast_zlib-1.2.12/build/include
gcc -o fq2fa_zlib fq2fa.c /home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/include
测试两个文件的速度:
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /usr/bin/time -v ./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz
Command being timed: "./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz"
User time (seconds): 28.85
System time (seconds): 0.05
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:28.91
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1892
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 153
Voluntary context switches: 1
Involuntary context switches: 37
Swaps: 0
File system inputs: 0
File system outputs: 93064
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /usr/bin/time -v ./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz
Command being timed: "./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz"
User time (seconds): 19.87
System time (seconds): 0.05
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:19.92
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1948
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 152
Voluntary context switches: 1
Involuntary context switches: 26
Swaps: 0
File system inputs: 0
File system outputs: 93128
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 | $ /usr/bin/time -v ./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz
Command being timed: "./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz"
User time (seconds): 18.59
System time (seconds): 0.07
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.68
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 2020
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 158
Voluntary context switches: 2
Involuntary context switches: 24
Swaps: 0
File system inputs: 0
File system outputs: 93064
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
11.2 查看系统(ubuntu)中zlib的版本
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14 | $ cat /usr/lib/x86_64-linux-gnu/pkgconfig/zlib.pc
prefix=/usr
exec_prefix=${prefix}
libdir=${prefix}/lib/x86_64-linux-gnu
sharedlibdir=${libdir}
includedir=${prefix}/include
Name: zlib
Description: zlib compression library
Version: 1.2.11
Requires:
Libs: -L${libdir} -L${sharedlibdir} -lz
Cflags: -I${includedir}
|
11.3 那么,zlib-1.2.11
会比 zlib-1.2.12
更快吗?
测试如下:
Text Only |
---|
| axel -n 8 https://github.com/madler/zlib/archive/refs/tags/v1.2.11.tar.gz
|
Text Only |
---|
| $ md5sum zlib-1.2.11.tar.gz
0095d2d2d1f3442ce1318336637b695f zlib-1.2.11.tar.gz
|
编译安装
Text Only |
---|
| mkdir build
./configure --prefix=/home/lixy/myproject/fast_zlib_test/zlib-1.2.11/build --shared --static
make && make install
|
编译
Bash |
---|
| gcc -o fq2fa_zlib-1.2.12 fq2fa.c /home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/include
gcc -o fq2fa_zlib-1.2.11 fq2fa.c /home/lixy/myproject/fast_zlib_test/zlib-1.2.11/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/zlib-1.2.11/build/include
gcc -o fq2fa_zlib-ubuntu fq2fa.c -lz -Lzlib
(
gcc -o fq2fa_zlib-ubuntu fq2fa.c /usr/lib/x86_64-linux-gnu/libz.a -I/usr/include/
)
|
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77 | $ /usr/bin/time -v ./fq2fa_zlib-1.2.11 aa.fq.gz aa_zlib-1.2.11.fa.gz
Command being timed: "./fq2fa_zlib-1.2.11 aa.fq.gz aa_zlib-1.2.11.fa.gz"
User time (seconds): 29.69
System time (seconds): 0.03
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:29.73
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1948
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 153
Voluntary context switches: 1
Involuntary context switches: 38
Swaps: 0
File system inputs: 0
File system outputs: 93064
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
$ /usr/bin/time -v ./fq2fa_zlib-1.2.12 aa.fq.gz aa_zlib-1.2.12.fa.gz
Command being timed: "./fq2fa_zlib-1.2.12 aa.fq.gz aa_zlib-1.2.12.fa.gz"
User time (seconds): 29.02
System time (seconds): 0.07
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:29.10
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1948
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 153
Voluntary context switches: 2
Involuntary context switches: 39
Swaps: 0
File system inputs: 0
File system outputs: 93064
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
$ /usr/bin/time -v ./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz
Command being timed: "./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz"
User time (seconds): 18.58
System time (seconds): 0.03
Percent of CPU this job got: 100%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.61
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 2008
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 157
Voluntary context switches: 1
Involuntary context switches: 22
Swaps: 0
File system inputs: 0
File system outputs: 93064
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
- 发现,我们编译的
zlib-1.2.11
速度和 zlib-1.2.12
一样,但是却比使用系统默认zlib的版本慢很多
- 难道是 zlib 在编译的过程中,可以加入一些优化参数?
- 后续我在一个 docker image 中测试了几种版本的区别,发现:先在 ubuntu 中安装 zlib 相关的包,再用
gcc -o fq2fa_zlib-ubuntu fq2fa.c -lz -Lzlib
编译出的程序,确实比使用手动编译的 zlib-1.2.11
和 zlib-1.2.12
的程序快,原因未知。
12 为什么系统自带的zlib(deb 1.2.11)比我们手动编译的要快
Bash |
---|
| $ apt list --installed | rg zlib
WARNING: apt does not have a stable CLI interface. Use with caution in scripts.
zlib1g-dev/focal-updates,focal-security,now 1:1.2.11.dfsg-2ubuntu1.3 amd64 [installed]
zlib1g/focal-updates,focal-security,now 1:1.2.11.dfsg-2ubuntu1.3 amd64 [installed,automatic]
|
在网上搜索 ubuntu 中的这些包,找到了 build.log 文件
- https://www.ubuntuupdates.org/pm/zlib1g-dev
- https://www.ubuntuupdates.org/pm/zlib1g
- https://launchpadlibrarian.net/593215494/buildlog_ubuntu-focal-amd64.zlib_1%3A1.2.11.dfsg-2ubuntu1.3_BUILDING.txt.gz
build.log 文件 编译命令就是:
Text Only |
---|
| AR=ar CC="x86_64-linux-gnu-gcc" CFLAGS="`dpkg-buildflags --get CFLAGS` `dpkg-buildflags --get CPPFLAGS` -Wall -D_REENTRANT -O3 -DUNALIGNED_OK" LDFLAGS="`dpkg-buildflags --get LDFLAGS`" uname=GNU ./configure --shared --prefix=/usr --libdir=\${prefix}/lib/x86_64-linux-gnu
|
创建 build.sh
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 | #!/bin/bash
#@File : build.sh
#@Time : 2022/08/18 10:41:29
#@Author : biolxy
#@Version : 1.0
#@Contact : biolxy@aliyun.com
#@Desc : None
# Absolute path of the directory that contains this script.
SCRIPT_FOLDER=$(cd "$(dirname "$0")";pwd)
# Start from a clean source tree before re-running configure.
make distclean
# Recreate an empty _build directory in the current working directory.
# NOTE(review): the install prefix below is ${SCRIPT_FOLDER}/_build, so this
# only refers to the same directory when the script is run from its own folder.
test -d _build && rm -rf _build
mkdir _build
# --static
# --shared
# Same compiler and flags as the Ubuntu package build log (-O3 -DUNALIGNED_OK
# plus the dpkg-buildflags defaults); only --prefix points to the local _build.
AR=ar CC="x86_64-linux-gnu-gcc" CFLAGS="`dpkg-buildflags --get CFLAGS` `dpkg-buildflags --get CPPFLAGS` -Wall -D_REENTRANT -O3 -DUNALIGNED_OK" LDFLAGS="`dpkg-buildflags --get LDFLAGS`" uname=GNU ./configure --shared --prefix=${SCRIPT_FOLDER}/_build
# Build, then install into the prefix only if the build succeeded.
make && make install
|
执行 bash ./build.sh
重新编译链接程序,发现 手动编译的程序的速度也来到了 22s
, 可以确定确实是不同的编译参数导致zlib库文件的执行效率不同
12.2 fast_zlib 的优化 是否能与 ubuntu zlib的编译参数一起使用
- 可以,但是没有效,编译出的
fq2fa_fast_zlib-1.2.11
速度还是在 24s
, 和未使用 ubuntu zlib的编译参数 前的速度一致
13 测试 zlib-1.3.1 和 fast_zlib-1.2.13 版本各自的速度
- https://github.com/biolxy/zlib/tree/fast_zlib-v1.2.13
最近 zlib 升级到了 1.3.1 , fast_zlib 也升级到了 zlib-1.2.13
13.1 编译zlib 时不使用优化参数
Bash |
---|
| ./configure --prefix=/fast_zlib_test/zlib-1.3.1/_build --shared --static
|
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 | $ /usr/bin/time -v ./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz
Command being timed: "./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz"
User time (seconds): 24.58
System time (seconds): 0.10
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.74
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1860
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 357
Voluntary context switches: 144
Involuntary context switches: 166
Swaps: 0
File system inputs: 0
File system outputs: 92624
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
$ /usr/bin/time -v ./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz
Command being timed: "./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz"
User time (seconds): 37.16
System time (seconds): 0.13
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:37.48
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1828
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 257
Voluntary context switches: 154
Involuntary context switches: 300
Swaps: 0
File system inputs: 0
File system outputs: 92608
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
13.2 编译时添加 -O3
优化后
Bash |
---|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50 | $ /usr/bin/time -v ./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz
Command being timed: "./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz"
User time (seconds): 23.62
System time (seconds): 0.12
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:23.84
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1852
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 152
Voluntary context switches: 188
Involuntary context switches: 163
Swaps: 0
File system inputs: 0
File system outputs: 92624
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
$ /usr/bin/time -v ./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz
Command being timed: "./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz"
User time (seconds): 21.15
System time (seconds): 0.10
Percent of CPU this job got: 99%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:21.42
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1856
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 357
Voluntary context switches: 58
Involuntary context switches: 191
Swaps: 0
File system inputs: 0
File system outputs: 92608
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
|
- 结论1: 编译zlib时不使用
-O3
时, fast_zlib-1.2.13 和 zlib-1.3.1 耗时比是 24.74 / 37.48 = 0.6601 , 可节约 ⅓ 的时间
- 结论2: 编译zlib时使用
-O3
时, fast_zlib-1.2.13 和 zlib-1.3.1 耗时比是 23.84 / 21.42 = 1.1130 , 不能节约时间(反而慢约 11%)
- 建议: 以后写c/cpp项目多使用
-O3