跳转至

C/C++

fq2fa 尝试使用 fast_zlib

起因

看到《适用于绝大部分临床NGS数据分析的底层高度性能优化方案》一文,想测试一下效果

1. 查看 fq2fa.c 文件

cat fq2fa.c

C
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#include <stdio.h>
#include <zlib.h>
#include "klib/kseq.h"


KSEQ_INIT(gzFile, gzread)

int main(int argc, char *argv[])
{

        gzFile fp;
        gzFile fo;
        if (argc < 2 ){
            return -1;
        }
        if ( argc == 3 ){
            fo = gzopen (argv[2], "wb");
        }

        kseq_t *seq;
        int l;
        if (argc == 1){
            fprintf(stderr, "Usage: %s <in.fasta|in.fasta.gz>\n", argv[0]);
            return 1;
        }

        fp = gzopen(argv[1], "r");
        seq = kseq_init(fp); // 分配内存给seq
        while( (l = kseq_read(seq)) >= 0){ //读取数据到seq中
            gzprintf(fo, "%s", seq->name.s);
            gzprintf(fo, "%s", seq->seq.s);
        }

        kseq_destroy(seq); //释放内存
        gzclose(fp);
        if (argc == 3) gzclose(fo);
        return 0;


}

2. 系统安装zlib

Bash
1
yum install -y zlib zlib-devel
Text Only
1
apt-get install -y zlib1g zlib1g-dev

3. 下载fast_zlib, 下载zlib

  • 下载 klib
    Text Only
    1
    git clone https://github.com/attractivechaos/klib.git
    
    移动到 fq2fa 文件夹
Bash
1
git clone https://github.com/gildor2/fast_zlib.git
  • http://www.zlib.net/
  • 下载 http://www.zlib.net/zlib-1.2.12.tar.gz
    Bash
    1
    2
    $ sha256sum zlib-1.2.12.tar.gz 
    91844808532e5ce316b3c010929493c0244f3d37593afd6de04f71821d5136d9  zlib-1.2.12.tar.gz
    

4. 修改zlib代码

  1. 复制 fast_zlib/Sources/match.h 到 zlib-1.2.12
  2. cd zlib-1.2.12
  3. mv deflate.c deflate.old.c
  4. vim deflate.c
  5. 写入:
C
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
/*
 * Replacement deflate.c: pulls in the stock implementation (renamed to
 * deflate.old.c) and then fast_zlib's match.h.
 */
/* NOTE(review): ASMV presumably tells the stock deflate code to leave out its
 * own C longest_match() and rely on an externally supplied one -- confirm
 * against deflate.old.c. */
#define ASMV
#include "deflate.old.c"

/* Redefine `local` as empty before including match.h. NOTE(review): zlib
 * presumably defines `local` as `static`, so this would give the definitions
 * in match.h external linkage -- confirm against zutil.h. */
#undef local
#define local

#include "match.h"

/* Intentionally empty stub. NOTE(review): presumably needed because the ASMV
 * build path expects a match_init() symbol -- confirm. */
void match_init()
{
}

5. 编译安装 zlib-1.2.12

Bash
1
2
./configure --prefix=/home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build --shared --static
make && make install

6. 编译链接 fq2fa.c

下载klib

Text Only
1
git clone git@github.com:attractivechaos/klib.git
Text Only
1
2
gcc -o fq2fa_zlib fq2fa.c -lz -Lzlib 
gcc -o fq2fa_fast_zlib fq2fa.c -I/home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build/include -L/home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build/lib -lz

查看 MD5:

Text Only
1
2
3
$  md5sum fq2fa_zlib fq2fa_fast_zlib 
4c03dc0377470f6a589e1bb4a9ffb7b0  fq2fa_zlib
e237f08440a7db5821aa902c5a8cfc1a  fq2fa_fast_zlib

7. 测试 zlib 和 fast_zlib 版本各自的速度

生成测试文件, in.fq.gz 太小,体现不出速度差异

Bash
1
2
3
for i in $(seq 1 4000);do cat in.fq.gz >> test.fq.gz ;done
$ zcat in.fq.gz |wc -l 
4000000

fq2fa_zlib:

Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /bin/time -v ./fq2fa_zlib test.fq.gz test_zlib.fa.gz  
    Command being timed: "./fq2fa_zlib test.fq.gz test_zlib.fa.gz"
    User time (seconds): 35.82
    System time (seconds): 0.17
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:36.29
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 980
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 1
    Minor (reclaiming a frame) page faults: 289
    Voluntary context switches: 14
    Involuntary context switches: 813
    Swaps: 0
    File system inputs: 205752
    File system outputs: 93184
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

fq2fa_fast_zlib:

Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /bin/time -v ./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz  
    Command being timed: "./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz"
    User time (seconds): 34.85
    System time (seconds): 0.11
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:35.31
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 980
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 290
    Voluntary context switches: 5
    Involuntary context switches: 730
    Swaps: 0
    File system inputs: 8
    File system outputs: 93184
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

查看输出结果一致性:

Bash
1
2
3
$ md5sum test_zlib.fa.gz test_fast_zlib.fa.gz  
5bd3de8cf81c78962aa7100da6ab2719  test_zlib.fa.gz
5bd3de8cf81c78962aa7100da6ab2719  test_fast_zlib.fa.gz

8. 疑问?

  • fast_zlib 对 zlib的优化是否成功? 如果成功了,为什么两个版本的程序速度没有差异

9. 听从大佬建议,使用静态库

或者 直接用

Bash
1
2
3
4
5
gcc -o fq2fa_fast_zlib fq2fa.c /home/lixy/Clion/fast_zlib_test/zlib-1.2.12/build/lib/libz.a

$ md5sum fq2fa_zlib fq2fa_fast_zlib               
4c03dc0377470f6a589e1bb4a9ffb7b0  fq2fa_zlib
262db896e101b93ca1f2b0b7b6ee8ddd  fq2fa_fast_zlib
Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /bin/time -v ./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz                          
    Command being timed: "./fq2fa_fast_zlib test.fq.gz test_fast_zlib.fa.gz"
    User time (seconds): 24.68
    System time (seconds): 0.12
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.99
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1012
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 292
    Voluntary context switches: 6
    Involuntary context switches: 535
    Swaps: 0
    File system inputs: 0
    File system outputs: 93240
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

可以看到,速度明显快了⅓

10. 发现异常

  • 上述测试是在Centos 7.9, 2 CPUs, 4G MEM 环境下测试
  • 切换至 Ubuntu 18.04, 36 CPUs, 128G MEM / Ubuntu 20.04, 32 CPUs, 128G MEM后,发现 优化后的速度还不如不优化
Bash
1
2
for i in $(seq 1 10);do printf "test.fq.gz ";done
cat test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz test.fq.gz > aa.fq.gz 
Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /usr/bin/time -v ./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz 
    Command being timed: "./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz"
    User time (seconds): 19.61
    System time (seconds): 0.02
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:19.63
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1888
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 152
    Voluntary context switches: 1
    Involuntary context switches: 25
    Swaps: 0
    File system inputs: 0
    File system outputs: 93128
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /usr/bin/time -v ./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz     
    Command being timed: "./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz"
    User time (seconds): 18.20
    System time (seconds): 0.03
    Percent of CPU this job got: 100%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.24
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 2040
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 160
    Voluntary context switches: 1
    Involuntary context switches: 23
    Swaps: 0
    File system inputs: 0
    File system outputs: 93064
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
  • 推测:
  • Ubuntu 系统上 fast_zlib 的 longest_match 函数的实现与 CentOS 系统上的不同,所以相同的修改效果不显著,甚至是无用的
  • 更换的两个 Ubuntu 系统均为多核心 CPU、高内存服务器,使得 fast_zlib 对 longest_match 函数的优化仅能在较少 CPU 和较少内存时体现优势

11. 解决 10 提出的异常

11.1 重新编译,保持单一变量

  • 上述两个程序的编译命令不同,不符合单一变量原则
  • 解决:
  • 解压 zlib-1.2.12.tar.gz
  • cp -r zlib-1.2.12 fast_zlib-1.2.12
  • 不修改 zlib 代码,直接编译 zlib-1.2.12
  • 按 4 5 步骤,修改zlib代码,编译 fast_zlib-1.2.12
  • 分别编译链接 fq2fa
    • gcc -o fq2fa_fast_zlib fq2fa.c /home/lixy/myproject/fast_zlib_test/fast_zlib-1.2.12/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/fast_zlib-1.2.12/build/include
    • gcc -o fq2fa_zlib fq2fa.c /home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/include

测试两个文件的速度:

Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /usr/bin/time -v ./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz
    Command being timed: "./fq2fa_zlib aa.fq.gz aa_zlib.fa.gz"
    User time (seconds): 28.85
    System time (seconds): 0.05
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:28.91
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1892
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 153
    Voluntary context switches: 1
    Involuntary context switches: 37
    Swaps: 0
    File system inputs: 0
    File system outputs: 93064
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /usr/bin/time -v ./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz 
    Command being timed: "./fq2fa_fast_zlib aa.fq.gz aa_fast_zlib.fa.gz"
    User time (seconds): 19.87
    System time (seconds): 0.05
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:19.92
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1948
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 152
    Voluntary context switches: 1
    Involuntary context switches: 26
    Swaps: 0
    File system inputs: 0
    File system outputs: 93128
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
  • 结论:在 ubuntu系统中,fast_zlib 项目对 zlib代码的修改,依旧有较大的速度提升

    • 新的问题:在 ubuntu系统中,直接使用 gcc -o fq2fa_zlib_u fq2fa.c -lz -Lzlib 编译链接,速度比 fast_zlib 修改版的竟然还要稍微快一点,原因是什么?
    • 使用 -lz -Lzlib 时候,使用的是系统的 zlib, 该版本比 zlib-1.2.12 有较大的速度提升 ?
Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
$ /usr/bin/time -v ./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz
    Command being timed: "./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz"
    User time (seconds): 18.59
    System time (seconds): 0.07
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.68
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 2020
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 158
    Voluntary context switches: 2
    Involuntary context switches: 24
    Swaps: 0
    File system inputs: 0
    File system outputs: 93064
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

11.2 查看系统(ubuntu)中zlib的版本

Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
$ cat /usr/lib/x86_64-linux-gnu/pkgconfig/zlib.pc
prefix=/usr
exec_prefix=${prefix}
libdir=${prefix}/lib/x86_64-linux-gnu
sharedlibdir=${libdir}
includedir=${prefix}/include

Name: zlib
Description: zlib compression library
Version: 1.2.11

Requires:
Libs: -L${libdir} -L${sharedlibdir} -lz
Cflags: -I${includedir}

11.3 那么,zlib-1.2.11 会比 zlib-1.2.12 更快吗?

测试如下:

Text Only
1
axel -n 8 https://github.com/madler/zlib/archive/refs/tags/v1.2.11.tar.gz
Text Only
1
2
$ md5sum zlib-1.2.11.tar.gz 
0095d2d2d1f3442ce1318336637b695f  zlib-1.2.11.tar.gz

编译安装

Text Only
1
2
3
mkdir build
./configure --prefix=/home/lixy/myproject/fast_zlib_test/zlib-1.2.11/build  --shared --static
make && make install

编译

Bash
1
2
3
4
5
6
7
8
9
gcc -o fq2fa_zlib-1.2.12 fq2fa.c /home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/zlib-1.2.12/build/include

gcc -o fq2fa_zlib-1.2.11 fq2fa.c /home/lixy/myproject/fast_zlib_test/zlib-1.2.11/build/lib/libz.a -I/home/lixy/myproject/fast_zlib_test/zlib-1.2.11/build/include

gcc -o fq2fa_zlib-ubuntu fq2fa.c -lz -Lzlib

(
    gcc -o fq2fa_zlib-ubuntu fq2fa.c /usr/lib/x86_64-linux-gnu/libz.a -I/usr/include/
)

Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
$ /usr/bin/time -v ./fq2fa_zlib-1.2.11 aa.fq.gz aa_zlib-1.2.11.fa.gz  
    Command being timed: "./fq2fa_zlib-1.2.11 aa.fq.gz aa_zlib-1.2.11.fa.gz"
    User time (seconds): 29.69
    System time (seconds): 0.03
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:29.73
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1948
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 153
    Voluntary context switches: 1
    Involuntary context switches: 38
    Swaps: 0
    File system inputs: 0
    File system outputs: 93064
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0



$ /usr/bin/time -v ./fq2fa_zlib-1.2.12 aa.fq.gz aa_zlib-1.2.12.fa.gz   
    Command being timed: "./fq2fa_zlib-1.2.12 aa.fq.gz aa_zlib-1.2.12.fa.gz"
    User time (seconds): 29.02
    System time (seconds): 0.07
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:29.10
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1948
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 153
    Voluntary context switches: 2
    Involuntary context switches: 39
    Swaps: 0
    File system inputs: 0
    File system outputs: 93064
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0


$ /usr/bin/time -v ./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz 
    Command being timed: "./fq2fa_zlib-ubuntu aa.fq.gz aa_zlib-ubuntu.fa.gz"
    User time (seconds): 18.58
    System time (seconds): 0.03
    Percent of CPU this job got: 100%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:18.61
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 2008
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 157
    Voluntary context switches: 1
    Involuntary context switches: 22
    Swaps: 0
    File system inputs: 0
    File system outputs: 93064
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
  • 发现,我们编译的 zlib-1.2.11 速度和 zlib-1.2.12 一样,但是却比使用系统默认zlib的版本慢很多
  • 难道是 zlib 在编译的过程中,可以加入一些优化参数?
  • 后续我在一个 docker image 中测试了几种版本的区别,发现:先在 ubuntu 中安装 zlib 相关的包,再用 gcc -o fq2fa_zlib-ubuntu fq2fa.c -lz -Lzlib 编译出的程序,确实比使用手动编译的 zlib-1.2.11 和 zlib-1.2.12 的版本快,原因未知。

12 为什么系统自带的zlib(deb 1.2.11)比我们手动编译的要快

12.1 查看 apt 安装的软件是什么版本,下载到本地,看一下configure.log

Bash
1
2
3
4
5
6
$ apt list --installed | rg zlib

WARNING: apt does not have a stable CLI interface. Use with caution in scripts.

zlib1g-dev/focal-updates,focal-security,now 1:1.2.11.dfsg-2ubuntu1.3 amd64 [installed]
zlib1g/focal-updates,focal-security,now 1:1.2.11.dfsg-2ubuntu1.3 amd64 [installed,automatic]

网上搜索 ubuntu 中的这些包,找到了 build.log 文件 - https://www.ubuntuupdates.org/pm/zlib1g-dev - https://www.ubuntuupdates.org/pm/zlib1g

  • https://launchpadlibrarian.net/593215494/buildlog_ubuntu-focal-amd64.zlib_1%3A1.2.11.dfsg-2ubuntu1.3_BUILDING.txt.gz

build.log 文件 编译命令就是:

Text Only
1
AR=ar CC="x86_64-linux-gnu-gcc" CFLAGS="`dpkg-buildflags --get CFLAGS` `dpkg-buildflags --get CPPFLAGS` -Wall -D_REENTRANT -O3 -DUNALIGNED_OK" LDFLAGS="`dpkg-buildflags --get LDFLAGS`" uname=GNU ./configure --shared --prefix=/usr --libdir=\${prefix}/lib/x86_64-linux-gnu

创建 build.sh

Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
#!/bin/bash
#@File    :   build.sh
#@Time    :   2022/08/18 10:41:29
#@Author  :   biolxy
#@Version :   1.0
#@Contact :   biolxy@aliyun.com
#@Desc    :   None

SCRIPT_FOLDER=$(cd "$(dirname "$0")";pwd)

make distclean
test -d _build && rm -rf _build
mkdir _build


# --static
# --shared

AR=ar CC="x86_64-linux-gnu-gcc" CFLAGS="`dpkg-buildflags --get CFLAGS` `dpkg-buildflags --get CPPFLAGS` -Wall -D_REENTRANT -O3 -DUNALIGNED_OK" LDFLAGS="`dpkg-buildflags --get LDFLAGS`" uname=GNU ./configure --shared --prefix=${SCRIPT_FOLDER}/_build

make && make install

执行 bash ./build.sh 后重新编译链接程序,发现手动编译的程序的速度也来到了 22s,可以确定确实是不同的编译参数导致 zlib 库文件的执行效率不同

12.2 fast_zlib 的优化 是否能与 ubuntu zlib的编译参数一起使用

  • 可以,但是没有效,编译出的 fq2fa_fast_zlib-1.2.11 速度还是在 24s, 和未使用 ubuntu zlib的编译参数 前的速度一致

13 测试 zlib-1.3.1 和 fast_zlib-1.2.13 版本各自的速度

  • https://github.com/biolxy/zlib/tree/fast_zlib-v1.2.13

最近 zlib 升级到了 1.3.1 , fast_zlib 也升级到了 zlib-1.2.13

13.1 编译zlib 时不使用优化参数

Bash
1
./configure --prefix=/fast_zlib_test/zlib-1.3.1/_build --shared --static
Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
$ /usr/bin/time -v ./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz 
    Command being timed: "./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz"
    User time (seconds): 24.58
    System time (seconds): 0.10
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.74
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1860
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 357
    Voluntary context switches: 144
    Involuntary context switches: 166
    Swaps: 0
    File system inputs: 0
    File system outputs: 92624
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

$ /usr/bin/time -v ./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz
    Command being timed: "./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz"
    User time (seconds): 37.16
    System time (seconds): 0.13
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:37.48
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1828
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 257
    Voluntary context switches: 154
    Involuntary context switches: 300
    Swaps: 0
    File system inputs: 0
    File system outputs: 92608
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

13.2 编译时添加 -O3 优化后

Bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
$ /usr/bin/time -v ./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz
    Command being timed: "./fq2fa_fast_zlib-1.2.13 in.fq.gz out_fq2fa_zlib-1.2.13.fa.gz"
    User time (seconds): 23.62
    System time (seconds): 0.12
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:23.84
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1852
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 152
    Voluntary context switches: 188
    Involuntary context switches: 163
    Swaps: 0
    File system inputs: 0
    File system outputs: 92624
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0


$ /usr/bin/time -v ./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz
    Command being timed: "./fq2fa_zlib-1.3.1 in.fq.gz out_fq2fa_zlib-1.3.1.fa.gz"
    User time (seconds): 21.15
    System time (seconds): 0.10
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:21.42
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1856
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 357
    Voluntary context switches: 58
    Involuntary context switches: 191
    Swaps: 0
    File system inputs: 0
    File system outputs: 92608
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0
  • 结论1: 编译zlib时不使用 -O3 时, fast_zlib-1.2.13 和 zlib-1.3.1 耗时比是 24.74 / 37.48 = 0.6601 , 可节约约 ⅓ 的时间
  • 结论2: 编译zlib时使用 -O3 时, fast_zlib-1.2.13 和 zlib-1.3.1 耗时比是 23.84 / 21.42 = 1.1130 , 不能节约时间
  • 建议: 以后写c/cpp项目多使用 -O3