如何写Linux下的tar打包软件？

相信你对 linux 的 .tar.gz 有点熟悉，这就是先 tar 打包（.tar 后缀），再对此 tar 文件用 gzip 压缩（.tar.gz）的后缀名。
值得注意的是，tar 不是压缩软件，它只负责把一堆文件/文件夹打包到一个文件（tar 文件）里，而文件链接、文件权限、相对路径等信息都会替你保存好。最初的设计是 tar 和 gzip 各自只做一件事情，各司其职；后来发现分两步操作太麻烦，于是就把压缩功能整合到 tar 里了。
- create a gzipped archive：
tar czf target.tar.gz file1 file2 file3
最近学习 os 时写了一个类似 tar 的项目，那么今天就趁热打铁简单说一下如何写一个打包软件，这个软件会将重复的文件内容通过 md5 比较，复用旧的内容。
基本单位 block
block 可以理解为文件系统的最小单位，分别有以下类型：
•directory block，文件夹 block，存储文件夹 meta 信息；
•file block，文件 block，存储文件 meta 信息；
•data block，只用来存文件内容；
directory block，注意的是 entry 里要有 fileindex 来存储重复文件的 name 的下标。同时，给项目一个 root dir。
typedef struct {
char name［sifs_max_name_length］; // name of the directory
time_t modtime; // time last modified 《- time（）
uint32_t nentries;// 文件夹内的文件/文件夹数量
struct {
sifs_blockid blockid; // subdirectory 或者 file 的 blockid
uint32_t fileindex; // 重复文件的不同名字
} entries［sifs_max_entries］;
} sifs_dirblock;
文件 block：length 是文件内容的字节数，之后用来计算需要多少个 data block；firstblockid 记录第一个 data block 的 id；nfiles 记录有多少个文件共享这份相同的内容；filenames 则保存这些内容相同的文件各自的名字。
typedef struct {
time_t modtime; // time first file added 《- time（）
size_t length; // length of files‘ contents in bytes
unsigned char md5［md5_bytelen］;//the md5 cryptographic digest （a summary） of the files’ contents
sifs_blockid firstblockid;// the block number （blockid） of the files‘ first data-block
uint32_t nfiles; // n files with identical contents
char filenames［sifs_max_entries］［sifs_max_name_length］;// an array of each same file’s name and its modification time.
} sifs_fileblock;
bitmaps数组，记录了每个 block 的类型，有：文件、文件夹以及data block 三种类型。
通用函数
就让大家看看关键函数好了：
读 tar 后的文件的 meta 头，记录了 block 的大小（ blocksize）以及多少个 blocks。
void read_vol_header（file *vol， sifs_volume_header *header） {
fread（header， sizeof（sifs_volume_header）， 1， vol）;
printf（“header-》blocksize %zu， header-》nblocks %u
”， header-》blocksize ， header-》nblocks）;
}
bitmap，每次操作 tar 文件都要读的。
void read_bitmap（file *vol， sifs_bit *bitmap， int nblocks） {
int size = nblocks * sizeof（sifs_bit）;
fread（bitmap， size， 1， vol）;
}
root_block 同理，读和写啥东西都要从 root block、root dir 出发。
void read_root_block（file *vol， sifs_dirblock *dirblock）{
fread（dirblock， sizeof（sifs_dirblock）， 1， vol）;
printf（“read_root_block finish， dirblock.name： %s， dirblock.entrieds： %d， dirblock.modtime %ld
”， dirblock-》name， dirblock-》nentries，dirblock-》modtime）;
}
路径嘛，你懂的，例如 ./sifs_put volumn ~/res.txt /dirb/subdirb/subsubdir/newfileb：本地要读的内容可以靠 read 函数解决，但写到 tar 文件里的目标路径就要手动解析，再递归查找了。
/*
 * Split `pathname` on '/' and append a copy of every component to
 * `route_names`, incrementing *route_cnt once per component.
 *
 * pathname:    path to split; it is not modified (a copy is split instead).
 * route_names: output array; components are stored from index *route_cnt on.
 * route_cnt:   in/out count of entries in route_names.
 *
 * strsep() returns empty fields, so a leading '/' produces an empty first
 * component and "//" produces an empty component in between.
 *
 * NOTE(review): the array capacity is not passed in, so the caller must
 * guarantee that route_names has room for every component.
 * NOTE(review): the scratch copy made by copystr() is not released here
 * because its allocator is not visible from this function; if copystr()
 * allocates with malloc, keep the original pointer and free it after the loop.
 */
void read_route_names(char* pathname, char** route_names, int *route_cnt) {
    if (pathname == NULL || route_names == NULL || route_cnt == NULL) {
        return;
    }

    /* copystr() already yields a private copy; no extra strcpy is needed. */
    char *cursor = copystr(pathname);
    if (cursor == NULL) {
        return;
    }

    char *dir;
    while ((dir = strsep(&cursor, "/")) != NULL) {
        route_names[*route_cnt] = copystr(dir);
        (*route_cnt)++;
    }
}
以上几乎是 mkdir，rmdir，writefile，readfile，putfile 等等操作都要做的。
实现
然后，应该举一个 readfile 的例子就可以做代表了。
int recursive_dirinfo（sifs_dirblock *cur_dir_block， char **route_names， int route_name_p， int route_cnt）;
实现：
int recursive_dirinfo（sifs_dirblock *cur_dir_block， char **route_names， int route_name_p， int route_cnt） {
for（int i=0; i《cur_dir_block-》nentries ; i++） {
int blockid = cur_dir_block-》entries［i］.blockid;
if（bitmap［blockid］==sifs_dir） {
sifs_dirblock dirblock;
int start = sizeof（sifs_volume_header） + header.nblocks*sizeof（sifs_bit）;
read_dir_block（vol， &dirblock， blockid * blocksize， start）;
if（strcmp（dirblock.name， route_names［route_name_p］） == 0） {
if（route_name_p+2 == route_cnt） {
return do_read_file（cur_dir_block， route_names［route_name_p+1］， blockid）;
}
return recursive_dirinfo（&dirblock， route_names， route_name_p+1， route_cnt）;
}
}
}
return 1;
}
以 `./sifs_put volumn ~/res.txt /dirb/subdirb/subsubdir/newfileb` 为例子，如果递归找到 `subsubdir` 这个文件夹 block，进行相应操作：
•写文件就往 bitmap 一直找没有用过的 block，够写文件就写进去，文件夹更新一下信息。
•读文件就是根据此文件夹 block，找里面的 newfileb
int do_read_file（sifs_dirblock *parent_dir， char *filename， int parent_dir_block） {
printf（“do_find_file_info， filename %s
”， filename）;
for（int i=1; i《header.nblocks ; i++） {
sifs_fileblock fileblock;
if（bitmap［i］==sifs_file） {
int start = sizeof（sifs_volume_header） + header.nblocks*sizeof（sifs_bit）;
read_file_block（vol， &fileblock， i * blocksize， start）;
*nbytes = fileblock.length;
int need_data_blocks = *nbytes / header.blocksize;
if（strcmp（fileblock.filenames［0］， filename） == 0） {
for（int d_block_id = fileblock.firstblockid; d_block_id - i -1 《 need_data_blocks; d_block_id++） {
read_data_block（vol，（char*）（*data）+（d_block_id - i -1）， blocksize， d_block_id * header.blocksize， start）;
}
return 0;
}
}
}
return 1;
}
而真实的 tar 自然更复杂，还要记录用户权限、用户、用户组、文件类型等等：
/*
 * Header record of a POSIX (ustar) tar archive member. Every field is a
 * char array, so the struct has no padding; the byte offset of each field
 * is given in its comment.
 */
struct posix_header
{                       /* byte offset */
    char name[100];     /*   0 */ /* file name */
    char mode[8];       /* 100 */ /* permissions */
    char uid[8];        /* 108 */ /* user id */
    char gid[8];        /* 116 */ /* group id */
    char size[12];      /* 124 */ /* file size */
    char mtime[12];     /* 136 */ /* modification time */
    char chksum[8];     /* 148 */ /* checksum */
    char typeflag;      /* 156 */ /* file type flag */
    char linkname[100]; /* 157 */ /* name of the linked-to file */
    char magic[6];      /* 257 */
    char version[2];    /* 263 */
    char uname[32];     /* 265 */ /* user name */
    char gname[32];     /* 297 */ /* group name */
    char devmajor[8];   /* 329 */ /* device file major number */
    char devminor[8];   /* 337 */ /* device file minor number */
    char prefix[155];   /* 345 */
                        /* 500 */
};
文件类型标志定义，包含了所有 unix 系统中的文件类型
/*
 * Values of the typeflag field of a tar header. POSIX <tar.h> defines the
 * same values under upper-case names (REGTYPE, LNKTYPE, ...).
 */
#define regtype '0'  /* regular file */
#define lnktype '1'  /* link */
#define symtype '2'  /* reserved */
#define chrtype '3'  /* character special */
#define blktype '4'  /* block special */
#define dirtype '5'  /* directory */
#define fifotype '6' /* fifo special */
#define conttype '7' /* reserved */
概览如此，写起来其实有点烦 - = -，有兴趣的读者可以写写。
原文标题：带你写一个 linux 下的打包软件 tar
文章出处：【微信公众号：linux爱好者】欢迎添加关注！文章转载请注明出处。

怎样建立一个电子设备电磁兼容的仿真模型？
FC装配技术最全资料
Maxim推出完全集成的PMIC，为LCD TV提供紧凑的高
深度学习对于生物学有什么影响
电子行业设备对用锂离子电池和电池组的安全要求
如何写Linux下的tar打包软件？
人类科技的下一个时代将是VR/AR的时代
一二次融合环网柜和普通环网柜区别一二次融合环网柜结构分析
汽车行业将步入电动化、智能化、网联化新时代
MathType怎么保存常用公式的办法
基于74LS161的扭环形计数器自启动设计
最新专利深入“轻量化”！华为这样做？
全志D1和t113对比
什么样的电机最好？
如何延长智能工厂无线信标的使用寿命
NAND Flash 原理深度解析（上）
银行业场景或将为虹膜识别技术接下来主要的市场增长点
你是否选择了合适的电线？
如何操作业余RC：升级您的汽车和电池
DEKRA德凯上海可再生能源测试中心满足企业所需的测试与认证需求