远行的心 发表于 2018-10-1 07:40:05

mysql 从源码解读.frm文件构成

  Mysql的.frm文件是用于保存表结构信息的文件,最近从源码了解了一下.frm的组成,在网上也有一些大神对.frm的组成做过分享,自己再去了解一下总是没坏处的,.frm文件的创建在create_frm函数,但是这个函数只有对header部分的记录信息,具体表结构信息未找到在哪个函数生成的,不过有个open_binary_frm函数是对.frm文件的读取可以了解.frm的详细结构组成,注意下面的函数都是从5.7.19的版本中提取,5.7之前的版本可能有点不同,比如虚拟列
  首先看看create_frm创建header的组成部分
  File create_frm(THD *thd, const char *name, const char *db,
  const char *table, uint reclength, uchar *fileinfo,
  HA_CREATE_INFO *create_info, uint keys, KEY *key_info)
  {
  ........................
  if ((file= mysql_file_create(key_file_frm,
  name, CREATE_MODE, create_flags, MYF(0))) >= 0)
  {
  size_t key_length, tmp_key_length;
  uint tmp, csid;
  memset(fileinfo, 0, 64);
  /* header */
  /*这里开始对header组装,这部分在官网文档中又介绍*/
  fileinfo=(uchar) 254;
  fileinfo= 1;
  /*FRM_VER是frm文件版本*/
  fileinfo= FRM_VER+3+ MY_TEST(create_info->varchar);
  /*表引擎类型的记录,ha_legacy_type这个enum类型中记录了所有mysql目前支持的引擎类型*/
  fileinfo= (uchar) ha_legacy_type(
  ha_checktype(thd,ha_legacy_type(create_info->db_type),0,0));
  fileinfo=1;
  /*这里记录IO_SIZE=4096,在frm中一次写入都是按4k大小计算*/
  int2store(fileinfo+6,IO_SIZE);/* Next block starts here */
  /*
  Keep in sync with pack_keys() in unireg.cc
  For each key:
  8 bytes for the key header
  9 bytes for each key-part (MAX_REF_PARTS)
  NAME_LEN bytes for the name
  1 byte for the NAMES_SEP_CHAR (before the name)
  For all keys:
  6 bytes for the header
  1 byte for the NAMES_SEP_CHAR (after the last name)
  9 extra bytes (padding for safety? alignment?)
  */
  /*下面这个for循环有对索引备注进行操作,索引存储固定6bytes作为header,每个索引有8bytes记录索引基础信息,9bytes记录索引的每个字段信息,如果是组合索引,有几个字段就需要几个9bytes,这个再open_binary_frm中可以得到印证*/
  for (i= 0; i < keys; i++)
  {
  DBUG_ASSERT(MY_TEST(key_info.flags & HA_USES_COMMENT) ==
  (key_info.comment.length > 0));
  if (key_info.flags & HA_USES_COMMENT)
  key_comment_total_bytes += 2 + key_info.comment.length;
  }
  /*这下面都是对数据的存储*/
  key_length= keys * (8 + MAX_REF_PARTS * 9 + NAME_LEN + 1) + 16
  + key_comment_total_bytes;
  length= next_io_size((ulong) (IO_SIZE+key_length+reclength+
  create_info->extra_size));
  int4store(fileinfo+10,length);
  tmp_key_length= (key_length < 0xffff) ? key_length : 0xffff;
  int2store(fileinfo+14, static_cast(tmp_key_length));
  int2store(fileinfo+16,reclength);
  int4store(fileinfo+18, static_cast(create_info->max_rows));
  int4store(fileinfo+22, static_cast(create_info->min_rows));
  /* fileinfo is set in mysql_create_frm() */
  fileinfo=2;// Use long pack-fields
  /* fileinfo is set to key_info_length in mysql_create_frm() */
  create_info->table_options|=HA_OPTION_LONG_BLOB_PTR; // Use portable blob pointers
  int2store(fileinfo+30, static_cast(create_info->table_options));
  fileinfo=0;// No filename anymore
  fileinfo=5;                           // Mark for 5.0 frm file
  int4store(fileinfo+34,create_info->avg_row_length);
  csid= (create_info->default_table_charset ?
  create_info->default_table_charset->number : 0);
  fileinfo= (uchar) csid;
  /*
  In future versions, we will store in fileinfo the values of the
  TRANSACTIONAL and PAGE_CHECKSUM clauses of CREATE TABLE.
  */
  fileinfo= 0;
  fileinfo= (uchar) create_info->row_type;
  /* Bytes 41-46 were for RAID support; now reused for other purposes */
  fileinfo= (uchar) (csid >> 8);
  int2store(fileinfo+42, create_info->stats_sample_pages & 0xffff);
  fileinfo= (uchar) create_info->stats_auto_recalc;
  fileinfo= 0;
  fileinfo= 0;
  int4store(fileinfo+47, static_cast(key_length));
  tmp= MYSQL_VERSION_ID;          // Store to avoid warning from int4store
  int4store(fileinfo+51, tmp);
  int4store(fileinfo+55, create_info->extra_size);
  /*
  59-60 is reserved for extra_rec_buf_length,
  61 for default_part_db_type
  */
  int2store(fileinfo+62, static_cast(create_info->key_block_size));
  .........................................................................
  } /* create_frm */
  从函数代码得出64bytes的组成如下:
  position,bytes(64字节的header)
  0,1: 固定值254
  1,1: 固定值1
  2,1: FRM_VER+3+MY_TEST(create_info->varchar) frm_ver表示frm文件版本号
  3,1: (uchar) ha_legacy_type(ha_checktype(thd,ha_legacy_type(create_info->db_type),0,0)) 引擎类型
  4,1: 固定值1
  5,1: 0
  6,2: IO_size=4096
  8,2: 固定值
  10,4: Length, based on key_length + rec_length + create_info->extra_size
  14,2: tmp_key_length
  16,2: reclength
  18,4: max_rows
  22,4: min_rows
  26,1: 0
  27,1: 固定值2
  28,2: key_info_length
  30,2: blob指针,create_info->table_options
  32,1: 固定值0
  33,1: 固定值5 代表frm版本为5.0
  34,4: create_info->avg_row_length行平均长度
  38,1: 表默认字符集编号
  39,1: 固定值0
  40,1: create_info->row_type
  41,6: 这6个字节记录RAID支持
  47,4: key_length   用于偏移读取默认值信息,默认值其实位置在io_size+(key_length or tmp_key_length)
  51,4: mysql版本号
  55,4: create_info->extra_size
  59,2: extra_rec_buf_length
  61,1: default_part_db_type 分区表类型
  62,2: create_info->key_block_size
  下面再看open_binary_frm的组成
  static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
  File file)
  {
  ...............................................................................................................................
  field_pack_length= new_frm_ver < 2 ? 11 : 17; //单个字段基本信息占用字节数
  disk_buff= 0;
  error= 3;
  /* Position of the form in the form file. */
  if (!(pos= get_form_pos(file, head)))//pos是字段保存信息的起始位置,由header + (header * 4)决定,记录在(header * 4)头4bytes中
  goto err;                                 /* purecov: inspected */
  mysql_file_seek(file,pos,MY_SEEK_SET,MYF(0));
  if (mysql_file_read(file, forminfo,288,MYF(MY_NABP))) //288个基本字节
  goto err;
  /*这一部分是对.frm的header进行读取,和create_frm差不多,直接省略*/
  .......................................................................................................................................................
  /* Read keyinformation */
  key_info_length= (uint) uint2korr(head+28); //索引占的总字节数,在.frm的header中,偏移位置是28占用2bytes
  mysql_file_seek(file, (ulong) uint2korr(head+6), MY_SEEK_SET, MYF(0)); //跳到4096字节处(IO_SIZE),frm文件头的64字节也包含在其中,为固定字节
  if (read_string(file, &disk_buff,key_info_length)) //读取索引信息到disk_buff中
  goto err;                                 /* purecov: inspected */
  if (disk_buff & 0x80)
  {
  share->keys=      keys=      (disk_buff key_parts= key_parts= uint2korr(disk_buff+2);
  }
  else
  {
  share->keys=      keys=      disk_buff;//索引数量
  share->key_parts= key_parts= disk_buff;//所有索引的字段数
  }
  share->keys_for_keyread.init(0);
  share->keys_in_use.init(keys);
  strpos=disk_buff+6;   // 6字节的索引基本信息
  use_extended_sk=
  ha_check_storage_engine_flag(share->db_type(),
  HTON_SUPPORTS_EXTENDED_KEYS);
  uint total_key_parts;
  if (use_extended_sk)
  {
  uint primary_key_parts= keys ?
  (new_frm_ver >= 3) ? (uint) strpos : (uint) strpos : 0;
  total_key_parts= key_parts + primary_key_parts * (keys - 1);
  }
  else
  total_key_parts= key_parts;

  n_length= keys *>  ..........................................................................................................................................................
  //循环解析索引
  for (i=0 ; i < keys ; i++, keyinfo++)
  {
  keyinfo->table= 0;                           // Updated in open_frm
  if (new_frm_ver >= 3)
  {
  keyinfo->flags=   (uint) uint2korr(strpos) ^ HA_NOSAME;
  keyinfo->key_length= (uint) uint2korr(strpos+2);//索引总长度
  keyinfo->user_defined_key_parts= (uint) strpos; //索引字段数
  keyinfo->algorithm=(enum ha_key_alg) strpos; //索引类型,btree、hash、fulltext
  keyinfo->block_size= uint2korr(strpos+6);
  strpos+=8; //一个key占用8个字节,跳过继续
  }
  ...........................................................................................................................................................
  // 循环解析索引字段
  for (j=keyinfo->user_defined_key_parts ; j-- ; key_part++)
  {
  *rec_per_key++ = 0;
  *rec_per_key_float++ = REC_PER_KEY_UNKNOWN;
  key_part->fieldnr=(uint16) (uint2korr(strpos) & FIELD_NR_MASK);
  key_part->offset= (uint) uint2korr(strpos+2)-1;    //record字节偏移量
  key_part->key_type=(uint) uint2korr(strpos+5);//字段类型
  // key_part->field=(Field*) 0;// Will be fixed later
  if (new_frm_ver >= 1)
  {
  key_part->key_part_flag= *(strpos+4);
  key_part->length=(uint) uint2korr(strpos+7);//字段长度
  strpos+=9;//一个字段的信息占用9bytes,指针跳过继续下一个字段信息
  }
  .........................................................................................................................................................
  }
  //****循环解析索引字段
  ..............................................................................................................................................................
  }
  //****循环解析索引
  keynames=(char*) key_part;
  strpos+= (my_stpcpy(keynames, (char *) strpos) - keynames)+1;
  //reading index comments
  ................................................................................................................................................................
  record_offset= (ulong) (uint2korr(head+6)+
  ((uint2korr(head+14) == 0xffff ?
  uint4korr(head+47) : uint2korr(head+14)))); // 读取字段默认数据的偏移量
  ................................................................................................................................................................
  /*默认值读取
  error=4;
  extra_rec_buf_length= uint2korr(head+59);
  rec_buff_length= ALIGN_SIZE(share->reclength + 1 + extra_rec_buf_length);
  share->rec_buff_length= rec_buff_length;
  if (!(record= (uchar *) alloc_root(&share->mem_root,
  rec_buff_length)))
  goto err;                                 /* purecov: inspected */
  share->default_values= record;
  if (mysql_file_pread(file, record, (size_t) share->reclength,
  record_offset, MYF(MY_NABP)))
  goto err;                                 /* purecov: inspected */
  */
  /*读取字段信息,偏移量pos上面已经计算出来,288个基本字节信息也读取到forminfo中*/
  mysql_file_seek(file, pos+288, MY_SEEK_SET, MYF(0));
  share->fields= uint2korr(forminfo+258);//表字段数
  pos= uint2korr(forminfo+260);   /* Length of all screens *///基础信息字节长度
  n_length= uint2korr(forminfo+268); //所有字段名记录长度
  interval_count= uint2korr(forminfo+270);//enum、set类字段数
  interval_parts= uint2korr(forminfo+272);
  int_length= uint2korr(forminfo+274); //enum、set类字段内容所占长度
  share->null_fields= uint2korr(forminfo+282); //允许为空的字段数
  com_length= uint2korr(forminfo+284);//字段注释所占长度
  gcol_screen_length= uint2korr(forminfo+286); //虚拟列所占字节
  share->vfields= 0;
  share->stored_fields= share->fields;
  if (forminfo != (uchar)255)
  {
  share->comment.length=(int) (forminfo);
  share->comment.str= strmake_root(&share->mem_root, (char*) forminfo+47,
  share->comment.length);
  }
  ...........................................................................................................................................................................
  share->field= field_ptr;
  read_length=(uint) (share->fields * field_pack_length +
  pos+ (uint) (n_length+int_length+com_length+
  gcol_screen_length));
  if (read_string(file, &disk_buff,read_length))
  goto err;                                 /* purecov: inspected */
  strpos= disk_buff+pos;//跳过pos的长度
  share->intervals= (TYPELIB*) (field_ptr+share->fields+1);
  interval_array= (const char **) (share->intervals+interval_count);
  names= (char*) (interval_array+share->fields+interval_parts+keys+3);
  if (!interval_count)
  share->intervals= 0;// For better debugging
  memcpy(names, strpos+(share->fields*field_pack_length),//获取字段名字节
  (uint) (n_length+int_length));
  orig_comment_pos= comment_pos= names+(n_length+int_length);//获取注释
  memcpy(comment_pos, disk_buff+read_length-com_length-gcol_screen_length,
  com_length);
  orig_gcol_screen_pos= gcol_screen_pos= names+(n_length+int_length+com_length);
  memcpy(gcol_screen_pos, disk_buff+read_length-gcol_screen_length,
  gcol_screen_length);//获取虚拟列信息
  ..............................................................................................................................................................................
  /*循环获取字段信息*/
  for (i=0 ; i < share->fields; i++, strpos+=field_pack_length)
  {
  //这里有对虚拟列的判断
  if (new_frm_ver >= 3 &&
  (strpos & Field::GENERATED_FIELD) && // Field::unireg_check 是否为虚拟列
  ! (bool) (uint) (gcol_screen_pos))    // Field::stored_in_db 是否存储
  {
  comment_pos+= uint2korr(strpos+15);
  gcol_screen_pos+= uint2korr(gcol_screen_pos + 1) + FRM_GCOL_HEADER_SIZE;
  has_vgc= true;
  }
  else
  {
  if ((error= make_field_from_frm(thd, share,
  new_frm_ver, use_hash,
  i, strpos,
  format_section_fields,
  &comment_pos,
  &gcol_screen_pos,
  &null_pos,
  &null_bit_pos,
  &errarg)))
  goto err;
  }
  }
  ..............................................................................................................................................................
  // 获取索引信息
  /* Fix key->name and key_part->field */
  if (key_parts)
  {
  const int pk_off= find_type(primary_key_name, &share->keynames,
  FIND_TYPE_NO_PREFIX);
  uint primary_key= (pk_off > 0 ? pk_off-1 : MAX_KEY);
  longlong ha_option= handler_file->ha_table_flags();
  keyinfo= share->key_info;
  key_part= keyinfo->key_part;
  for (uint key=0 ; key < share->keys ; key++,keyinfo++)
  {
  .........................................................................................................................................................
  for (i=0 ; i < keyinfo->user_defined_key_parts ; key_part++,i++)
  {
  Field *field;
  if (new_field_pack_flag fieldnr= (uint16) find_field(share->field,
  share->default_values,
  (uint) key_part->offset,
  (uint) key_part->length);
  if (!key_part->fieldnr)
  {
  error= 4;                           // Wrong file
  goto err;
  }
  field= key_part->field= share->field;
  key_part->type= field->key_type();
  if (field->real_maybe_null())
  {
  key_part->null_offset=field->null_offset(share->default_values);
  key_part->null_bit= field->null_bit;
  key_part->store_length+=HA_KEY_NULL_LENGTH;
  keyinfo->flags|=HA_NULL_PART_KEY;
  keyinfo->key_length+= HA_KEY_NULL_LENGTH;
  }
  if (field->type() == MYSQL_TYPE_BLOB ||
  field->real_type() == MYSQL_TYPE_VARCHAR ||
  field->type() == MYSQL_TYPE_GEOMETRY)
  {
  key_part->store_length+=HA_KEY_BLOB_LENGTH;
  if (i + 1 user_defined_key_parts)
  keyinfo->key_length+= HA_KEY_BLOB_LENGTH;
  }
  key_part->init_flags();
  ....................................................................................................................................................................
  //判断是否为自增
  if (share->found_next_number_field)
  {
  Field *reg_field= *share->found_next_number_field;
  if ((int) (share->next_number_index= (uint)
  find_ref_key(share->key_info, share->keys,
  share->default_values, reg_field,
  &share->next_number_key_offset,
  &share->next_number_keypart)) < 0)
  {
  /* Wrong field definition */
  error= 4;
  goto err;
  }
  else
  reg_field->flags |= AUTO_INCREMENT_FLAG;
  }//**判断是否为自增
  .......................................................................................................................................................................
  } /* open_binary_frm */
  代码量太大,很多地方都做了省略,下面来做个文字解释:
  索引部分:
  6bytes: 记录索引基本信息
      0,1:1bytes    索引数
      1,1: 1bytes用于索引的字段数
      4,2: 2bytes 索引信息结束后存储索引名的字节长度
  8bytes: 记录单个索引信息
      0,2: flags   1代表唯一有唯一约束,普通为0
      2,2: 索引总长度
      4,1: 索引字段数,有多少字段,后面就有多少个9bytes
      5,1: 索引类型
      6,2: block_size
      9bytes: 记录所有单个字段信息
        0,1: 字段在表中的编号
        2,2: record字节偏移量
        5,2: 字段类型
        7,2: 字段长度
      后面紧跟索引名:
        ff+索引名
  表字段部分:
  288bytes基本信息
      46,1: 表注释所占长度,后面紧跟注释内容
      258,2: 两个字节记录字段数
      260,2: 字段基本信息长度
      268,2: 所有字段名长度,每个字段名以ff隔开
      284,2: 字段注释所占长度
  288字节后紧跟字段信息,格式排列顺序:
      content_pos: 基础信息
      fields*17 : 字段信息
      n_length: 字段名总长度
      int_length : 紧跟字段之后,为enum、set类型的值
      com_length: 字段注释长度
      gcol_screen: 虚拟列信息
        0,1: 固定值1
        1,3: 虚拟列信息长度
        3,0: 是否存储数据
        剩余的为信息
        如有多个虚拟列,循环上面的字节排序
  17bytes记录字段信息:
  3,5: 字段类型
  5,8: 记录偏移量
  8,10: flags
  10,11: 是否为虚拟列
  12,13: enum、set类型的顺序号
  13,14: 字段类型
  14,15: 字段字符类型
  15,17: 字段注释长度
  
  下面图片内容是我用python写的解析脚本打印的表结构,如果有兴趣的话可以按照上面的介绍利用自己熟悉的语言写一个脚本玩玩,万一哪天真的需要用上呢,作为一个专业DBA来说应该几乎为零

  ps: mysql技术交流QQ群479472450,个人微信公众号会发送一些整理学习结果,多多关注


页: [1]
查看完整版本: mysql 从源码解读.frm文件构成