在Crystal中解析二进制格式

给定一种带有头部(header)的二进制格式,头部包含记录数,其后是按如下格式排列的记录:

{ type : Int8, timestamp : UInt32, user_id : UInt64 }

0000 0004 0153 0927 d139 6747 c045 d991
2100 53d1 6287 4fd2 69fd 8e5f 0475 0153
f323 a72b 4984 a40b 8d54 db00 53a0 78d4
1db8 b1a6 4129 1651

我之前主要使用 Ruby。下面的解决方案可以正常工作,但我想知道,对于这种结构化数据,是否有更优雅、更符合 Crystal 惯用法的方式来读取字节?

# A single decoded record: who the user is, what kind of user, and when.
class User
  # Maps the numeric type code stored in a record to a symbolic user type.
  USER_TYPES = {0 => :admin, 1 => :user}

  property user_type : Symbol
  property timestamp : UInt32
  property user_id : UInt64

  # Builds a user from raw record fields.
  #
  # *user_type* is the numeric code looked up in `USER_TYPES`; the lookup
  # uses `Hash#[]`, so a code without an entry raises instead of returning nil.
  def initialize(user_type : Int8, timestamp : UInt32, user_id : UInt64)
    @user_type = USER_TYPES[user_type]
    @timestamp = timestamp
    @user_id = user_id
  end
end

# Parses a big-endian binary file of user records into `User` objects.
#
# Layout read by `#parse`: a UInt32 record count at offset 0, followed by
# 13-byte records of { Int8 type, UInt32 timestamp, UInt64 user_id }.
class Parser
  property users : Array(User)

  def initialize
    @users = [] of User
  end

  # Reads every record from the file at *file_path*, appends one `User` per
  # record to `users`, and returns `users`.
  def parse(file_path : String)
    File.open(file_path) do |file|
      offset = 0
      count : UInt32 = seek_and_unpack(file, offset, UInt32)
      offset += 4 # skip the 4-byte count header

      # Iterate exactly `count` times. An inclusive `0..count` range would run
      # count + 1 times and try to read one record more than the file declares.
      count.times do
        user_type = seek_and_unpack(file, offset, Int8)
        timestamp = seek_and_unpack(file, offset + 1, UInt32)
        user_id = seek_and_unpack(file, offset + 5, UInt64)
        @users << User.new(user_type, timestamp, user_id)
        offset += 13 # 1 (type) + 4 (timestamp) + 8 (user_id) bytes per record
      end
      @users
    end
  end

  # Seeks *file* to *offset* and reads one big-endian value of *read_type*.
  private def seek_and_unpack(file : File, offset : Int32, read_type)
    file.seek(offset)
    file.read_bytes(read_type, IO::ByteFormat::BigEndian)
  end
end

# Parse the sample file and print the resulting users.
parser = Parser.new
puts parser.parse("my_file.dat")
# Example output:
# [#<User:0x102805fe0 @user_type=:user, @timestamp=1393108945, @user_id=4136353673894269217>,
# #<User:0x102805fc0 @user_type=:admin, @timestamp=1406231175, @user_id=5751776211841778805>,
# #<User:0x102805fa0 @user_type=:user, @timestamp=1408443303, @user_id=3119170057034093787>,
# #<User:0x102805f80 @user_type=:admin, @timestamp=1403025620, @user_id=2141656950430570065>]

回答(1)

3 years ago

您可以去掉 seek 调用,因为 read_bytes 在读取时已经会推进 IO 的当前位置;另外,可以把解包操作封装到一个宏中,使代码更具可读性:

# Parses big-endian user records by reading the file sequentially.
class Parser
  property users = [] of User

  # Reads a UInt32 record count from the file at *path*, then that many
  # { Int8, UInt32, UInt64 } records, appending a `User` for each one.
  # Returns `users`.
  def parse(path)
    File.open(path) do |file|
      unpack(UInt32).times do
        # Fields are read in on-disk order; each read continues where the
        # previous one stopped.
        kind = unpack(Int8)
        stamp = unpack(UInt32)
        id = unpack(UInt64)
        @users << User.new(user_type: kind, timestamp: stamp, user_id: id)
      end
      @users
    end
  end

  # Expands to a big-endian read of *type* from the local variable `file`
  # at the call site, so it can only be used where `file` is in scope.
  macro unpack(type)
    file.read_bytes({{type}}, IO::ByteFormat::BigEndian)
  end
end