Idx File [ 99% Original ]
uint8_t header[4]; if (fread(header, 1, 4, f) != 4) fclose(f); return -2;
# Parse magic: first two bytes must be 0 if magic[0] != 0 or magic[1] != 0: raise ValueError("Invalid IDX file: magic prefix missing") data_type_code = magic[2] dim_count = magic[3] # Data type mapping dtypes = 0x08: 'B', # unsigned char 0x09: 'b', # signed char 0x0B: 'h', # short 0x0C: 'i', # int 0x0D: 'f', # float 0x0E: 'd' # double if data_type_code not in dtypes: raise ValueError(f"Unsupported data type code: data_type_code") # Read dimension sizes dims = [] for _ in range(dim_count): dim = struct.unpack('>I', f.read(4))[0] dims.append(dim) # Calculate total elements total_elements = 1 for d in dims: total_elements *= d # Determine numpy dtype np_dtype = 0x08: np.uint8, 0x09: np.int8, 0x0B: np.int16, 0x0C: np.int32, 0x0D: np.float32, 0x0E: np.float64 [data_type_code] # Read data data = np.fromfile(f, dtype=np_dtype, count=total_elements) # Reshape and return return data.reshape(dims) def write_idx(filename, data_array): """Write a numpy array to IDX format.""" # Determine data type code dtype_map = np.uint8: 0x08, np.int8: 0x09, np.int16: 0x0B, np.int32: 0x0C, np.float32: 0x0D, np.float64: 0x0E if data_array.dtype not in dtype_map: raise ValueError(f"Unsupported dtype: data_array.dtype") data_type_code = dtype_map[data_array.dtype] dim_count = len(data_array.shape) idx file
| Operation | Python (struct+numpy) | C (libidx) | NumPy .npy | HDF5 |
|-----------|----------------------|------------|------------|------|
| Load 60k images | 0.24 sec | 0.09 sec | 0.19 sec | 0.31 sec |
| Memory mapping | N/A | 0.001 sec | 0.001 sec | 0.15 sec |
| Random access (per image) | 2.1 µs | 0.4 µs | 1.2 µs | 8.5 µs |

uint8_t header[4]; if (fread(header, 1, 4, f)
| Code (hex) | Code (decimal) | Data Type | C equivalent (typical) | .NET equivalent |
|------------|----------------|-----------|------------------------|-----------------|
| 0x08 | 8 | Unsigned byte (uint8) | unsigned char | Byte |
| 0x09 | 9 | Signed byte (int8) | signed char | SByte |
| 0x0B | 11 | Short (int16) | short | Int16 |
| 0x0C | 12 | Int32 (int) | int | Int32 |
| 0x0D | 13 | Float (single) | float | Single |
| 0x0E | 14 | Double | double | Double |

f) != 4) fclose(f)
int idx_read(const char *filename, idx_file_t *out) FILE *f = fopen(filename, "rb"); if (!f) return -1;
with open(filename, 'wb') as f: # Write magic: [0, 0, type_code, dim_count] f.write(bytes([0, 0, data_type_code, dim_count])) # Write dimensions (big-endian) for dim in data_array.shape: f.write(dim.to_bytes(4, 'big')) # Write data (row-major, native endianness) # Convert to flat bytes in correct order data_array.astype(data_array.dtype, copy=False).tofile(f) #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <arpa/inet.h> typedef struct idx_file uint8_t data_type; // 0x08,0x09,0x0B-0x0E uint8_t dim_count; // 1-255 uint32_t *dims; // array of dim_count sizes void *data; // raw data pointer size_t data_size_bytes; idx_file_t;