@@ -470,6 +470,75 @@ Storing Attributes to a group node
470470 store.close()
471471 os.remove(' test.h5' )
472472
473+
474+ .. _cookbook.binary :
475+
476+ Binary Files
477+ ~~~~~~~~~~~~
478+
479+ Pandas readily accepts NumPy record arrays, which is useful if you need to
480+ read in a binary file consisting of an array of C structs. For example, given
481+ this C program in a file called ``main.c`` compiled with
482+ ``gcc main.c -std=gnu99`` on a 64-bit machine,
483+
484+ .. code-block :: c
485+
486+ #include <stdio.h>
487+ #include <stdint.h>
488+
489+ typedef struct _Data
490+ {
491+ int32_t count;
492+ double avg;
493+ float scale;
494+ } Data;
495+
496+ int main(int argc, const char *argv[])
497+ {
498+ size_t n = 10;
499+ Data d[n];
500+
501+ for (int i = 0; i < n; ++i)
502+ {
503+ d[i].count = i;
504+ d[i].avg = i + 1.0;
505+ d[i].scale = (float) i + 2.0f;
506+ }
507+
508+ FILE *file = fopen("binary.dat", "wb");
509+ fwrite(&d, sizeof(Data), n, file);
510+ fclose(file);
511+
512+ return 0;
513+ }
514+
515+ the following Python code will read the binary file ``'binary.dat'`` into a
516+ pandas ``DataFrame``, where each member of the struct corresponds to a column
517+ in the frame:
518+
519+ .. code-block :: python
520+
521+ import numpy as np
522+ from pandas import DataFrame
523+
524+ names = ' count' , ' avg' , ' scale'
525+
526+ # note that the offsets are larger than the size of the type because of
527+ # struct padding
528+ offsets = 0 , 8 , 16
529+ formats = ' i4' , ' f8' , ' f4'
530+ dt = np.dtype({' names' : names, ' offsets' : offsets, ' formats' : formats},
531+ align = True )
532+ df = DataFrame(np.fromfile(' binary.dat' , dt))
533+
534+ .. note ::
535+
536+ The offsets of the structure elements may be different depending on the
537+ architecture of the machine on which the file was created. Using a raw
538+ binary file format like this for general data storage is not recommended, as
539+ it is not cross-platform. We recommend either HDF5 or msgpack, both of
540+ which are supported by pandas' IO facilities.
541+
473542Computation
474543-----------
475544
0 commit comments