Generate Figures for Read/Write/File-Size Benchmarks

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
In [3]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the legacy 'seaborn' name, so prefer the new name
# when the installed version offers it and fall back to the old one otherwise.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
In [4]:
# Grab the color list from the active style's property cycle so that
# individual plots can pick a specific bar color by index.
prop_cycle = plt.rcParams["axes.prop_cycle"]
colors = prop_cycle.by_key()["color"]
print(colors)
['#4C72B0', '#55A868', '#C44E52', '#8172B2', '#CCB974', '#64B5CD']
In [5]:
# Storage formats that were benchmarked. The timing and size lists in this
# notebook are positional and follow this same ordering.
headers = [
    'parquet/uncompressed',
    'parquet/snappy',
    'feather',
    'hdf5/pytables',
    'hdf5/fixed',
    'csv',
]
In [6]:
# Wallclock write time in seconds, one entry per format in `headers` order.
twall_write_s = [
    5.59,          # parquet/uncompressed
    5.67,          # parquet/snappy
    5.78,          # feather
    12.9,          # hdf5/pytables
    5.61,          # hdf5/fixed
    120.0 + 35.0,  # csv -- NOTE(review): presumably 2 min 35 s; confirm
]
In [7]:
# On-disk file size in GB, one entry per format in `headers` order.
fsize_gb = [
    0.3,  # parquet/uncompressed
    0.2,  # parquet/snappy
    1.4,  # feather
    1.5,  # hdf5/pytables
    1.5,  # hdf5/fixed
    1.4,  # csv
]
In [8]:
# Wallclock read time in seconds, one entry per format in `headers` order.
twall_read_s = [
    2.68,  # parquet/uncompressed
    2.97,  # parquet/snappy
    0.93,  # feather
    4.67,  # hdf5/pytables
    0.81,  # hdf5/fixed
    17.0,  # csv
]
In [9]:
# Convert the labels and measurements to numpy arrays so the plotting cells
# can reorder them with an argsort index array.
headers, twall_write_s, twall_read_s, fsize_gb = (
    np.array(values)
    for values in (headers, twall_write_s, twall_read_s, fsize_gb)
)

Write Speed

In [10]:
# Horizontal bar chart of write times, fastest format first.
# Sort once and reuse the permutation so labels and values stay paired.
order = np.argsort(twall_write_s)
x = headers[order]
y = twall_write_s[order]

# The x-axis is cut off here so the fast formats remain readable; any bar
# longer than this is drawn clipped and labelled with an arrow instead.
x_max = 16

# Shared styling for the value labels drawn inside the bars.
label_style = dict(color='white', va='center', ha='right', fontweight='bold')

fig, ax = plt.subplots(1, 1)
ax.barh(x, y)

for i, v in enumerate(y):
    if v <= x_max:
        # Bar fits on the axis: print the value just inside its right end.
        ax.text(v - 0.5, i, "%.1f s" % v, **label_style)
    else:
        # Bar is clipped: pin the label to the axis edge and point off-chart.
        ax.text(x_max - 0.5, i, "%i s -->" % v, **label_style)

ax.set_xlim([0, x_max])
ax.set_xlabel('Write Time (Wallclock, Seconds)')

ax.set_title('Write Speed Comparison, Single Core\n(Pandas Dataframe, 18 MRows, 10x Float64, 1x Uint16, 1.4 GB in Mem)')

# NOTE(review): absolute, user-specific output path; the directory must exist.
fig.savefig('/data/volkerh/bench/write_speed.png', bbox_inches='tight')

File Size

In [11]:
# Horizontal bar chart of on-disk file sizes, smallest format first.
# Sort once and reuse the permutation so labels and values stay paired.
order = np.argsort(fsize_gb)
x = headers[order]
y = fsize_gb[order]

fig, ax = plt.subplots(1, 1)
ax.barh(x, y, facecolor=colors[5])

ax.set_xlabel('File Size (GB)')
for i, v in enumerate(y):
    # Print each value just inside the right end of its bar.
    ax.text(v - 0.02, i, "%.1f GB" % v, color='white', va='center', ha='right', fontweight='bold')

# In-memory size corrected from 1.3 GB to 1.4 GB to agree with the write- and
# read-speed figures, which describe the same dataframe
# (18e6 rows * (10 * 8 + 2) bytes is roughly 1.48e9 bytes).
ax.set_title('On-Disk File Size Comparison\n(Pandas Dataframe, 18 MRows, 10x Float64, 1x Uint16, 1.4 GB in Mem)')

# NOTE(review): absolute, user-specific output path; the directory must exist.
fig.savefig('/data/volkerh/bench/file_size.png', bbox_inches='tight')

Read Speed

In [12]:
# Horizontal bar chart of read times, fastest format first.
# Sort once and reuse the permutation so labels and values stay paired.
order = np.argsort(twall_read_s)
x = headers[order]
y = twall_read_s[order]

# The x-axis is cut off here so the fast formats remain readable; any bar
# longer than this is drawn clipped and labelled with an arrow instead.
x_max = 6

# Shared styling for the value labels drawn inside the bars.
label_style = dict(color='white', va='center', ha='right', fontweight='bold')

fig, ax = plt.subplots(1, 1)
ax.barh(x, y, facecolor=colors[4])

for i, v in enumerate(y):
    if v <= x_max:
        # Bar fits on the axis: print the value just inside its right end.
        ax.text(v - 0.2, i, "%.1f s" % v, **label_style)
    else:
        # Bar is clipped: pin the label to the axis edge and point off-chart.
        ax.text(x_max - 0.2, i, "%.1f s -->" % v, **label_style)

ax.set_xlim([0, x_max])
ax.set_xlabel('Read Time (Wallclock, Seconds)')

ax.set_title('Read Speed Comparison, Single Core\n(Pandas Dataframe, 18 MRows, 10x Float64, 1x Uint16, 1.4 GB in Mem)')

# NOTE(review): absolute, user-specific output path; the directory must exist.
fig.savefig('/data/volkerh/bench/read_speed.png', bbox_inches='tight')
In [ ]: