14 changes: 10 additions & 4 deletions cmdbench/cli.py
@@ -36,6 +36,10 @@
help="Width and height of the saving plot. Works if --save-plot is specified.")
@click.option("--save-plot", "-p", default = None, type=click.File('wb'),
help="File address to save a plot of the command's resource usage over time (CPU + Memory).")
@click.option("--capture-points", "-c", default = False, is_flag = True, show_default=True,
help="Enables plotting delimiters when your program prints \"cmdbench point\\n\" and option "
"--save-plot enabled. Slows down execution speed. "
"NOTE: after each print you should flush the output buffer of your program (fflush(stdout) in C for example).")

@click.option("--iterations", "-i", default = 1, type = click.IntRange(1), show_default=True,
help="Number of iterations to get benchmarking results for the target command.")
@@ -63,10 +67,11 @@ def benchmark(command, iterations, **kwargs):
    If no printing options are specified, statistics are printed when running more than one iteration, and the first iteration's results when running just one."""

np.set_printoptions(threshold=15)

+    capture_items = []
+    if kwargs.get('capture_points'):
+        capture_items.append('points')
click.echo("Benchmarking started..")
benchmark_results = BenchmarkResults()
-    benchmark_generator = benchmark_command_generator(" ".join(command), iterations)
+    benchmark_generator = benchmark_command_generator(" ".join(command), iterations, capture_items = capture_items)
t = tqdm(range(iterations))
for i in t:
benchmark_result = next(benchmark_generator)
@@ -78,7 +83,7 @@ def benchmark(command, iterations, **kwargs):
click.echo("Benchmarking done.")
click.echo()

-    option_keys = ["print_statistics", "print_averages", "print_values", "print_first_iteration", "print_all_iterations"]
+    option_keys = ["print_statistics", "print_averages", "print_values", "print_first_iteration", "print_all_iterations", "capture_points"]

# Print statistics if user did not tell us what info to print
printing_any = False
@@ -110,12 +115,13 @@ def benchmark(command, iterations, **kwargs):
print_benchmark_dict(BenchmarkDict.from_dict(iteration), "Iteration #%s" % (ind + 1), indentation = 4, title_fg_color="magenta")

save_plot_value = kwargs["save_plot"]
+    capture_points = kwargs["capture_points"]
if save_plot_value is not None:
save_plot_sizes = kwargs["save_plot_size"]
save_plot_width = save_plot_sizes[0]
save_plot_height = save_plot_sizes[1]

-        fig = benchmark_results.get_resources_plot(save_plot_width, save_plot_height)
+        fig = benchmark_results.get_resources_plot(save_plot_width, save_plot_height, capture_points)
if fig:
fig.savefig(save_plot_value)
click.echo("Plot saved.")
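For context on the new flag: a target program opts into delimiters by printing the marker line and flushing. A minimal sketch of such a workload (hypothetical script name, not part of this PR):

```python
# sketch_target.py -- a hypothetical workload that emits capture points
import sys
import time

for phase in range(3):
    time.sleep(0.5)          # stand-in for real work
    print("cmdbench point")  # the marker line --capture-points scans for
    sys.stdout.flush()       # unflushed markers arrive late or batched
```

It would then be benchmarked with something like `cmdbench -i 3 --save-plot plot.png --capture-points python sketch_target.py` (invocation shape assumed from the options above).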
62 changes: 50 additions & 12 deletions cmdbench/core.py
@@ -11,32 +11,33 @@
import tempfile
import shlex
import click
+import io
from sys import platform as _platform

is_linux = _platform.startswith("linux")
is_macos = _platform == "darwin"
is_unix = is_linux or is_macos
is_win = os.name == "nt"

-def benchmark_command(command, iterations_num = 1, raw_data = False):
+def benchmark_command(command, iterations_num = 1, raw_data = False, capture_items = []):
if iterations_num <= 0:
raise Exception("The number of iterations to run the command should be >= 1")

raw_benchmark_results = []
for _ in range(iterations_num):
-        raw_benchmark_result = single_benchmark_command_raw(command)
+        raw_benchmark_result = single_benchmark_command_raw(command, capture_items)
raw_benchmark_results.append(raw_benchmark_result)

final_benchmark_results = list(map(lambda raw_benchmark_result: raw_benchmark_result if raw_data else raw_to_final_benchmark(raw_benchmark_result), raw_benchmark_results))

return BenchmarkResults(final_benchmark_results)

-def benchmark_command_generator(command, iterations_num = 1, raw_data = False):
+def benchmark_command_generator(command, iterations_num = 1, raw_data = False, capture_items = []):
if iterations_num <= 0:
raise Exception("The number of iterations to run the command should be >= 1")

for _ in range(iterations_num):
-        raw_benchmark_result = single_benchmark_command_raw(command)
+        raw_benchmark_result = single_benchmark_command_raw(command, capture_items)
final_benchmark_result = raw_benchmark_result if raw_data else raw_to_final_benchmark(raw_benchmark_result)
yield BenchmarkResults([final_benchmark_result])

@@ -65,7 +66,7 @@ def raw_to_final_benchmark(benchmark_raw_dict):

exit_code = benchmark_raw_dict["general"]["exit_code"]


+    points = benchmark_raw_dict["time_series"].get("points", [])

benchmark_results = {
"process": { "stdout_data": process_stdout_data, "stderr_data": process_stderr_data, "execution_time": process_execution_time, "exit_code": exit_code },
@@ -75,7 +76,8 @@ def raw_to_final_benchmark(benchmark_raw_dict):
{
"sample_milliseconds": time_series_sample_milliseconds,
"cpu_percentages": time_series_cpu_percentages,
"memory_bytes": time_series_memory_bytes
"memory_bytes": time_series_memory_bytes,
"points": points
}
}
# psutil io_counters() is not available on macos
@@ -300,9 +302,11 @@ def collect_time_series(shared_process_dict):
shared_process_dict["memory_values"] = memory_values

# Performs benchmarking on the command based on both /usr/bin/time and psutil library
-def single_benchmark_command_raw(command):
+def single_benchmark_command_raw(command, capture_items=set()):
# https://docs.python.org/3/library/shlex.html#shlex.split
commands_list = shlex.split(command)
+    # deduplicate and allow O(1) membership tests
+    capture_items = set(capture_items)

time_tmp_output_file = None

@@ -315,6 +319,12 @@ def single_benchmark_command_raw(command, capture_items=set()):

# CPU
cpu_times = None

+    # POINTS
+    points = []
+    capture_points = "points" in capture_items
+    # marker line the target program prints; output is read in text mode below
+    POINT_MARKER = "cmdbench point"

# Disk
disk_io_counters = None
@@ -365,7 +375,10 @@ def single_benchmark_command_raw(command):

# Finally, run the command
# Master process could be GNU Time running target command or the target command itself
-    master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if not capture_items:
+        master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    else:
+        # line-buffered text mode so marker lines can be matched as they arrive
+        master_process = psutil.Popen(commands_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1, text=True)
execution_start = current_milli_time()

    # Only on Linux: the target command will be GNU Time's child process
@@ -394,12 +407,35 @@ def single_benchmark_command_raw(command):
if not shared_process_dict["skip_benchmarking"]:
shared_process_dict["target_process_pid"] = p.pid

-    # Wait for process to finish (time_series_exec and fixed_data_exec will be processing it in parallel)
-    outdata, errdata = master_process.communicate()
-    outdata, errdata = outdata.decode(sys.stdout.encoding), errdata.decode(sys.stderr.encoding)
+    # Wait for process to finish (time_series_exec and fixed_data_exec will be processing it in parallel)
+    if not capture_items:
+        outdata, errdata = master_process.communicate()
+        outdata, errdata = outdata.decode(sys.stdout.encoding), errdata.decode(sys.stderr.encoding)
+    else:
+        outdata = []
+        errdata = []
+        # stream stdout line by line so marker lines can be timestamped on arrival
+        for line in master_process.stdout:
+            if capture_points and line.startswith(POINT_MARKER):
+                points.append(current_nano_time())
+            outdata.append(line)
+
+        # stdout is exhausted; drain stderr afterwards
+        for line in iter(master_process.stderr.readline, ''):
+            errdata.append(line)


exection_end = current_milli_time()

+    # join captured lines and convert point timestamps outside the read loop to keep it fast
+    if isinstance(outdata, list):
+        outdata = "".join(outdata)
+    if isinstance(errdata, list):
+        errdata = "".join(errdata)
+    # nanoseconds -> milliseconds, relative to process start
+    points = [round(point / 1_000_000, 2) - execution_start for point in points]

# Done with the master process, wait for the parallel (threads or processes) to finish up
time_series_exec.join()
fixed_data_exec.join()
@@ -550,7 +586,9 @@ def single_benchmark_command_raw(command):
{
"sample_milliseconds": np.array(sample_milliseconds),
"cpu_percentages": np.array(cpu_percentages),
"memory_bytes": np.array(memory_values)
"memory_bytes": np.array(memory_values),
"points": points

},

}
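The heart of this file's change is the streaming read. A self-contained sketch of the same technique (the names `POINT_MARKER` and `run_and_capture` are illustrative, not cmdbench's API):

```python
import subprocess
import time

POINT_MARKER = "cmdbench point"

def run_and_capture(argv):
    """Run argv, timestamping every marker line as it arrives on stdout."""
    points, out_lines = [], []
    # bufsize=1 with text=True requests line-buffered, already-decoded output
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, bufsize=1, text=True)
    start_ms = time.time() * 1000
    for line in proc.stdout:          # yields decoded lines until EOF
        if line.startswith(POINT_MARKER):
            # store the offset from start in milliseconds
            points.append(time.time_ns() / 1_000_000 - start_ms)
        out_lines.append(line)
    err_data = proc.stderr.read()     # stdout hit EOF; now drain stderr
    proc.wait()
    return "".join(out_lines), err_data, points
```

One caveat of reading the streams sequentially (both in this sketch and in the diff): a child that fills the stderr pipe buffer before closing stdout can block, which `communicate()` avoids; that is a reason to keep the streaming path opt-in behind `capture_items`.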
21 changes: 19 additions & 2 deletions cmdbench/result.py
@@ -60,6 +60,9 @@ def stats_replace_func(list_of_objects, key_path):
value_per_attribute_stats_dict = self._get_values_per_attribute(self.iterations, stats_replace_func)
return BenchmarkDict.from_dict(value_per_attribute_stats_dict)

+    def get_points(self):
+        return self._get_values_per_attribute(self.iterations)['time_series'].get('points', [])

def get_averages(self):
time_series_dict_key = "time_series"

@@ -80,7 +83,7 @@ def avg_replace_func(list_of_objects, key_path):
time_series_y_values = {}

for key, value in value_per_attribute_avgs_dict["time_series"].items():
-            if key != "sample_milliseconds":
+            if key not in ["sample_milliseconds", "points"]:
time_series_y_values[key] = value

if len(time_series_x_values) == 0 or len(time_series_x_values[0]) == 0:
@@ -114,12 +117,14 @@ def avg_replace_func(list_of_objects, key_path):
for key, value in time_series_y_values_out.items():
averaged_time_series[key] = value

+        # averaged_time_series['points'] = value_per_attribute_avgs_dict["time_series"]['points']

value_per_attribute_avgs_dict["time_series"] = averaged_time_series

return BenchmarkDict.from_dict(value_per_attribute_avgs_dict)


-    def get_resources_plot(self, width = 15, height = 3):
+    def get_resources_plot(self, width = 15, height = 3, capture_points=False):
if not matplotlib_available:
raise Exception("You need to install matplotlib before using this method")

@@ -128,6 +133,9 @@ def get_resources_plot(self, width = 15, height = 3):
time_series_obj = self.get_first_iteration()
else:
time_series_obj = self.get_averages()

+        if capture_points:
+            points = self.get_points()

time_series_obj = time_series_obj["time_series"]

@@ -179,6 +187,15 @@ def get_resources_plot(self, width = 15, height = 3):
ax_cpu.set_ylabel("CPU (%)", color=color)
ax_cpu.plot(x, cpu_y, color=color, alpha=0.75, linewidth=1)
ax_cpu.tick_params(axis="y", labelcolor=color)
+        if capture_points:
+            # one color and one horizontal band of the y-axis per iteration
+            for ind, point_list in enumerate(points):
+                color = COLORS[(ind + 3) % len(COLORS)]
+                ylen = 1 / len(points)
+                ymin = ind * ylen
+                ymax = ymin + ylen
+                for point in point_list:
+                    if isinstance(point, (int, float)):
+                        ax_cpu.axvline(x=point, ymin=ymin, ymax=ymax, linestyle="-", lw=1, alpha=0.2, color=color)
#plt.fill_between(x, cpu_y, alpha=0.2, color=color)

#plt.tight_layout()
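To visualize what the axvline loop produces: each iteration gets its own color and its own horizontal slice of the y-axis, so delimiters from different runs do not overprint. A minimal standalone sketch with made-up point data:

```python
import matplotlib.pyplot as plt

COLORS = plt.rcParams["axes.prop_cycle"].by_key()["color"]
# two iterations' points, as millisecond offsets (illustrative values)
points = [[120.0, 480.5], [118.2, 495.0]]

fig, ax = plt.subplots(figsize=(15, 3))
ax.set_xlim(0, 600)
for ind, point_list in enumerate(points):
    color = COLORS[(ind + 3) % len(COLORS)]
    band = 1 / len(points)                # fraction of the y-axis per iteration
    ymin, ymax = ind * band, ind * band + band
    for point in point_list:
        # ymin/ymax are axes-fraction coordinates, not data coordinates
        ax.axvline(x=point, ymin=ymin, ymax=ymax, lw=1, alpha=0.2, color=color)
fig.savefig("delimiters.png")
```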
7 changes: 7 additions & 0 deletions cmdbench/utils.py
@@ -2,7 +2,9 @@
from collections import defaultdict
import numpy as np
from beeprint import pp
+from matplotlib import pyplot as plt

+COLORS = plt.rcParams["axes.prop_cycle"].by_key()["color"]

# https://stackoverflow.com/a/41274937
# Allows attribute access through both obj["key"] (internal library convenience) and obj.key (external developer convenience)
@@ -94,8 +96,13 @@ def __repr__(self):
"min": self.min, "max": self.max
}

+class OutputCapture:
+    output_encoding = "utf-8"


# https://stackoverflow.com/a/5998359
current_milli_time = lambda: int(round(time.time() * 1000))
+current_nano_time = lambda: time.time_ns()


def iterable(obj):
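The two clocks defined here meet in core.py: `execution_start` is epoch milliseconds, point stamps are epoch nanoseconds. A worked example of the conversion (illustrative numbers):

```python
import time

current_milli_time = lambda: int(round(time.time() * 1000))
current_nano_time = lambda: time.time_ns()

execution_start = current_milli_time()  # e.g. 1700000000000 ms
stamp = current_nano_time()             # e.g. 1700000000250123456 ns
offset_ms = round(stamp / 1_000_000, 2) - execution_start
# offset_ms ~= 250.12 -- the delimiter's x position on the plot
```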
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cmdbench"
version = "0.1.22"
version = "0.2.22"
description = "Quick and easy benchmarking for any command's CPU, memory, disk usage and runtime."
authors = ["Mohsen Yousefian <[email protected]>"]
license = "MIT"
Binary file added resources/plot delimiters.png