Python script used in our experiment
# import ----------------------------------------------------------------------
import statistics
from collections import Counter, defaultdict
from csv import DictReader
from os import _exit, fork, wait
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use("Agg")
# load ------------------------------------------------------------------------
# Checkpoint that emulates Multiverse Notebook's fork: the parent process only
# waits for its child and then exits; the child continues with the script.
child_pid = fork()
if child_pid != 0:
    wait()
    _exit(0)
# Read every CSV row into memory as a dict keyed by the header names.
with open("__outdir__/data.csv", encoding="utf-8", newline="") as csv_file:
    reader = DictReader(csv_file)
    exam_data = list(reader)
# preprocess ------------------------------------------------------------------
# Checkpoint that emulates Multiverse Notebook's fork: the parent process only
# waits for its child and then exits; the child continues with the script.
if fork() != 0:
    wait()
    _exit(0)
# Group the scores (converted to int) by subject, keeping row order.
subject_data = defaultdict(list)
for row in exam_data:
    scores_for_subject = subject_data[row["subject"]]
    scores_for_subject.append(int(row["score"]))
# aggregate -------------------------------------------------------------------
# Checkpoint that emulates Multiverse Notebook's fork: the parent process only
# waits for its child and then exits; the child continues with the script.
if fork():
    wait()
    _exit(0)
# Build per-subject summary statistics plus a score -> frequency histogram.
subject_stats = {}
for subject, scores in subject_data.items():
    subject_stats[subject] = {
        "n": len(scores),
        "mean": statistics.mean(scores),
        "median": statistics.median(scores),
        # statistics.stdev() raises StatisticsError for fewer than two data
        # points; report NaN for a single-score subject instead of aborting
        # the whole run.
        "stdev": (
            statistics.stdev(scores) if len(scores) > 1 else float("nan")
        ),
        "hist": Counter(scores),
    }
# visualize -------------------------------------------------------------------
# Checkpoint that emulates Multiverse Notebook's fork: the parent process only
# waits for its child and then exits; the child continues with the script.
if fork():
    wait()
    _exit(0)
# Write one bar chart per subject to __outdir__/<subject>.png.
for subject, stats in subject_stats.items():
    n = stats["n"]
    mean = stats["mean"]
    median = stats["median"]
    stdev = stats["stdev"]
    hist = stats["hist"]
    # Use a fresh figure for every subject: drawing on the implicit pyplot
    # figure would accumulate the bars of all previously plotted subjects.
    fig, ax = plt.subplots()
    ax.set_title(
        f"{subject}: n={n}, mean={mean}, median={median}, stdev={stdev}"
    )
    ax.bar(list(hist.keys()), list(hist.values()))
    fig.savefig(f"__outdir__/{subject}.png")
    # Release the figure so memory does not grow with the number of subjects.
    plt.close(fig)
# End the last child immediately, matching how the parent processes exit.
_exit(0)
Additional notes
“# of rows” in the poster abstract is the number of rows in `data.csv` in the script.
`if fork(): ...` in the script emulates the fork system call in Multiverse Notebook.