# Python script used in our experiment

# import ----------------------------------------------------------------------
import statistics
from collections import Counter, defaultdict
from csv import DictReader
from os import _exit, fork, wait

import matplotlib
import matplotlib.pyplot as plt

matplotlib.use("Agg")

# load ------------------------------------------------------------------------
# Emulated fork point: the parent blocks until its child has terminated and
# then exits right away, so only the child runs the remainder of the script.
if fork() != 0:
    wait()
    _exit(0)

# Read the CSV eagerly; each row becomes a dict keyed by the header columns.
with open("__outdir__/data.csv", encoding="utf-8", newline="") as csv_file:
    exam_data = [*DictReader(csv_file)]

# preprocess ------------------------------------------------------------------
# Emulated fork point: the parent waits for its child and exits; the child
# continues with the grouping step below.
if fork() != 0:
    wait()
    _exit(0)

# Group the scores (parsed as integers) by subject, in CSV row order.
subject_data = defaultdict(list)
for row in exam_data:
    bucket = subject_data[row["subject"]]
    bucket.append(int(row["score"]))

# aggregate -------------------------------------------------------------------
# Emulated fork point: the parent waits for its child and exits; the child
# continues with the aggregation below.
if fork():
    wait()
    _exit(0)

# Per-subject summary: sample count, mean, median, sample standard deviation
# and a score -> occurrence-count histogram.
subject_stats = {}
for subject, scores in subject_data.items():
    n = len(scores)
    subject_stats[subject] = {
        "n": n,
        "mean": statistics.mean(scores),
        "median": statistics.median(scores),
        # statistics.stdev() raises StatisticsError for fewer than two values;
        # report NaN for a single-score subject instead of aborting the run.
        "stdev": statistics.stdev(scores) if n > 1 else float("nan"),
        "hist": Counter(scores),
    }

# visualize -------------------------------------------------------------------
# Emulated fork point: the parent waits for its child and exits; the child
# continues with the plotting below.
if fork():
    wait()
    _exit(0)

# One bar chart per subject: x = distinct score, bar height = its frequency.
for subject, stats in subject_stats.items():
    n = stats["n"]
    mean = stats["mean"]
    median = stats["median"]
    stdev = stats["stdev"]
    hist = stats["hist"]
    plt.title(f"{subject}: n={n}, mean={mean}, median={median}, stdev={stdev}")
    plt.bar(hist.keys(), hist.values())
    plt.savefig(f"__outdir__/{subject}.png")
    # pyplot keeps drawing on the same implicit figure; clear it so the next
    # subject's image does not also contain this subject's bars.
    plt.clf()

# Terminate the innermost child immediately with status 0.
_exit(0)

# Additional notes

  • “# of rows” in the poster abstract refers to the number of rows in the data.csv file that the script reads.

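  • Each “if fork(): ...” guard in the script emulates the fork system call of Multiverse Notebook: the parent process waits for its child and then exits, while the child continues with the next stage.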