diff --git a/.play_minio.json b/.play_minio.json
deleted file mode 100644
index 81a2b2b..0000000
--- a/.play_minio.json
+++ /dev/null
@@ -1 +0,0 @@
-{"access_key": "Q3AM3UQ867SPQQA43P2F", "secret_key": "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG"}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index abede70..3074d7a 100644
--- a/Makefile
+++ b/Makefile
@@ -16,19 +16,19 @@
 #   run_conda    run conda backend with seeds + repeats
 #   run_oras     run oras backend with seeds + repeats
 #   run_envs     run envmodules backend with seeds + repeats
-#   knit_report  generate RMarkdown reports - not fully tested
+#   knit_report  generate RMarkdown reports and an aggregated CSV - not fully tested
 #
 # Environment:
-#   - MAX_CORES controls parallelism (default: 50).
+#   - MAX_CORES controls the number of concurrent rules (default: 250)
 #   - EASYBUILD_PREFIX needs to be tuned to access the envmodules built extending EESSI <--------------!!!!
 #     see: https://github.com/omnibenchmark/clustering_example/pull/43
 #
 # ============================================================

-MAX_CORES ?= 50
+MAX_CORES ?= 250

-# EasyBuild installation prefix (imallona; edit accordingly)
+# EasyBuild installation prefix (imallona; edit accordingly) ## <------------------------------------!!!!
 EASYBUILD_PREFIX ?= /data/imallona/.local/easybuild
 export EASYBUILD_PREFIX

@@ -36,23 +36,30 @@ export EASYBUILD_PREFIX
 OB_CMD = ob run benchmark --local-storage --cores ${MAX_CORES}

 # actual benchmark plan repository - to be pinned (the commit/tag)
-CLUSTERING_REPO = https://github.com/omnibenchmark/clustering_example
-CLUSTERING_DIR = clustering_example
+CLUSTERING_REPO   = https://github.com/omnibenchmark/clustering_example
+CLUSTERING_BRANCH = longer_yamls
+CLUSTERING_DIR    = clustering_example

 # legacy reports in the wrong repository; to be moved to this one
 REPORTS_REPO = https://github.com/imallona/clustering_report
 REPORTS_DIR = clustering_report

+## seeds to explore
+SEEDS := 2 54 546 744 1443
+
+## repeated runs per seed
+RUNS := 1 2 3
+
 all: clone_yamls clone_reports run_conda run_oras run_envs knit_report

 # clone the clustering_example repo if not already present
 clone_yamls:
	@if [ ! -d "$(CLUSTERING_DIR)" ]; then \
		echo "Cloning clustering_example repo..."; \
-		git clone --branch easyconfigs_py3126 $(CLUSTERING_REPO); \
+		git clone --branch ${CLUSTERING_BRANCH} $(CLUSTERING_REPO); \
	else \
		echo "clustering_example repo already present, pulling latest..."; \
-		cd $(CLUSTERING_DIR) && git fetch && git checkout easyconfigs_py3126 && git pull; \
+		cd $(CLUSTERING_DIR) && git fetch && git checkout ${CLUSTERING_BRANCH} && git pull; \
	fi

 # clone the clustering_report repo (mark branch) if not already present
@@ -66,57 +73,52 @@ clone_reports:
	fi

 run_conda: clone_yamls
-	@for seed in 2 54 546 744 1443; do \
+	mkdir -p results
+	@for seed in $(SEEDS); do \
		echo "Running conda benchmark with seed $$seed..."; \
		cp $(CLUSTERING_DIR)/Clustering_conda.yml $(CLUSTERING_DIR)/Clustering_conda_tmp.yml; \
-		sed -i "s/--seed, [0-9]\+/--seed, $$seed/" $(CLUSTERING_DIR)/Clustering_conda_tmp.yml; \
-		for i in 1 2 3; do \
-			echo "  Run $$i for seed $$seed..."; \
-			${OB_CMD} -b $(CLUSTERING_DIR)/Clustering_conda_tmp.yml; \
-			cp $(CLUSTERING_DIR)/Clustering_conda_tmp.yml out; \
-			mv out out_conda_seed_$$seed\_run_$$i; \
+		sed -i "s/--seed\",[[:space:]]*[0-9]\+/--seed\", $$seed/" $(CLUSTERING_DIR)/Clustering_conda_tmp.yml; \
+		for i in $(RUNS); do \
+			echo "  Run $$i for seed $$seed..."; \
+			echo "DEST: results/out_conda_seed_$$seed\_run_$$i"; \
+			${OB_CMD} -b $(CLUSTERING_DIR)/Clustering_conda_tmp.yml --out-dir results/out_conda_seed_$$seed\_run_$$i; \
+			cp $(CLUSTERING_DIR)/Clustering_conda_tmp.yml results/out_conda_seed_$$seed\_run_$$i/; \
		done; \
-		rm $(CLUSTERING_DIR)/Clustering_conda_tmp.yml; \
	done

 run_oras: clone_yamls
-	@for seed in 2 54 546 744 1443; do \
+	@for seed in $(SEEDS); do \
		echo "Running oras benchmark with seed $$seed..."; \
		cp $(CLUSTERING_DIR)/Clustering_oras.yml $(CLUSTERING_DIR)/Clustering_oras_tmp.yml; \
-		sed -i "s/--seed, [0-9]\+/--seed, $$seed/" $(CLUSTERING_DIR)/Clustering_oras_tmp.yml; \
-		for i in 1 2 3; do \
-			echo "  Run $$i for seed $$seed..."; \
-			${OB_CMD} -b $(CLUSTERING_DIR)/Clustering_oras_tmp.yml; \
-			cp $(CLUSTERING_DIR)/Clustering_oras_tmp.yml out; \
-			mv out out_oras_seed_$$seed\_run_$$i; \
+		sed -i "s/--seed\",[[:space:]]*[0-9]\+/--seed\", $$seed/" $(CLUSTERING_DIR)/Clustering_oras_tmp.yml; \
+		for i in $(RUNS); do \
+			echo "  Run $$i for seed $$seed..."; \
+			${OB_CMD} -b $(CLUSTERING_DIR)/Clustering_oras_tmp.yml --out-dir results/out_oras_seed_$$seed\_run_$$i/; \
+			cp $(CLUSTERING_DIR)/Clustering_oras_tmp.yml results/out_oras_seed_$$seed\_run_$$i/; \
		done; \
-		rm $(CLUSTERING_DIR)/Clustering_oras_tmp.yml; \
	done

 run_envs: clone_yamls
	@bash -c '\
-	source /cvmfs/software.eessi.io/versions/2025.06/init/lmod/bash && \
-	module load EESSI-extend/2025.06-easybuild && \
-	export MODULEPATH="$(EASYBUILD_PREFIX)/software/modules/all:$$MODULEPATH" && \
-	module use $$MODULEPATH && \
-	echo $$MODULEPATH && \
-	for seed in 2 54 546 744 1443; do \
-		echo "Running envmodules benchmark with seed $$seed..."; \
-		cp $(CLUSTERING_DIR)/Clustering_envmodules.yml $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml; \
-		sed -i "s/--seed, [0-9]\+/--seed, $$seed/" $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml; \
-		for i in 1 2 3; do \
-			echo "  Run $$i for seed $$seed..."; \
-			${OB_CMD} -b $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml; \
-			cp $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml out; \
-			mv out out_envmodules_seed_$$seed\_run_$$i; \
-		done; \
-		rm $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml; \
-	done \
-	'
-
+	source /cvmfs/software.eessi.io/versions/2025.06/init/lmod/bash && \
+	module load EESSI-extend/2025.06-easybuild && \
+	export MODULEPATH="$(EASYBUILD_PREFIX)/software/modules/all:$$MODULEPATH" && \
+	module use $$MODULEPATH && \
+	echo $$MODULEPATH && \
+	for seed in $(SEEDS); do \
+		echo "Running envmodules benchmark with seed $$seed..."; \
+		cp $(CLUSTERING_DIR)/Clustering_envmodules.yml $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml; \
+		sed -i "s/--seed\",[[:space:]]*[0-9]\+/--seed\", $$seed/" $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml; \
+		for i in $(RUNS); do \
+			echo "  Run $$i for seed $$seed..."; \
+			${OB_CMD} -b $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml --out-dir results/out_envmodules_seed_$$seed\_run_$$i/; \
+			cp $(CLUSTERING_DIR)/Clustering_envmodules_tmp.yml results/out_envmodules_seed_$$seed\_run_$$i/; \
+		done; \
+	done \
+	'
 knit_report: clone_reports
-	R -e 'rmarkdown::render("$(REPORTS_DIR)/07_metrics_across_backends.Rmd", params = list(performance_bn = "performance-results.rds", metrics_bn = "metrics-results.rds", clustering_dir = "."))'
-	R -e 'rmarkdown::render("$(REPORTS_DIR)/08_performances_across_backends.Rmd", params = list(performance_bn = "performance-results.rds", metrics_bn = "metrics-results.rds", clustering_dir = "."))'
-	python parse_results.py
-	R -e 'rmarkdown::render("analyze_results.Rmd")'
+	## R -e 'rmarkdown::render("$(REPORTS_DIR)/07_metrics_across_backends.Rmd", params = list(performance_bn = "performance-results.rds", metrics_bn = "metrics-results.rds", clustering_dir = "."))'
+	## R -e 'rmarkdown::render("$(REPORTS_DIR)/08_performances_across_backends.Rmd", params = list(performance_bn = "performance-results.rds", metrics_bn = "metrics-results.rds", clustering_dir = "."))'
+	python parse_results.py > aggregated_results.json
+	R -e 'rmarkdown::render("analyze_results_izaskun.Rmd")'
diff --git a/analyze_results_izaskun.Rmd b/analyze_results_izaskun.Rmd
new file mode 100644
index 0000000..700022f
--- /dev/null
+++ b/analyze_results_izaskun.Rmd
@@ -0,0 +1,811 @@
+---
+title: "clustbench exploratory / fig 2 making"
+author: "Izaskun Mallona"
+output:
+  html_document:
+    theme: readable
+    toc: true
+    toc_float: true
+    code_folding: hide
+    code_download: true
+    number_sections: true
+    df_print: default
+    highlight: tango
+    keep_md: true
+    self_contained: false
+date: "`r format(Sys.Date(), '%B %d, %Y')`"
+---
+
+
+```{r setup, message = FALSE}
+library(knitr)
+library(tidyverse)
+library(jsonlite)
+library(ggplot2)
+library(data.table)
+library(parallel)
+
+knitr::opts_chunk$set(
+    echo = TRUE,
+    warning = TRUE,
+    message = TRUE,
+    fig.width = 10,
+    fig.height = 10,
+    fig.path = "plots/",
+    dev = c("png", "svg"),
+    cache.lazy = FALSE,
+    cache = FALSE)
+
+```
+
+```{r}
+
+## ks are provided as (up to) five values with the true k in the middle;
+## shorter vectors were padded with 2s on the left
+get_true_k <- function(x) {
+  n <- length(x)
+  if (n == 5) {
+    return(x[3])
+  } else if (n == 4) {
+    return(x[3])
+  } else if (n == 3) {
+    return(x[2])
+  } else {
+    return(x[1])
+  }
+}
+
+
+`%||%` <- function(a, b) if (!is.null(a)) a else b
+
+flatten_parameters <- function(params, parameter_dir) {
+  if (is.null(params) || length(params) == 0) {
+    return(data.frame(parameter_dir = parameter_dir,
+                      param_name = NA_character_,
+                      param_value = NA_character_,
+                      stringsAsFactors = FALSE))
+  }
+  flat <- unlist(params, use.names = TRUE)
+  data.frame(
+    parameter_dir = parameter_dir,
+    param_name = names(flat),
+    param_value = as.character(flat),
+    stringsAsFactors = FALSE
+  )
+}
+
+flatten_record <- function(rec) {
+  mets <- rec$metrics
+  perf <- rec$performance
+  params_df <- flatten_parameters(rec$parameters, rec$parameter_dir)
+
+  rows <- list()
+  idx <- 0
+
+  for (family in names(mets)) {
+    fam_list <- mets[[family]]
+    if (is.null(fam_list)) next
+
+    for (metric_name in names(fam_list)) {
+      metric_vals <- fam_list[[metric_name]]
+      if (is.null(metric_vals)) next
+
+      ks <- names(metric_vals)
+      true_k <- get_true_k(ks)
+
+      for (k in ks) {
+        for (i in seq_len(nrow(params_df))) {
+          idx <- idx + 1
+          rows[[idx]] <- data.frame(
+            backend = rec$backend,
+            seed = rec$seed,
+            run = rec$run,
+            generator = rec$generator,
+            dataset_name = rec$dataset_name,
+            method = rec$method,            # keep "sklearn"
+            method_full = rec$method_full,  # "sklearn_method-birch" or "sklearn_method-kmeans"
+            parameter_dir = rec$parameter_dir,  # "method-birch" or "method-kmeans"
+            param_name = params_df$param_name[i],
+            param_value = params_df$param_value[i],
+            metric_family = family,
+            metric_name = metric_name,
+            k = as.integer(k),
+            true_k = as.integer(true_k),
+            metric_value = metric_vals[[k]],
+            s = perf$s,
+            h_m_s = perf[["h:m:s"]],
+            max_rss = perf$max_rss,
+            max_vms = perf$max_vms,
+            max_uss = perf$max_uss,
+            max_pss = perf$max_pss,
+            io_in = perf$io_in,
+            io_out = perf$io_out,
+            mean_load = perf$mean_load,
+            cpu_time = perf$cpu_time,
+            stringsAsFactors = FALSE
+          )
+        }
+      }
+    }
+  }
+
+  do.call(rbind, rows)
+}
+
+
+```
+
+
+```{r}
+
+records <- fromJSON("aggregated_results.json", simplifyVector = FALSE)
+##records <- fromJSON("aggregated_results_full.json", simplifyVector = FALSE)
+
+## fd_list <- vector("list", length(records))
+## for (i in seq_along(records)) {
+##   if (i == 1 || i == 2 || i == 100 || i %% 1000 == 0)
+##     cat("Processing record", i, "of", length(records), "\n")
+##   fd_list[[i]] <- flatten_record(records[[i]])
+## }
+## fd <- data.table::rbindlist(fd_list, use.names = TRUE, fill = TRUE)
+
+
+# parallel apply instead
+fd_list <- mclapply(seq_along(records), function(i) {
+  if (i == 1 || i == 2 || i == 100 || i %% 1000 == 0)
+    cat("Processing record", i, "of", length(records), "\n")
+  flatten_record(records[[i]])
+}, mc.cores = detectCores())
+
+fd <- rbindlist(fd_list, use.names = TRUE, fill = TRUE)
+
+
+## dim(fd)
+
+## str(fd)
+## table(is.na(fd$max_rss))
+## table(fd$method_full)
+## table(vapply(records, function(x) x$method_full, character(1)))
+
+fd <- as.data.frame(fd)
+
+fd$k <- as.integer(fd$k)
+fd$true_k <- as.integer(fd$true_k)
+
+cols_to_num <- c("max_rss","max_vms","max_uss","max_pss",
+                 "io_in","io_out","mean_load","cpu_time",
+                 'metric_value')
+
+fd[cols_to_num] <- lapply(fd[cols_to_num], function(x) {
+  x[x == "NA"] <- NA_character_
+  as.numeric(x)
+})
+
+fd <- fd[!is.na(fd$metric_value),]
+fd$k_offset <- fd$k - fd$true_k
+## write.csv(fd, file = 'aggregated_results.csv') ## later, this needs extra cleaning
+```
+
+# QC
+
+Is this the speed of computing the metric, or of running the method?
+
+```{r}
+print(ggplot(fd[fd$backend %in% c("conda","oras","envmodules"), ],
+             aes(x = backend, y = cpu_time, fill = backend)) +
+      geom_boxplot(outlier.alpha = 0.3) +
+      theme_minimal(base_size = 14) +
+      labs(title = "CPU time by backend",
+           x = "Backend",
+           y = "CPU time (s)") +
+      scale_fill_brewer(palette = "Set2"))
+```
+
+Clearly the method, so all good. Below, a quick per-backend summary, then the same boxplot with segments following dataset/method/params.
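+
+A per-backend numeric companion to the boxplot above (a minimal sketch; it only assumes the `fd` columns built earlier):
+
+```{r}
+## median CPU time and row counts per backend
+fd %>%
+  filter(backend %in% c("conda", "oras", "envmodules")) %>%
+  group_by(backend) %>%
+  summarise(median_cpu_time = median(cpu_time, na.rm = TRUE),
+            n = n(), .groups = "drop")
+```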
+
+```{r}
+print(ggplot(fd[fd$backend %in% c("conda","oras","envmodules"), ],
+             aes(x = backend, y = cpu_time, fill = backend)) +
+      geom_boxplot(outlier.alpha = 0.3) +
+      # add points for each method/params combination
+      geom_point(alpha = 0.6, position = position_jitter(width = 0.15)) +
+      # connect points across backends for the same module+params
+      geom_line(aes(group = interaction(method, method_full, seed, run, generator, dataset_name, k)),
+                alpha = 0.1, color = "grey40") +
+      facet_wrap(~metric_name, scales = "free_y") +
+      theme_minimal(base_size = 14) +
+      labs(title = "CPU time by backend",
+           x = "Backend",
+           y = "CPU time (s)") +
+      scale_color_brewer(palette = "Set1")
+)
+```
+
+# Consistency checks
+
+Are there CPU times that are redundant/inconsistent across repeated runs?
+
+```{r}
+keys <- c("dataset_name", "method", "method_full", "generator",
+          "backend", "k", "seed", "run")
+
+fd_inconsistencies <- fd %>%
+  group_by(across(all_of(keys))) %>%
+  summarise(
+    n_cpu_time = n_distinct(cpu_time, na.rm = TRUE),
+    cpu_times = list(unique(cpu_time)),
+    .groups = "drop"
+  ) %>%
+  filter(n_cpu_time > 1)
+
+stopifnot(nrow(fd_inconsistencies) == 0)
+```
+
+Seed consistency
+
+```{r}
+keys <- c("dataset_name", "method", "method_full", "generator",
+          "backend", "run", "k", "metric_name")
+
+fd_seed_diff <- fd %>%
+  group_by(across(all_of(keys))) %>%
+  summarise(
+    n_seeds = n_distinct(seed),
+    n_metric_values = n_distinct(metric_value, na.rm = TRUE),
+    metric_values = list(unique(metric_value)),
+    .groups = "drop"
+  ) %>%
+  filter(n_seeds > 1 & n_metric_values > 1)
+
+stopifnot(nrow(fd_seed_diff) == 0)
+```
+
+`k` consistency
+
+```{r}
+fd <- fd[!is.na(fd$k),]
+stopifnot(range(fd$k_offset) == c(-2, 2))
+```
+
+Some seed-unaware methods were nevertheless run several times with different seeds; for those, the different "seeds" are not really seeds but extra runs. We drop them here (keeping only seed 2), since repeated/controlled runs are tracked separately; see the listing below and the filtering chunk after it.
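+
+Which methods are seed-aware can be listed directly (a minimal sketch, keying on the same `seed-` token the filtering chunk below uses):
+
+```{r}
+## distinct method variants, split by whether they encode a seed
+fd %>%
+  distinct(method_full) %>%
+  mutate(has_seed = grepl("seed-", method_full)) %>%
+  arrange(has_seed, method_full)
+```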
+
+```{r}
+
+## seed 2 for seed-unaware methods, all seeds for seed-aware methods
+fd_clean <- fd %>%
+  mutate(has_seed = grepl("seed-", method_full)) %>%
+  filter(
+    (has_seed) | (!has_seed & seed == 2)  # keep all seeds if encoded, else only seed==2
+  ) %>%
+  mutate(seed = ifelse(has_seed, as.character(seed), "none"))
+```
+
+Before
+
+```{r}
+table(fd$seed, grepl('seed', fd$method_full), useNA = 'always')
+```
+
+After
+
+```{r}
+table(fd_clean$seed, grepl('seed', fd_clean$method_full), useNA = 'always')
+
+fd <- fd_clean
+
+rm(fd_clean)
+
+write.csv(fd, file = 'aggregated_results.csv')
+# write.csv(fd, file = 'aggregated_results_full.csv')
+
+```
+
+
+# CPU by backend
+
+## QC by backend scatterplots
+
+```{r, fig.width = 7, fig.height = 5}
+
+wide_cpu <- fd %>%
+  filter(backend %in% c("conda","envmodules","oras")) %>%
+  group_by(dataset_name, method, method_full, seed, run, backend) %>%
+  summarise(cpu_time = mean(as.numeric(cpu_time), na.rm = TRUE), .groups = "drop") %>%
+  pivot_wider(names_from = backend, values_from = cpu_time)
+
+ggplot(wide_cpu, aes(x = conda, y = oras, color = method)) +
+  geom_point(alpha = 0.6) +
+  labs(title = "CPU time: conda vs oras",
+       x = "conda", y = "oras") +
+  theme_minimal()
+
+ggplot(wide_cpu, aes(x = conda, y = envmodules, color = method)) +
+  geom_point(alpha = 0.6) +
+  labs(title = "CPU time: conda vs envmodules",
+       x = "conda", y = "envmodules") +
+  theme_minimal()
+
+ggplot(wide_cpu, aes(x = oras, y = envmodules, color = method)) +
+  geom_point(alpha = 0.6) +
+  labs(title = "CPU time: oras vs envmodules",
+       x = "oras", y = "envmodules") +
+  theme_minimal()
+
+```
+
+## Censoring-aware: cpu_time < 0.05 s plotted as 0.05 s
+
+We impute NA cpu_time values (runs too quick to be profiled) as 0.05 s.
+
+```{r, fig.width = 12, fig.height = 8}
+
+min(fd$cpu_time, na.rm = TRUE)
+fd$imputed_cpu_time <- ifelse(is.na(fd$cpu_time), yes = 0.05, no = fd$cpu_time)
+fd$censored_cpu_time <- is.na(fd$cpu_time)
+
+## also, we average for repeated runs, seeds and ks
+
+fd_avg <- fd %>%
+  filter(backend %in% c("conda","oras","envmodules")) %>%
+  group_by(backend, method, method_full, generator, dataset_name) %>%
+  summarise(imputed_cpu_time = mean(imputed_cpu_time, na.rm = TRUE),
+            censored_cpu_time = any(censored_cpu_time),
+            .groups = "drop")
+
+
+## str(fd_avg)
+## head(fd_avg)
+
+ggplot(fd_avg, aes(x = backend, y = imputed_cpu_time)) +
+  geom_boxplot(outlier.alpha = 0.3) +
+  geom_point(aes(color = method_full, shape = censored_cpu_time),
+             alpha = 0.6, position = position_jitter(width = 0.15)) +
+  geom_line(aes(group = interaction(method, method_full, generator, dataset_name)),
+            alpha = 0.2, color = "grey40") +
+  theme_minimal(base_size = 14) +
+  facet_wrap(~method, scales = "free_y") +
+  labs(title = "CPU time by backend - censored",
+       x = "Backend",
+       y = "CPU time (s)") +
+  scale_y_sqrt()
+
+
+ggplot(fd_avg, aes(x = backend, y = imputed_cpu_time)) +
+  geom_boxplot(outlier.alpha = 0.3) +
+  geom_point(aes(color = method_full, shape = censored_cpu_time),
+             alpha = 0.6, position = position_jitter(width = 0.15)) +
+  geom_line(aes(group = interaction(method, method_full, generator, dataset_name)),
+            alpha = 0.2, color = "grey40") +
+  theme_minimal(base_size = 14) +
+  facet_wrap(~method, scales = "free_y") +
+  labs(title = "CPU time by backend - censored",
+       x = "Backend",
+       y = "CPU time (s)")
+
+```
+
+# Results consistency across backends
+
+## Pairwise correlations
+
+```{r, fig.width = 7, fig.height = 14}
+wide_run <- fd %>%
+  filter(backend %in% c("conda","oras","envmodules")) %>%
+  group_by(dataset_name, method, k, metric_name, run, seed, backend) %>%
+  summarise(metric_value = mean(as.numeric(metric_value), na.rm = TRUE), .groups = "drop") %>%
+  tidyr::pivot_wider(names_from = backend, values_from = metric_value, values_fill = NA)
+
+
+# compute correlations per metric_name AND seed, on pairwise complete obs
+cors_seed <- wide_run %>%
+  group_by(metric_name, seed) %>%
+  summarise(
+    cor_conda_oras = if(sum(complete.cases(conda, oras)) > 1)
+      cor(conda, oras, use = "complete.obs") else NA_real_,
+    cor_conda_envmodules = if(sum(complete.cases(conda, envmodules)) > 1)
+      cor(conda, envmodules, use = "complete.obs") else NA_real_,
+    cor_oras_envmodules = if(sum(complete.cases(oras, envmodules)) > 1)
+      cor(oras, envmodules, use = "complete.obs") else NA_real_,
+    .groups = "drop")
+
+
+print(cors_seed)
+
+cors_long <- cors_seed %>%
+  pivot_longer(
+    cols = starts_with("cor_"),
+    names_to = "pair",
+    values_to = "correlation")
+
+
+ggplot(cors_long, aes(x = pair, y = interaction(metric_name, seed), fill = correlation)) +
+  geom_tile(color = "white") +
+  geom_text(aes(label = round(correlation, 2)), color = "black", size = 3) +
+  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0,
+                       limits = c(-1, 1), na.value = "grey90") +
+  theme_minimal(base_size = 12) +
+  labs(title = "Backend correlations per metric and seed",
+       x = "Backend pair",
+       y = "Metric and seed",
+       fill = "cor coef")
+
+```
+
+And the repeated runs?
+
+```{r}
+
+wide_run <- fd %>%
+  filter(backend %in% c("conda","oras","envmodules")) %>%
+  group_by(dataset_name, method, k, metric_name, run, seed, backend) %>%
+  summarise(metric_value = mean(as.numeric(metric_value), na.rm = TRUE), .groups = "drop") %>%
+  unite("backend_run", backend, run, sep = "_") %>%  # combine backend and run
+  pivot_wider(names_from = backend_run, values_from = metric_value, values_fill = NA)
+
+
+# only correlating aggregated metrics
+num_cols <- wide_run %>%
+  select(where(is.numeric), -k, -seed)
+
+
+# pairwise cors, so including repeated runs with the same backend
+cors_all <- cor(num_cols, use = "pairwise.complete.obs")
+
+# melt the correlation matrix into long form (Var1, Var2, value)
+cors_df <- melt(cors_all)
+
+# heatmap with correlation coefficients
+ggplot(cors_df, aes(x = Var1, y = Var2, fill = value)) +
+  geom_tile(color = "white") +
+  geom_text(aes(label = round(value, 2)), color = "black", size = 3) +
+  scale_fill_gradient2(low = "blue", mid = "white", high = "red", midpoint = 0,
+                       limits = c(-1, 1), na.value = "grey90") +
+  theme_minimal(base_size = 12) +
+  labs(title = "Metric correlation per backend and repeated run",
+       x = "backend and run", y = "backend and run", fill = "Correlation")
+
+
+```
+
+# Computational performance metrics
+
+## Non-complete obs
+
+```{r, fig.width = 10, fig.height = 10}
+
+perf_metrics <- c("cpu_time","max_rss","max_vms","max_uss",
+                  "max_pss","io_in","io_out","mean_load", "imputed_cpu_time")
+
+fd_long <- fd %>%
+  filter(backend %in% c("conda","oras","envmodules")) %>%
+  pivot_longer(cols = all_of(perf_metrics),
+               names_to = "metric",
+               values_to = "value")
+
+## again, averaging repeated runs with the same seed
+fd_avg <- fd_long %>%
+  group_by(backend, seed, run, metric, dataset_name, method_full) %>%
+  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
+
+## head(fd_avg)
+
+# boxplots + jittered scatter, faceted by metric
+ggplot(fd_avg, aes(x = backend, y = mean_value, fill = backend)) +
+  geom_boxplot(outlier.alpha = 0.3) +
+  geom_jitter(width = 0.2, alpha = 0.1, size = 1, color = "black") +
+  facet_wrap(~metric, scales = "free_y") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Performance metrics by backend",
+       x = "Backend",
+       y = "Value") +
+  scale_fill_brewer(palette = "Set2")
+```
+
+## Complete obs
+
+Now the same, but keeping only items measured across all three backends (i.e., no NAs from runs too quick to be profiled).
+
+```{r, fig.width = 10, fig.height = 10}
+
+fd_long_complete <- fd %>%
+  filter(backend %in% c("conda","oras","envmodules")) %>%
+  pivot_longer(cols = all_of(perf_metrics),
+               names_to = "metric",
+               values_to = "value") %>%
+  group_by(method_full, dataset_name, seed, run, generator, k, metric) %>%
+  filter(n_distinct(backend) == 3, !any(is.na(value))) %>%
+  ungroup()
+
+fd_avg <- fd_long_complete %>%
+  group_by(backend, seed, run, metric, dataset_name, method_full) %>%
+  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
+
+ggplot(fd_avg, aes(x = backend, y = mean_value, fill = backend)) +
+  geom_boxplot(outlier.alpha = 0.3) +
+  geom_jitter(width = 0.2, alpha = 0.1, size = 1, color = "black") +
+  facet_wrap(~metric, scales = "free_y") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Perf metrics by backend (only complete groups)",
+       x = "backend",
+       y = "perf value") +
+  scale_fill_brewer(palette = "Set2")
+```
+
+## Non-complete observations colored by method
+
+```{r, fig.width = 15, fig.height = 10}
+fd_long <- fd %>%
+  pivot_longer(cols = all_of(perf_metrics),
+               names_to = "metric",
+               values_to = "value")
+
+fd_avg <- fd_long %>%
+  group_by(backend, seed, run, metric, dataset_name, method_full) %>%
+  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
+
+## head(fd_avg)
+## table(fd_avg$seed, fd_avg$run)
+## unique(fd_avg$method_full)
+
+ggplot(fd_avg, aes(x = backend, y = mean_value)) +
+  # points colored by method, seed and other params
+  geom_point(aes(color = method_full),
+             alpha = 0.6,
+             position = position_jitter(width = 0.15)) +
+  # connect points across backends for same method/run/etc.
+  geom_line(aes(group = interaction(method_full, seed, run, dataset_name)),
+            alpha = 0.1) +
+  facet_wrap(~metric, scales = "free_y") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Performance metrics by backend",
+       x = "Backend",
+       y = "Value")
+
+```
+
+## Non-complete observations colored by method provider
+
+```{r, fig.width = 15, fig.height = 10}
+fd_long <- fd %>%
+  pivot_longer(cols = all_of(perf_metrics),
+               names_to = "metric",
+               values_to = "value")
+
+fd_avg <- fd_long %>%
+  group_by(backend, seed, run, metric, dataset_name, method_full, method) %>%
+  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
+
+## head(fd_avg)
+## table(fd_avg$seed, fd_avg$run)
+## unique(fd_avg$method_full)
+
+ggplot(fd_avg, aes(x = backend, y = mean_value)) +
+  # points colored by method, seed and other params
+  geom_point(aes(color = method),
+             alpha = 0.6,
+             position = position_jitter(width = 0.15)) +
+  # connect points across backends for same method/run/etc.
+  geom_line(aes(group = interaction(method, seed, run, dataset_name)),
+            alpha = 0.1) +
+  facet_wrap(~metric, scales = "free_y") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Performance metrics by backend",
+       x = "Backend",
+       y = "Value")
+
+```
+
+# Choice of `k` - and its misspecification
+
+## Deviation of performance at false k vs true k
+
+Mind there is no such thing as a true k; we use the first labelset.
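+
+For intuition, a minimal sketch of the offset convention (assuming the padded five-k layout that `get_true_k` encodes):
+
+```{r}
+ks <- c("2", "3", "4", "5", "6")  # ks as stored: names of the per-metric score list
+true_k <- get_true_k(ks)          # middle element, here "4"
+data.frame(k = as.integer(ks),
+           k_offset = as.integer(ks) - as.integer(true_k))
+```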
+
+```{r, fig.height = 10, fig.width = 13}
+fd$metric_value <- as.numeric(fd$metric_value)
+
+fd_dev_k <- fd %>%
+  group_by(method_full, dataset_name, metric_name) %>%
+  # get the metric_value at the true_k for this group
+  mutate(true_value = metric_value[k == true_k][1],
+         deviation_k = metric_value - true_value) %>%
+  ungroup()
+
+## summary(fd_dev_k$deviation_k)
+
+## str(fd_dev_k)
+ggplot(fd_dev_k, aes(x = factor(k), y = deviation_k, color = method_full)) +
+  geom_point(alpha = 0.6, position = position_jitter(width = 0.15)) +
+  facet_wrap(~ metric_name, scales = "free_y") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Deviation of metric across ks",
+       subtitle = "Deviation vs value at true k",
+       x = "k",
+       y = "Metric deviation from value at true k")
+```
+
+## By k offset
+
+```{r, fig.width = 12, fig.height = 9}
+## str(fd)
+fd_dev_k <- fd %>%
+  group_by(method_full, dataset_name, metric_name) %>%
+  # get the metric_value at the true_k for this group
+  mutate(true_value = metric_value[k == true_k][1],
+         deviation_k = metric_value - true_value) %>%
+  ungroup()
+
+## again aggregate across repeated runs - not seeds, given that method_full has them in its values
+fd_dev_k_avg <- fd_dev_k %>%
+  group_by(method_full, dataset_name, metric_name, k, k_offset) %>%
+  summarise(
+    mean_deviation_k = mean(deviation_k, na.rm = TRUE),
+    .groups = "drop"
+  )
+
+ggplot(fd_dev_k_avg, aes(x = k_offset, y = mean_deviation_k, color = method_full)) +
+  geom_point(alpha = 0.6, position = position_jitter(width = 0.15, height = 0)) +
+  facet_wrap(~ metric_name, scales = "free_y") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Performance impact of k offsets",
+       subtitle = "Deviation vs value at true k",
+       x = "Offset from true k",
+       y = "Metric deviation from value at true k")
+
+ggplot(fd_dev_k_avg, aes(x = k_offset, y = mean_deviation_k, color = dataset_name)) +
+  geom_point(alpha = 0.6, position = position_jitter(width = 0.15, height = 0)) +
+  facet_wrap(~ metric_name, scales = "free_y") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Performance impact of k offsets",
+       x = "Offset from true k",
+       y = "Metric deviation from value at true k")
+
+```
+
+Adjusted Rand index only
+
+```{r, fig.height = 6, fig.width = 16}
+# filter to adjusted_rand_score only
+fd_dev_k_ars <- fd_dev_k_avg %>%
+  filter(metric_name == "adjusted_rand_score")
+
+# by method_full
+ggplot(fd_dev_k_ars, aes(x = k_offset, y = mean_deviation_k, color = method_full)) +
+  geom_point(alpha = 0.6, position = position_jitter(width = 0.15, height = 0)) +
+  theme_minimal(base_size = 14) +
+  labs(title = "Performance impact of k offsets",
+       subtitle = "Deviation vs value at true k",
+       x = "Offset from true k",
+       y = "Deviation in adjusted_rand_score")
+
+# by dataset_name
+ggplot(fd_dev_k_ars, aes(x = k_offset, y = mean_deviation_k, color = dataset_name)) +
+  geom_point(alpha = 0.6, position = position_jitter(width = 0.15, height = 0)) +
+  theme_minimal(base_size = 14) +
+  labs(title = "Performance impact of k offsets",
+       x = "Offset from true k",
+       y = "Deviation in adjusted_rand_score")
+
+```
+
+
+# Computational vs clustering performance trade-offs
+
+Caution: only cpu_time NAs are handled (imputed); the other perf metrics are not.
+
+```{r, fig.width = 12, fig.height = 12}
+# aggregate across runs and ks again...
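+# (one row per method_full x backend x dataset_name x metric_name; cpu_time NAs
+#  were imputed as 0.05 s above, so mean_imputed_cpu_time is censoring-aware)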
+fd_avg <- fd %>%
+  group_by(method_full, backend, dataset_name, metric_name) %>%
+  summarise(
+    mean_imputed_cpu_time = mean(imputed_cpu_time, na.rm = TRUE),
+    mean_max_rss = mean(max_rss, na.rm = TRUE),
+    mean_metric = mean(metric_value, na.rm = TRUE),
+    .groups = "drop"
+  )
+
+# imputed cpu time vs metric
+print(
+  ggplot(fd_avg, aes(x = mean_imputed_cpu_time, y = mean_metric,
+                     color = method_full, shape = backend)) +
+    geom_point(alpha = 0.6) +
+    facet_wrap(~metric_name, scales = "free_y") +
+    theme_minimal(base_size = 14) +
+    labs(title = "Clustering metrics vs runtime trade-offs",
+         x = "Mean CPU time (s)",
+         y = "Mean clustering metric value")
+)
+
+# RSS vs metric; caution: no imputation here / unclear how to handle memory for censored cpu_time data
+print(
+  ggplot(fd_avg, aes(x = mean_max_rss, y = mean_metric,
+                     color = method_full, shape = backend)) +
+    geom_point(alpha = 0.6) +
+    facet_wrap(~metric_name, scales = "free_y") +
+    theme_minimal(base_size = 14) +
+    labs(title = "Clustering metrics vs RSS trade-offs",
+         x = "Mean max RSS (MB)",
+         y = "Mean clustering metric value")
+)
+
+```
+
+# Bland–Altman plots for ks vs true k
+
+Again, there is no such thing as a true k.
+
+## Colored by method
+
+```{r, fig.width = 15, fig.height = 15, warning = FALSE}
+fd_dev_true <- fd %>%
+  group_by(method_full, dataset_name, metric_name) %>%
+  # get the metric value at true_k
+  mutate(true_value = metric_value[k == true_k][1],
+         diff_val = metric_value - true_value,
+         mean_val = (metric_value + true_value)/2) %>%
+  ungroup()
+
+## # Bland–Altman style plot: any k vs true_k
+## ggplot(fd_dev_true, aes(x = mean_val, y = diff_val, color = method_full)) +
+##   geom_point(alpha = 0.6) +
+##   geom_hline(yintercept = mean(fd_dev_true$diff_val, na.rm = TRUE),
+##              linetype = "dashed", color = "blue") +
+##   geom_hline(yintercept = mean(fd_dev_true$diff_val, na.rm = TRUE) + 1.96*sd(fd_dev_true$diff_val, na.rm = TRUE),
+##              linetype = "dotted", color = "red") +
+##   geom_hline(yintercept = mean(fd_dev_true$diff_val, na.rm = TRUE) - 1.96*sd(fd_dev_true$diff_val, na.rm = TRUE),
+##              linetype = "dotted", color = "red") +
+##   facet_wrap(~metric_name, scales = "free") +
+##   theme_minimal(base_size = 14) +
+##   labs(title = "Bland–Altman: any k vs true_k",
+##        x = "Mean of k and true_k",
+##        y = "Difference (k - true_k)")
+
+# collapse across seeds and runs
+fd_dev_true_avg <- fd_dev_true %>%
+  group_by(method_full, dataset_name, metric_name, k, k_offset) %>%
+  summarise(
+    mean_diff_val = mean(diff_val, na.rm = TRUE),
+    mean_mean_val = mean(mean_val, na.rm = TRUE),
+    .groups = "drop"
+  )
+
+ggplot(fd_dev_true_avg, aes(x = mean_mean_val, y = mean_diff_val, color = method_full)) +
+  geom_point(alpha = 0.6) +
+  geom_hline(yintercept = mean(fd_dev_true_avg$mean_diff_val, na.rm = TRUE),
+             linetype = "dashed", color = "blue") +
+  geom_hline(yintercept = mean(fd_dev_true_avg$mean_diff_val, na.rm = TRUE) + 1.96*sd(fd_dev_true_avg$mean_diff_val, na.rm = TRUE),
+             linetype = "dotted", color = "red") +
+  geom_hline(yintercept = mean(fd_dev_true_avg$mean_diff_val, na.rm = TRUE) - 1.96*sd(fd_dev_true_avg$mean_diff_val, na.rm = TRUE),
+             linetype = "dotted", color = "red") +
+  facet_wrap(~metric_name, scales = "free") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Bland–Altman: any k vs true_k (averaged)",
+       x = "Mean of metric at k and true_k (averaged)",
+       y = "Metric difference (at k vs true_k, averaged)")
+
+
+```
+
+## Colored by dataset
+
+```{r, fig.width = 15, fig.height = 15, warning = FALSE}
+
+ggplot(fd_dev_true_avg, aes(x = mean_mean_val, y = mean_diff_val, color = dataset_name)) +
+  geom_point(alpha = 0.6) +
+  geom_hline(yintercept = mean(fd_dev_true_avg$mean_diff_val, na.rm = TRUE),
+             linetype = "dashed", color = "blue") +
+  geom_hline(yintercept = mean(fd_dev_true_avg$mean_diff_val, na.rm = TRUE) + 1.96*sd(fd_dev_true_avg$mean_diff_val, na.rm = TRUE),
+             linetype = "dotted", color = "red") +
+  geom_hline(yintercept = mean(fd_dev_true_avg$mean_diff_val, na.rm = TRUE) - 1.96*sd(fd_dev_true_avg$mean_diff_val, na.rm = TRUE),
+             linetype = "dotted", color = "red") +
+  facet_wrap(~metric_name, scales = "free") +
+  theme_minimal(base_size = 14) +
+  labs(title = "Bland–Altman: any k vs true_k (averaged)",
+       x = "Mean of metric at k and true_k (averaged)",
+       y = "Metric difference (at k vs true_k, averaged)")
+
+
+```
diff --git a/parse_results.py b/parse_results.py
old mode 100755
new mode 100644
index a3e60a9..c07cebd
--- a/parse_results.py
+++ b/parse_results.py
@@ -1,8 +1,9 @@
 #!/usr/bin/env python3
 """
-Simple script to parse clustbench results with glob pattern matching.
+Parse clustbench results with glob pattern matching.
 
-Pattern: out-{backend}-{rep}/data/clustbench/dataset_generator-{generator}_dataset_name-{name}/clustering/{method}
+Pattern:
+results/out_{backend}_seed_{seed}_run_{run}/data/clustbench/dataset_generator-{generator}_dataset_name-{name}/clustering/{method}
 """
 
 import csv
@@ -11,66 +12,92 @@
 import re
 from pathlib import Path
 from typing import Dict, List, Optional
+import sys
 
-
-def parse_result_path(path: Path) -> Dict[str, str]:
+def parse_result_path(path: Path) -> List[Dict[str, str]]:
     """
-    Parse a result path and extract components.
+    Parse a result path and extract components:
+    - backend, seed, run (from out_* directories)
+    - dataset generator and dataset name
+    - method (immediate folder after clustering/)
+    - method_full (method + variant symlink/subdir)
 
-    Pattern: out-{backend}-{rep}/data/clustbench/dataset_generator-{generator}_dataset_name-{name}/clustering/{method}
+    Returns a list of dicts, one per available variant directory under {method}.
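+
+    Example (hypothetical names; the seed/run values follow the Makefile's
+    --out-dir layout): results/out_conda_seed_2_run_1/data/clustbench/
+        dataset_generator-{generator}_dataset_name-{name}/clustering/sklearn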
""" parts = path.parts - - result = {} - - # Parse out-{backend}-{rep} - out_match = re.match(r'out-([^-]+)-(\d+)', parts[0]) + base_result: Dict[str, str] = {} + + # print("DEBUG parts:", parts, file = sys.stderr) + # parse out_{backend}_seed_{seed}_run_{run} + out_match = re.match( + r"out_(?P[a-zA-Z0-9]+)_seed_(?P\d+)_run_(?P\d+)", + parts[1] + ) if out_match: - result['backend'] = out_match.group(1) - result['rep'] = out_match.group(2) + base_result["backend"] = out_match.group("backend") + base_result["seed"] = out_match.group("seed") + base_result["run"] = out_match.group("run") - # Find dataset_generator part + # find dataset_generator part for part in parts: - if part.startswith('dataset_generator-'): - # Parse dataset_generator-{generator}_dataset_name-{name} - dataset_match = re.match(r'dataset_generator-([^_]+)_dataset_name-(.+)', part) + if part.startswith("dataset_generator-"): + dataset_match = re.match( + r"dataset_generator-([^_]+)_dataset_name-(.+)", part + ) if dataset_match: - result['generator'] = dataset_match.group(1) - result['dataset_name'] = dataset_match.group(2) + base_result["generator"] = dataset_match.group(1) + base_result["dataset_name"] = dataset_match.group(2) break - # The method is the last part (after clustering/) - if 'clustering' in parts: - clustering_idx = parts.index('clustering') - if clustering_idx + 1 < len(parts): - result['method'] = parts[clustering_idx + 1] + results: List[Dict[str, str]] = [] - result['path'] = str(path) + # The method is the folder after clustering/ + if "clustering" in parts: + clustering_idx = parts.index("clustering") + if clustering_idx + 1 < len(parts): + method_dir = parts[clustering_idx + 1] + base_result["method"] = method_dir + + method_path = path + if method_path.is_dir(): + for child in method_path.iterdir(): + # skip hidden dirs, hashes, and metrics folder + if child.name.startswith("."): + continue + if re.fullmatch(r"[0-9a-f]{32,}", child.name): + continue + if re.fullmatch(r"[0-9a-f]{8,}", child.name): + continue + if child.name == "metrics": + continue + + if child.is_symlink() or child.is_dir(): + r = base_result.copy() + r["method_full"] = f"{method_dir}_{child.name}" + r["path"] = str(child) + results.append(r) + else: + r = base_result.copy() + r["method_full"] = "/".join(parts[clustering_idx + 1:]) + r["path"] = str(path) + results.append(r) - return result + return results def parse_performance_file(perf_file: Path) -> Optional[Dict]: - """ - Parse a clustbench_performance.txt file (TSV format). - - Returns: - Dictionary with performance metrics, or None if file doesn't exist - """ + """Parse a clustbench_performance.txt file (TSV format).""" if not perf_file.exists(): return None try: with open(perf_file, 'r') as f: reader = csv.DictReader(f, delimiter='\t') - # Get the first (and only) data row for row in reader: - # Convert values to appropriate types result = {} for key, value in row.items(): if value: value = value.strip() - # Keep h:m:s as string, convert others to float if key == 'h:m:s': result[key] = value else: @@ -88,208 +115,158 @@ def parse_performance_file(perf_file: Path) -> Optional[Dict]: def parse_metric_scores(scores_file: Path) -> Optional[Dict[str, float]]: - """ - Parse a clustbench.scores.gz file. 
- - Format: - k=2,k=2,k=2,k=3,k=4 - 1.0,1.0,1.0,0.7671742903354675,0.7289468426413069 - - Returns: - Dictionary mapping k values to scores, or None if file doesn't exist - """ + """Parse a clustbench.scores.gz file into {k: score} dict.""" if not scores_file.exists(): return None - + try: with gzip.open(scores_file, 'rt') as f: lines = f.readlines() - + if len(lines) != 2: return {'error': f'Expected 2 lines, got {len(lines)}'} - - # Parse header (k values) - extract integers from "k=2" format - k_strings = [k.strip() for k in lines[0].strip().split(',')] + + k_strings = [k.strip().strip('"') for k in lines[0].strip().split(',')] k_values = [] for k_str in k_strings: - match = re.match(r'k=(\d+)', k_str) - if match: - k_values.append(int(match.group(1))) + m = re.match(r'k=(\d+)', k_str) + if m: + k_values.append(int(m.group(1))) else: return {'error': f'Invalid k format: {k_str}'} - - # Parse scores - scores = [float(s.strip()) for s in lines[1].strip().split(',')] - + + score_strings = [s.strip().strip('"') for s in lines[1].strip().split(',')] + scores = [] + for s in score_strings: + try: + scores.append(float(s)) + except ValueError: + return {'error': f'Invalid score: {s}'} + if len(k_values) != len(scores): return {'error': f'Mismatch: {len(k_values)} k values, {len(scores)} scores'} - - # Build result dict, checking for duplicate k values with different scores + result = {} for k, score in zip(k_values, scores): - if k in result: - # Check if the score is different - if abs(result[k] - score) > 1e-10: - raise ValueError(f'Duplicate k value {k} with different scores: {result[k]} vs {score}') - else: - result[k] = score - + if k in result and abs(result[k] - score) > 1e-10: + return {'error': f'Duplicate k {k} with differing scores'} + result[k] = score + return result - + except Exception as e: return {'error': str(e)} -def parse_metrics(param_dir: Path) -> Dict[str, Dict[str, Dict[str, float]]]: - """ - Parse metrics from a parameter directory. - - Structure: {param_dir}/metrics/{metric_family}/metric-{metric_name}/clustbench.scores.gz - - Returns: - Nested dict: {metric_family: {metric_name: {k: score}}} - """ +def parse_metrics(config_dir: Path) -> Dict[str, Dict[str, Dict[str, float]]]: + """Parse metrics from a configuration directory.""" metrics = {} - metrics_dir = param_dir / 'metrics' - + metrics_dir = config_dir / 'metrics' if not metrics_dir.exists(): return metrics - - # Iterate over metric families + for family_dir in metrics_dir.iterdir(): if not family_dir.is_dir(): continue - family_name = family_dir.name metrics[family_name] = {} - - # Iterate over metrics in this family for metric_dir in family_dir.iterdir(): if not metric_dir.is_dir(): continue - - # Extract metric name from metric-{name} pattern metric_match = re.match(r'metric-(.+)', metric_dir.name) if not metric_match: continue - metric_name = metric_match.group(1) - - # Parse the scores file scores_file = metric_dir / 'clustbench.scores.gz' scores = parse_metric_scores(scores_file) - if scores: metrics[family_name][metric_name] = scores - return metrics -def find_results(base_dir: str = '.', pattern: str = 'out-*/data/clustbench/dataset_generator-*/clustering/*') -> List[Dict[str, str]]: +def find_results( + base_dir: str = ".", + pattern: str = "results/out_*/data/clustbench/dataset_generator-*/clustering/*" +) -> List[Dict[str, str]]: """ - Find all result directories matching the pattern. 
- - Args: - base_dir: Base directory to search from - pattern: Glob pattern to match - - Returns: - List of parsed result dictionaries + Return one record per configuration folder with parameters, performance, and metrics. """ base_path = Path(base_dir) - results = [] + results: List[Dict[str, str]] = [] for path in base_path.glob(pattern): - if path.is_dir(): - # Skip hidden directories (starting with .) - if not any(part.startswith('.') for part in path.parts): - parsed = parse_result_path(path) - - # Find all parameter directories (subdirectories with parameter patterns) - param_dirs = [d for d in path.iterdir() if d.is_dir() and not d.name.startswith('.')] - - if param_dirs: - # Parse configurations and their performance - parsed['configurations'] = [] - - # Assume first param_dir for method-level data - first_param_dir = param_dirs[0] - - # Parse performance file at method level - perf_file = first_param_dir / 'clustbench_performance.txt' - performance = parse_performance_file(perf_file) - if performance: - parsed['performance'] = performance - - # Parse metrics at method level - metrics = parse_metrics(first_param_dir) - if metrics: - parsed['metrics'] = metrics - - # Add method_params and method_full at method level - method_params = first_param_dir.name - - # Extract method from method-{method} pattern if present - method_match = re.match(r'method-([^_]+)', method_params) - if method_match: - extracted_method = method_match.group(1) - parsed['method'] = extracted_method - - method_full = f"{parsed.get('method', '')}_{method_params}" - parsed['method_params'] = method_params - parsed['method_full'] = method_full - - for param_dir in param_dirs: - # Load parameters.json if it exists - params_file = param_dir / 'parameters.json' - parameters = None - if params_file.exists(): - try: - with open(params_file, 'r') as f: - parameters = json.load(f) - except Exception as e: - parameters = {'error': str(e)} - - config = { - 'parameter_dir': param_dir.name, - 'parameters': parameters - } + if not path.is_dir(): + continue + if any(part.startswith(".") for part in path.parts): + continue - parsed['configurations'].append(config) + variants = parse_result_path(path) + for variant in variants: + config_dir = Path(variant["path"]) + if not config_dir.is_dir() or config_dir.name == "metrics": + continue - results.append(parsed) + record = variant.copy() + + # Parameters + params_file = config_dir / "parameters.json" + parameters = None + if params_file.exists(): + try: + with open(params_file, "r") as f: + parameters = json.load(f) + except Exception as e: + parameters = {"error": str(e)} + record["parameters"] = parameters + record["parameter_dir"] = config_dir.name + + # Performance + perf_file = config_dir / "clustbench_performance.txt" + performance = parse_performance_file(perf_file) + if performance: + record["performance"] = performance + + # Metrics + metrics = parse_metrics(config_dir) + if metrics: + record["metrics"] = metrics + + # Normalize method name + m = re.match(r"method-([^_]+)", config_dir.name) + if m: + record["method"] = m.group(1) + + # Ensure method_full includes config dir name once + variant_name = record["method_full"] + if config_dir.name not in variant_name: + record["method_full"] = f"{variant_name}_{config_dir.name}" + + record["path"] = str(config_dir) + results.append(record) return results def main(): - """Main function to run the parser.""" - # Find all matching results results = find_results() - - # Print as JSON print(json.dumps(results, indent=2)) - # Print 
summary - print(f"\n# Found {len(results)} result directories", file=__import__('sys').stderr) - - # Group by backend, generator, method - by_backend = {} - by_generator = {} - by_method = {} + # Summary to stderr + import sys + print(f"\n# Found {len(results)} result directories", file=sys.stderr) + by_backend, by_generator, by_method = {}, {}, {} for r in results: backend = r.get('backend', 'unknown') generator = r.get('generator', 'unknown') method = r.get('method', 'unknown') - by_backend[backend] = by_backend.get(backend, 0) + 1 by_generator[generator] = by_generator.get(generator, 0) + 1 by_method[method] = by_method.get(method, 0) + 1 - print(f"# By backend: {by_backend}", file=__import__('sys').stderr) - print(f"# By generator: {by_generator}", file=__import__('sys').stderr) - print(f"# By method: {by_method}", file=__import__('sys').stderr) + print(f"# By backend: {by_backend}", file=sys.stderr) + print(f"# By generator: {by_generator}", file=sys.stderr) + print(f"# By method: {by_method}", file=sys.stderr) if __name__ == '__main__':