Skip to content
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: stoner
Title: Support for Building VIMC Montagu Touchstones, using Dettl
Version: 0.1.21
Version: 0.1.22
Authors@R:
c(person("Wes", "Hinsley",role = c("aut", "cre", "cst", "dnc", "elg", "itr", "sng", "ard"),
email = "w.hinsley@imperial.ac.uk"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(stone_load)
export(stone_stochastic_central)
export(stone_stochastic_cert_verify)
export(stone_stochastic_graph)
export(stone_stochastic_make_meta)
export(stone_stochastic_process)
export(stone_stochastic_standardise)
export(stone_stochastic_upload)
Expand Down
76 changes: 76 additions & 0 deletions R/stochastic_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,19 @@ stone_stochastic_standardise <- function(
}
}

# Note that for MenA, we want to keep the _cwyx outcomes.

if (missing_run_id_fix) {
if ((!"run_id" %in% names(d)) && (length(index) == 200)) d$run_id <- j
}

# Remove columns "X" and "X.1" that have crept in with some of the
# inputs saved with row.names

d[["X"]] <- NULL
d[["X.1"]] <- NULL



# Round to integer, as per guidance. (Not using as.integer, as that
# has limits on how large numbers can be, so we are just truncating
Expand Down Expand Up @@ -257,3 +266,70 @@ stone_stochastic_central <- function(base, touchstone, disease, group,
outfile <- sprintf("%s_%s_central.pq", group, scenario)
arrow::write_parquet(central, file.path(path, outfile))
}



##' Create a `meta.csv` file in the root of the standardised
##' stochastics. The columns contain scalars of `touchstone`,
##' `disease`, `group`, `scenario` - and for each row, a
##' semi-colon-separated lists for `countries` and `outcomes`.
##' This is useful for making the stochastic explorer faster
##' on startup (otherwise it has to sample all of the files
##' each time you run it) - and also it is a good general
##' record of all the stochastic data we have.
##'
##' This does mean that we should re-create the meta data
##' each time we make changes to the standardised stochastic
##' data though.
##'
##' @export
##' @title Produce `meta.csv` summary of the structure and
##' content of a standardised stochastic data folder.
##' @importFrom data.table rbindlist
##' @importFrom utils write.csv
##' @param path The root folder of the stochastic data.

stone_stochastic_make_meta <- function(path) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we check that path exists and is write-able? checkmate::test_path_for_output() or something similar would work.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


explore_files <- function(touchstone, folder, disease, group) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we get docs on the expectations for these args? How are these injected into this scope? Is the expectation that they're present in the global scope?

It might be worth adding defaults to the fn signature - perhaps drawing from package constants?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And perhaps some input checking so users know how and why their fn calls fail would be great.

Copy link
Copy Markdown
Contributor Author

@weshinsley weshinsley May 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

explore_files and touchstone_meta are only called from the code further down in stone_stochastic_make_meta - the user won't ever call them.

stone_stochastic_make_meta assumes the stochastic file share is perfectly curated (which is an interesting assumption), so it can work out all of these things automatically from the folder and file names.

I'll do some more commenting to clarify

files <- list.files(file.path(path, touchstone, folder))
first <- file.path(path, touchstone, folder, files[1])
ds <- arrow::open_dataset(first)
outcomes <- ds$schema$names
outcomes <- outcomes[!outcomes %in% c("run_id", "disease", "year", "age",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see a candidate for a package constant...

"country", "cohort_size")]
outcomes <- sort(unique(tolower(outcomes)))

files <- strsplit(list.files(file.path(path, touchstone, folder)), "_")

scenarios <- unique(unlist(lapply(files, `[[`, 2)))
df <- data.frame()
for (scenario in scenarios) {
matches <- files[unlist(lapply(files, `[[`, 2)) == scenario]
countries <- unique(unlist(lapply(matches, `[[`, 3)))
countries <- gsub(".pq", "", countries)
Comment on lines +305 to +310
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we get a comment or a section in the fn docs with what's intended here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some comments throughout that section...

df <- rbind(df, data.frame(
touchstone = touchstone,
disease = disease,
group = group,
scenario = scenario,
countries = paste0(countries, collapse = ";"),
outcomes = paste0(outcomes, collapse = ";")
))
}
df
}

touchstone_meta <- function(touchstone) {
entries <- list.files(file.path(path, touchstone))
data.table::rbindlist(lapply(entries, function(x) {
xs <- strsplit(x, "_")[[1]]
explore_files(touchstone, x, xs[1], xs[2])
}))
}

touchstones <- basename(list.dirs(paste0(path, "/"), recursive = FALSE))
res <- data.table::rbindlist(lapply(touchstones, touchstone_meta))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we apply this pattern - rbindlist() applied to a functional over a list, to the loop above too? No real reason other than it's nice.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, why not. It's tidier...

write.csv(res, file.path(path, "meta.csv"),
row.names = FALSE, quote = FALSE)
}
Loading
Loading