| Title: | Tidy Import, Indexing, and Export of LAS Well Log Data |
|---|---|
| Description: | Provides tools for reading, parsing, indexing, and exporting LAS (Log ASCII Standard) well log files into tidy, analysis-ready tabular formats. The package separates LAS header information and log data into structured components, builds a searchable index across collections of LAS files, and enables reproducible subsetting of wells based on metadata or curve availability. Output tables can be written to CSV or Parquet formats to support large-scale statistical, machine learning, and earth science workflows. The tidy data structure follows Wickham (2014) <doi:10.18637/jss.v059.i10>. The LAS file structure follows the Canadian Well Logging Society LAS standard <https://www.cwls.org/wp-content/uploads/2017/02/Las2_Update_Jan2017.pdf>. |
| Authors: | Hope E. Omodolor [aut, cre] (ORCID: <https://orcid.org/0009-0005-7842-406X>) |
| Maintainer: | Hope E. Omodolor <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.1.2 |
| Built: | 2026-05-21 07:37:05 UTC |
| Source: | https://github.com/omodolor/tidylaslog |
List available curve mnemonics in an index
available_curves(index, county = NULL, top_n = NULL)
index |
Output of index_laslogs() |
county |
Optional county filter (character vector) |
top_n |
If not NULL, return only the top N most common curves |
Tibble with MNEM and n (count of wells containing the curve)
td <- tempdir()
f <- file.path(td, "a.las")
las_text <- c(
  " ~Version Information",
  " VERS. 2.0:",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000:",
  " STOP.M 1001:",
  " STEP.M 1:",
  " NULL. -999.25:",
  " API . 1111111111:",
  " CNTY. TEST:",
  " ~Curve Information",
  " DEPT.M:",
  " GR.API:",
  " RHOB.G/C3:",
  " ~ASCII Log Data",
  " 1000 80 2.35",
  " 1001 82 2.36"
)
writeLines(las_text, f)
idx <- index_laslogs(td)
available_curves(idx, top_n = 5)
Index, filter, pull, and export LAS logs in one call
batch_export_laslogs(
  dir,
  out_dir,
  county = NULL,
  curves_any = NULL,
  curves_all = NULL,
  curves = NULL,
  output = c("wide", "long"),
  prefix = NULL,
  csv = TRUE,
  parquet = TRUE,
  write_index = TRUE,
  index_prefix = NULL
)
dir |
Folder containing .las files |
out_dir |
Output directory (absolute path, or relative to dir) |
county |
Optional county filter (character vector) |
curves_any |
Optional: keep wells with at least one of these curves |
curves_all |
Optional: keep wells with all of these curves |
curves |
Optional: curves to actually export (defaults to curves_all, else curves_any, else NULL=all) |
output |
"wide" or "long" |
prefix |
Optional file prefix. If NULL, an informative prefix is built. |
csv |
Write CSV? |
parquet |
Write Parquet? |
write_index |
If TRUE, also export wells_index/curves_index/files_index tables |
index_prefix |
Optional prefix for index files (if NULL, a default prefix is used) |
Invisibly returns a list with index, apis, data, output paths, and manifest
td <- tempdir()
f <- file.path(td, "a.las")
las_text <- c(
  " ~Version Information",
  " VERS. 2.0:",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000:",
  " STOP.M 1001:",
  " STEP.M 1:",
  " NULL. -999.25:",
  " API . 1111111111:",
  " CNTY. TEST:",
  " ~Curve Information",
  " DEPT.M:",
  " GR.API:",
  " ~ASCII Log Data",
  " 1000 80",
  " 1001 82"
)
writeLines(las_text, f)
res <- batch_export_laslogs(
  dir = td,
  out_dir = file.path(td, "exports"),
  county = "TEST",
  curves_any = "GR",
  output = "wide",
  csv = TRUE,
  parquet = FALSE,
  write_index = TRUE
)
names(res)
Build a FAIR index for a folder of LAS files
index_laslogs(dir)
dir |
Folder containing .las files |
A list with wells_index, curves_index, files_index
td <- tempdir()
f1 <- file.path(td, "a.las")
f2 <- file.path(td, "b.las")
las_text <- c(
  " ~Version Information",
  " VERS. 2.0:",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000:",
  " STOP.M 1001:",
  " STEP.M 1:",
  " NULL. -999.25:",
  " API . 1111111111:",
  " CNTY. TEST:",
  " ~Curve Information",
  " DEPT.M:",
  " GR.API:",
  " ~ASCII Log Data",
  " 1000 80",
  " 1001 82"
)
writeLines(las_text, f1)
writeLines(sub("1111111111", "2222222222", las_text), f2)
idx <- index_laslogs(td)
names(idx)
Pull log data for selected wells (optionally selected curves)
pull_laslogs(index, apis, curves = NULL, output = c("long", "wide"))
index |
Output of index_laslogs() |
apis |
Character vector of API values to load |
curves |
Optional curve mnemonics to keep (e.g., c("GR","RHOB","NPHI")) |
output |
"long" (tidy) or "wide" (ML-ready) |
A tibble combining all selected wells
td <- tempdir()
f <- file.path(td, "a.las")
las_text <- c(
  " ~Version Information",
  " VERS. 2.0:",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000:",
  " STOP.M 1001:",
  " STEP.M 1:",
  " NULL. -999.25:",
  " API . 1111111111:",
  " CNTY. TEST:",
  " ~Curve Information",
  " DEPT.M:",
  " GR.API:",
  " ~ASCII Log Data",
  " 1000 80",
  " 1001 82"
)
writeLines(las_text, f)
idx <- index_laslogs(td)
dat <- pull_laslogs(idx, apis = "1111111111", curves = "GR", output = "long")
head(dat)
tidylaslog supports two equivalent representations of LAS log data:
read_laslog(file, output = c("long", "wide"))
file |
Path to a .las file |
output |
Output format: "long" or "wide" (both described below) |
Wide format: one row per depth step per well, with each curve stored as a separate column.
Long format: one row per measurement, with curve names stored in a mnemonic column and values in a value column.
Both formats contain the same information but are optimized for different workflows.
An S3 object of class "laslog" with VERSION/WELL/CURVE/PARAMETER/OTHER/LOG
las_text <- c(
  " ~Version Information",
  " VERS. 2.0: CWLS LOG ASCII STANDARD",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000: Start depth",
  " STOP.M 1002: Stop depth",
  " STEP.M 1: Step",
  " NULL. -999.25: Null value",
  " API . 1111111111: API number",
  " CNTY. TEST: County",
  " ~Curve Information",
  " DEPT.M: Depth",
  " GR.API: Gamma Ray",
  " ~ASCII Log Data",
  " 1000 80",
  " 1001 82",
  " 1002 79"
)
f <- tempfile(fileext = ".las")
writeLines(las_text, f)
x <- read_laslog(f, output = "long")
head(x$LOG)
Read LAS header only (no ~A data)
read_laslog_header(file)
file |
Path to a .las file |
S3 object of class "laslog_header" with VERSION/WELL/CURVE/PARAMETER/OTHER plus provenance
las_text <- c(
  " ~Version Information",
  " VERS. 2.0: CWLS LOG ASCII STANDARD",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000: Start depth",
  " STOP.M 1001: Stop depth",
  " STEP.M 1: Step",
  " NULL. -999.25: Null value",
  " API . 1111111111: API number",
  " CNTY. TEST: County",
  " ~Curve Information",
  " DEPT.M: Depth",
  " GR.API: Gamma Ray",
  " ~ASCII Log Data",
  " 1000 80",
  " 1001 82"
)
f <- tempfile(fileext = ".las")
writeLines(las_text, f)
h <- read_laslog_header(f)
names(h)
Select wells from an index by metadata and curve availability
select_laslogs(index, county = NULL, curves_any = NULL, curves_all = NULL)
index |
Output of index_laslogs() |
county |
Character vector of counties to keep (optional) |
curves_any |
Keep wells that have at least one of these curves (optional) |
curves_all |
Keep wells that have all of these curves (optional) |
Character vector of API values
td <- tempdir()
f <- file.path(td, "a.las")
las_text <- c(
  " ~Version Information",
  " VERS. 2.0:",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000:",
  " STOP.M 1001:",
  " STEP.M 1:",
  " NULL. -999.25:",
  " API . 1111111111:",
  " CNTY. TEST:",
  " ~Curve Information",
  " DEPT.M:",
  " GR.API:",
  " ~ASCII Log Data",
  " 1000 80",
  " 1001 82"
)
writeLines(las_text, f)
idx <- index_laslogs(td)
apis <- select_laslogs(idx, county = "TEST", curves_any = "GR")
apis
tidylaslog() works with either a single LAS file or a directory of LAS files.
It can return data directly to R or export analysis-ready tables to disk.
tidylaslog(
  x,
  county = NULL,
  curves_any = NULL,
  curves_all = NULL,
  curves = NULL,
  output = c("wide", "long"),
  out_dir = NULL,
  prefix = NULL,
  formats = c("csv", "parquet"),
  write_index = TRUE,
  write_meta = TRUE,
  meta_sections = c("VERSION", "WELL", "CURVE", "PARAMETER", "OTHER"),
  manifest = TRUE
)
x |
Path to a .las file or a directory of .las files |
county |
Optional county filter (directory mode only). |
curves_any |
Keep wells that contain at least one of these curves (directory mode). |
curves_all |
Keep wells that contain all of these curves (directory mode). |
curves |
Curves to actually keep/export. Defaults to curves_all, else curves_any, else NULL (all curves) |
output |
Output format: "wide" or "long" (both described below) |
out_dir |
If NULL, results are returned to R; if provided, tables are exported to this directory |
prefix |
Optional filename prefix for exported files. |
formats |
Output formats to write. One or both of "csv" and "parquet" |
write_index |
Write index tables (wells, curves, files) when exporting directories? |
write_meta |
Write metadata tables (the sections listed in meta_sections)? |
meta_sections |
Which metadata sections to export
("VERSION", "WELL", "CURVE", "PARAMETER", "OTHER") |
manifest |
Write a JSON manifest describing the export? |
The function supports two equivalent representations of LAS log data:
Wide format: one row per depth step per well, with each curve stored as a separate column.
Long format: one row per measurement, with curve names stored in a
mnemonic column and values in a value column.
Both formats contain the same information but are optimized for different workflows (machine learning vs tidy analysis).
If out_dir is NULL:
Single file: an S3 object of class "laslog" containing
VERSION, WELL, CURVE, PARAMETER, OTHER, and LOG.
Directory: a list with index, apis, and combined data.
If out_dir is provided:
Single file: a list containing exported data paths, metadata paths, and an optional manifest.
Directory: the full batch export result (see batch_export_laslogs()).
# ---- Single file mode (return to R) ----
las_text <- c(
  " ~Version Information",
  " VERS. 2.0:",
  " WRAP. NO:",
  " ~Well Information",
  " STRT.M 1000:",
  " STOP.M 1002:",
  " STEP.M 1:",
  " NULL. -999.25:",
  " API . 1111111111:",
  " CNTY. TEST:",
  " ~Curve Information",
  " DEPT.M:",
  " GR.API:",
  " ~ASCII Log Data",
  " 1000 80",
  " 1001 82",
  " 1002 79"
)
f <- tempfile(fileext = ".las")
writeLines(las_text, f)
obj <- tidylaslog(f, output = "long")
head(obj$LOG)

# ---- Directory mode (return to R) ----
td <- tempdir()
f1 <- file.path(td, "a.las")
f2 <- file.path(td, "b.las")
writeLines(las_text, f1)
writeLines(sub("1111111111", "2222222222", las_text), f2)
res <- tidylaslog(td, county = "TEST", curves_any = "GR", output = "wide")
names(res)

# ---- Export mode (CSV only, no arrow needed) ----
out_dir <- file.path(td, "exports_demo")
ex <- tidylaslog(td,
  county = "TEST",
  curves_any = "GR",
  output = "wide",
  out_dir = out_dir,
  formats = "csv",
  write_index = TRUE,
  manifest = FALSE
)
names(ex)
Write LAS logs to CSV and/or Parquet
write_laslogs(data, out_dir, prefix = "laslogs", csv = TRUE, parquet = TRUE)
data |
Tibble returned by pull_laslogs() |
out_dir |
Output directory |
prefix |
File prefix (no extension) |
csv |
Write CSV file? |
parquet |
Write Parquet file? (requires arrow) |
Invisibly returns output paths
out_dir <- tempdir()
df <- data.frame(api = "1111111111", depth = c(1000, 1001), GR = c(80, 82))
paths <- write_laslogs(df, out_dir = out_dir, prefix = "demo", csv = TRUE, parquet = FALSE)
paths