diff --git a/.Rbuildignore b/.Rbuildignore index 24ec5b01e..bb231a66a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -67,7 +67,9 @@ vignettes/changes_slides.Rmd vignettes/daily_data_statistics.Rmd vignettes/continuous_pr.Rmd vignettes/quick_slides.Rmd +vignettes/Reference_Lists.Rmd ^[.]?air[.]toml$ ^\.vscode$ +environment.yml ^\.positai$ ^\.claude$ diff --git a/.gitignore b/.gitignore index 0eade678e..892ca673e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,11 +12,9 @@ docs /doc/ /Meta/ /Temp/ +/public/ vignettes/*.html vignettes/*.R - - /.quarto/ - **/*.quarto_ipynb .positai diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4322313c2..afbf6bea1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -8,7 +8,6 @@ workflow: default: tags: - chs-shared - - dind stages: - build @@ -36,21 +35,21 @@ variables: build-image: stage: build cache: [] - image: ${DEVOPS_REGISTRY}usgs/docker:20 + tags: + - chs-shared + - dind + image: ${DEVOPS_REGISTRY}usgs/docker:29 services: - - name: ${DEVOPS_REGISTRY}usgs/docker:20-dind + - name: ${DEVOPS_REGISTRY}usgs/docker:29-dind alias: docker - rules: - - changes: - - docker/Dockerfile script: - echo ${CI_REGISTRY_PASSWORD} | docker login -u ${CI_REGISTRY_USER} --password-stdin $CI_REGISTRY - docker pull ${CI_REGISTRY_IMAGE}:latest || true - - cd docker - - docker build - -t ${CI_REGISTRY_IMAGE}:latest - . + - docker build + -f docker/Dockerfile -t ${CI_REGISTRY_IMAGE}:latest . 
- docker push --all-tags ${CI_REGISTRY_IMAGE} + artifacts: + expire_in: 5 minutes buildcheck: stage: check @@ -58,8 +57,19 @@ buildcheck: dependencies: - build-image script: - - Rscript -e 'devtools::install(quick = TRUE, upgrade = "never")' - - Rscript -e 'devtools::check(document = FALSE, args = "--no-tests", check_dir = Sys.getenv("BUILD_LOGS_DIR"), vignettes = FALSE)' + - | + Rscript -e " + rcmdcheck::rcmdcheck( + args = c('--no-manual', '--no-tests'), + build_args = c('--no-manual', '--no-resave-data'), + check_dir = '.', + error_on = 'warning' + ) + " + artifacts: + paths: + - "*.tar.gz" + expire_in: 1 hour unittests: stage: test @@ -68,11 +78,19 @@ unittests: - build-image - buildcheck script: - - R -e 'library(testthat); options(testthat.output_file = file.path(Sys.getenv("CI_PROJECT_DIR"), "test-out.xml")); devtools::test(reporter = "junit")' - - R -e 'x <- covr::package_coverage(); covr::to_cobertura(x); x; ' + - | + Rscript -e ' + library(testthat) + options(testthat.output_file = file.path(Sys.getenv("CI_PROJECT_DIR"), "test-out.xml")) + test_local(reporter = "junit")' + - | + Rscript -e ' + x <- covr::package_coverage() + covr::to_cobertura(x) + x' artifacts: when: always - expire_in: 1 week + expire_in: 1 hour paths: - test-out.xml reports: @@ -93,33 +111,39 @@ longtest: - if: $RUN_LONG_TESTS == "FALSE" when: always - when: never - + artifacts: + expire_in: 5 minutes + pages: stage: end dependencies: - build-image - buildcheck script: - - Rscript -e 'devtools::install(quick = TRUE, upgrade = "never")' - - Rscript -e 'pkgdown::build_site(override = list(destination = "public"))' - - Rscript -e 'file.copy(from = "./public/articles/logo.png", to = "./public/reference/logo.png")' + - | + Rscript -e ' + source_file <- list.files()[grep(".tar.gz", list.files())] + install.packages(source_file, type = "source", dependencies = FALSE) + pkgdown::build_site(override = list(destination = "public")) + file.copy(from = "./public/articles/logo.png", to = 
"./public/reference/logo.png")' - quarto render - artifacts: paths: - $PAGES_OUTDIR - expire_in: 1 week + expire_in: 1 hour Validate Inventory: - stage: end - only: - - main - image: ${INTERNAL_REGISTRY}software/software-management:latest - script: - - software-management review - --project "${CI_PROJECT_PATH}" - --ref "${CI_COMMIT_BRANCH}" - --type "provisional" - --token "${GIT_TOKEN_CUSTOM}" - tags: - - chs-shared + stage: end + only: + - main + image: ${INTERNAL_REGISTRY}software/software-management:latest + script: + - software-management review + --project "${CI_PROJECT_PATH}" + --ref "${CI_COMMIT_BRANCH}" + --type "provisional" + --token "${GIT_TOKEN_CUSTOM}" + tags: + - chs-shared + artifacts: + expire_in: 5 minutes diff --git a/DESCRIPTION b/DESCRIPTION index 97cf48a78..f9ba7fdf4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dataRetrieval Type: Package Title: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data -Version: 2.7.24 +Version: 2.7.25 Authors@R: c( person("Laura", "DeCicco", role = c("aut","cre"), email = "ldecicco@usgs.gov", @@ -68,5 +68,5 @@ Encoding: UTF-8 BuildVignettes: true VignetteBuilder: knitr BugReports: https://github.com/DOI-USGS/dataRetrieval/issues -RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) +Config/roxygen2/version: 8.0.0 diff --git a/NAMESPACE b/NAMESPACE index 67434a709..5db89fe84 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,6 +55,8 @@ export(read_waterdata_latest_daily) export(read_waterdata_metadata) export(read_waterdata_monitoring_location) export(read_waterdata_parameter_codes) +export(read_waterdata_peaks) +export(read_waterdata_ratings) export(read_waterdata_samples) export(read_waterdata_stats_daterange) export(read_waterdata_stats_por) diff --git a/NEWS b/NEWS index a8dc31176..4ea55cd3d 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,28 @@ +dataRetrieval 2.7.25 +=================== +* Added read_waterdata_ratings to access USGS rating curves with +new modern endpoint. 
+* Added read_waterdata_peaks to access USGS peak data with +new modern endpoint. +* Increase flexibility of chunking by monitoring_location_id by +including it as an argument in each relevant waterdata OGC function. +* Clean up deprecated code. +* Updated retry strategy to include retry_on_failure = TRUE. +* Added countries, methods, method-categories, method-citations, and +citations to possible values in read_waterdata_metadata. +* Added field_measurements_series_id argument to read_waterdata_field_measurements +* Removed NWIS tests +* Introduce an error if user inputs a numeric to any of the "time" arguments. +Because it is impossible to tell if they intended Dates or POSIX, we cannot +know for sure and therefore could add incorrect filters to the query. +* The "id" column that comes back from read_waterdata_field_meta was changed to +field_measurements_series_id to match the expectation of `read_waterdata_field_measurements` +* New argument added to read_waterdata_stats_por: "normal_type" accepts +"DOY" and "MOY" +* New argument added to read_waterdata_stats_daterange: "interval_type" accepts +"M" (month), "CY" (calendar year), and "WY" (water year). + + dataRetrieval 2.7.24 =================== * Let dataRetrieval take care of chunking up requests by monitoring_location_id. 
diff --git a/R/AAA.R b/R/AAA.R index 73c38fbbb..d2ed38956 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -7,6 +7,11 @@ pkg.env <- new.env() options("dataRetrieval.api_version" = "v0") options("dataRetrieval.api_version_stat" = "v0") options("dataRetrieval.attach_request" = TRUE) + options("dataRetrieval.convertType" = TRUE) + options("dataRetrieval.no_paging" = FALSE) + options("dataRetrieval.site_chunk_size_meta" = 250) + options("dataRetrieval.site_chunk_size_data" = 10) + options("dataRetrieval.limit" = 50000) services <- c( "server", @@ -19,7 +24,8 @@ pkg.env <- new.env() "continuous", "field-measurements-metadata", "combined-metadata", - "channel-measurements" + "channel-measurements", + "peaks" ) collections <- c( "parameter-codes", @@ -32,7 +38,12 @@ pkg.env <- new.env() "coordinate-method-codes", "medium-codes", "counties", + "countries", "hydrologic-unit-codes", + "methods", + "method-categories", + "method-citations", + "citations", "states", "national-aquifer-codes", "reliability-codes", @@ -52,7 +63,12 @@ pkg.env <- new.env() "coordinate_method_code", "medium_code", "county", + "country", "hydrologic_unit_code", + "methods", + "method_categories", + "method_citations", + "citations", "state", "national_aquifer_code", "reliability_code", @@ -90,20 +106,6 @@ wqp_message_beta <- function() { message("WQX3 services are in-development, use with caution.") } -only_legacy <- function(service) { - legacy <- service %in% - c( - "Organization", - "ActivityMetric", - "SiteSummary", - "Project", - "ProjectMonitoringLocationWeighting", - "ResultDetectionQuantitationLimit", - "BiologicalMetric" - ) - return(legacy) -} - is_legacy <- function(service) { legacy <- service %in% c( diff --git a/R/constructNWISURL.R b/R/constructNWISURL.R index 34d724eba..fd06ed227 100644 --- a/R/constructNWISURL.R +++ b/R/constructNWISURL.R @@ -144,26 +144,6 @@ constructNWISURL <- function( url <- get_or_post(url, POST = POST, end_date = endDate) } }, - measurements = { - url <- get_or_post( - 
baseURL, - POST = POST, - site_no = siteNumbers, - .multi = "comma" - ) - url <- get_or_post(url, POST = POST, range_selection = "date_range") - if (nzchar(startDate)) { - url <- get_or_post(url, POST = POST, begin_date = startDate) - } - if (nzchar(endDate)) { - url <- get_or_post(url, POST = POST, end_date = endDate) - } - if (expanded) { - url <- get_or_post(url, POST = POST, format = "rdb_expanded") - } else { - url <- get_or_post(url, POST = POST, format = "rdb") - } - }, stat = { # for statistics service diff --git a/R/construct_api_requests.R b/R/construct_api_requests.R index c1cf483b5..c0469f64f 100644 --- a/R/construct_api_requests.R +++ b/R/construct_api_requests.R @@ -5,26 +5,27 @@ #' #' @export #' @param service Which service available on . +#' @param output_id Name of id column to return #' @param ... Extra parameters from the specific services. #' @param bbox Only features that have a geometry that intersects the bounding #' box are selected.The bounding box is provided as four or six numbers, depending #' on whether the coordinate reference system includes a vertical axis (height or #' depth). -#' @param properties The properties that should be included for each feature. The -#' parameter value is a comma-separated list of property names which depend on the -#' service being called. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. 
#' @keywords internal +#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api +#' #' @examples #' site <- "USGS-02238500" #' pcode <- "00060" #' req_dv <- construct_api_requests("daily", +#' output_id = "daily_id", #' monitoring_location_id = site, #' parameter_code = "00060") #' #' req_dv <- construct_api_requests("daily", +#' output_id = "daily_id", #' monitoring_location_id = site, #' parameter_code = c("00060", "00065")) #' @@ -32,21 +33,39 @@ #' start_date <- "2018-01-01" #' end_date <- "2022-01-01" #' req_dv <- construct_api_requests("daily", -#' monitoring_location_id = sites, -#' parameter_code = c("00060", "00065"), -#' datetime = c(start_date, end_date)) +#' output_id = "daily_id", +#' monitoring_location_id = sites, +#' parameter_code = c("00060", "00065"), +#' datetime = c(start_date, end_date)) #' construct_api_requests <- function( service, - properties = NA_character_, + output_id, + ..., bbox = NA, - skipGeometry = FALSE, - no_paging = FALSE, - ... + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { POST <- FALSE full_list <- list(...) 
+ full_list[["limit"]] <- limit + + check_arguments_non_api( + convertType = convertType, + no_paging = no_paging, + limit = full_list[["limit"]], + attach_request = attach_request, + chunk_size = chunk_size + ) + + check_arguments_api( + bbox = full_list[["bbox"]], + skipGeometry = full_list[["skipGeometry"]] + ) time_periods <- c( "last_modified", @@ -58,6 +77,20 @@ construct_api_requests <- function( "end_utc" ) + full_list <- switch_arg_id( + full_list, + id_name = output_id, + service = service + ) + + # Clean out non-API arguments: + properties <- switch_properties_id( + properties = full_list[["properties"]], + id = output_id + ) + + full_list[["properties"]] <- NULL + if (any(time_periods %in% names(full_list))) { for (i in time_periods[time_periods %in% names(full_list)]) { dates <- FALSE @@ -74,9 +107,10 @@ construct_api_requests <- function( "begin", "end", "time", - "limit", "begin_utc", - "end_utc" + "end_utc", + "limit", + "skipGeometry" ) comma_params <- c( @@ -85,7 +119,8 @@ construct_api_requests <- function( "statistic_id", "time_series_id", "computation_period_identifier", - "computation_identifier" + "computation_identifier", + "data_type" ) if ( @@ -110,6 +145,7 @@ construct_api_requests <- function( Negate(anyNA), lapply(full_list[comma_params], function(x) x[!is.na(x)]) ) + comma_params_filtered <- comma_params_filtered[ !sapply(comma_params_filtered, is.null) ] @@ -129,8 +165,6 @@ construct_api_requests <- function( get_list <- c(single_params_filtered, comma_params_filtered) } - get_list[["skipGeometry"]] <- skipGeometry - get_list <- get_list[!is.na(get_list)] format_type <- ifelse(isTRUE(no_paging), "csv", "json") @@ -207,15 +241,6 @@ construct_api_requests <- function( return(baseURL) } -check_limits <- function(args) { - current_api_limit <- 50000 - - if (is.na(args[["limit"]])) { - args[["limit"]] <- current_api_limit - } - - return(args) -} #' Setup the request for the OGC API requests #' @@ -315,7 +340,7 @@ switch_arg_id <- 
function(ls, id_name, service) { #' dataRetrieval:::format_api_dates(end) #' dataRetrieval:::format_api_dates(end, TRUE) #' -#' end <- c(NA, as.POSIXct("2021-01-01 12:15:00")) +#' end <- as.POSIXct(c(NA, "2021-01-01 12:15:00")) #' dataRetrieval:::format_api_dates(end) #' #' start_end <- as.POSIXct(c("2021-01-01 12:15:00", @@ -345,17 +370,24 @@ switch_arg_id <- function(ls, id_name, service) { #' start <- "2025-10-01" #' end <- Sys.Date() #' dataRetrieval:::format_api_dates(c(start, end), date = TRUE) +#' +#' # This is a problem because the first value forces the +#' # vector to be numeric, and then we don't really +#' # know if the 2nd value is a Date (number of days since 1970) +#' # or if it's a date/time (number of seconds..) +#' half_range <- c(NA, as.Date("2025-01-01")) +#' # Will error: +#' #dataRetrieval:::format_api_dates(half_range, date = FALSE) +#' # Better way to do it: +#' better_half <- as.Date(c(NA, "2025-01-01")) +#' dataRetrieval:::format_api_dates(better_half, date = FALSE) format_api_dates <- function(datetime, date = FALSE) { if (is.character(datetime)) { datetime[datetime == ""] <- NA datetime <- toupper(datetime) } - if (all(is.na(datetime))) { - return(NA) - } - - if (all(is.null(datetime))) { + if (all(is.na(datetime)) | all(is.null(datetime))) { return(NA) } @@ -363,6 +395,18 @@ format_api_dates <- function(datetime, date = FALSE) { stop("datetime should only include 1-2 values") } + if (is.numeric(datetime)) { + # Until we can figure out a way to know if the + # original input was suppose to be Date or Posix + # We can't determine what the user meant. + stop( + "A time query was entered as numeric. This could lead to errors. +Check any time queries that might have been automatically converted to numeric. +This could happen if you use c(NA, as.Date(Sys.Date())) instead of +as.Date(c(NA, Sys.Date()) for example." 
+ ) + } + if (length(datetime) == 1) { # If the user has "P" or the "/" we assume they know what they are doing if ( @@ -370,13 +414,13 @@ format_api_dates <- function(datetime, date = FALSE) { grepl("/", datetime) ) { return(datetime) + } + + if (date) { + datetime <- get_Date(datetime) } else { - if (date) { - datetime <- get_Date(datetime) - } else { - datetime1 <- get_dateTime(datetime) - datetime <- lubridate::format_ISO8601(datetime1, usetz = "Z") - } + datetime1 <- get_dateTime(datetime) + datetime <- lubridate::format_ISO8601(datetime1, usetz = "Z") } } else if (length(datetime) == 2) { if (date) { @@ -385,11 +429,9 @@ format_api_dates <- function(datetime, date = FALSE) { } datetime <- paste0(datetime, collapse = "/") } else { - for (i in seq_along(datetime)) { - datetime1 <- get_dateTime(datetime) - } + datetime1 <- lapply(datetime, get_dateTime) datetime <- paste0( - lubridate::format_ISO8601(datetime1, usetz = "Z"), + vapply(datetime1, lubridate::format_ISO8601, character(1), usetz = "Z"), collapse = "/" ) } @@ -597,14 +639,19 @@ basic_request <- function(url_base, format = "json") { httr2::req_headers(`Accept-Encoding` = c("compress", "gzip")) |> httr2::req_url_query(f = format, lang = "en-US") |> httr2::req_error(body = error_body) |> + httr2::req_retry(max_tries = 3, retry_on_failure = TRUE) |> httr2::req_timeout(seconds = 180) - token <- Sys.getenv("API_USGS_PAT") + req <- add_api_token(req) + return(req) +} + +add_api_token <- function(req) { + token <- Sys.getenv("API_USGS_PAT") if (token != "") { req <- req |> httr2::req_headers_redacted(`X-Api-Key` = token) } - - return(req) + req } diff --git a/R/dataRetrieval-package.R b/R/dataRetrieval-package.R index a899f4831..de5ae89e9 100644 --- a/R/dataRetrieval-package.R +++ b/R/dataRetrieval-package.R @@ -207,11 +207,11 @@ NULL # "monitoring-locations", "latest-continuous", # "field-measurements", "latest-daily", # "continuous", "field-measurements-metadata", -# "combined-metadata", 
"channel-measurements") +# "combined-metadata", "channel-measurements", "peaks") # # property_list <- list() # for(service in services){ -# property_list[[service]] <- get_properties_for_docs(service) +# property_list[[service]] <- dataRetrieval:::get_properties_for_docs(service) # } # # num_cols <- c("value", "contributing_drainage_area", "drainage_area", diff --git a/R/getWebServiceData.R b/R/getWebServiceData.R index 836a4ee3b..98759426d 100644 --- a/R/getWebServiceData.R +++ b/R/getWebServiceData.R @@ -30,7 +30,11 @@ getWebServiceData <- function(obs_url, ...) { obs_url <- httr2::req_user_agent(obs_url, default_ua()) obs_url <- httr2::req_throttle(obs_url, rate = 30 / 60) - obs_url <- httr2::req_retry(obs_url, max_tries = 3, max_seconds = 180) + obs_url <- httr2::req_retry( + obs_url, + max_tries = 3, + retry_on_failure = TRUE + ) obs_url <- httr2::req_headers( obs_url, `Accept-Encoding` = c("compress", "gzip") @@ -98,6 +102,10 @@ check_non_200s <- function(returnedList) { #' #' @keywords internal default_ua <- function() { + if (!is.null(pkg.env$ua)) { + return(pkg.env$ua) + } + versions <- c( libcurl = curl::curl_version()$version, httr2 = as.character(utils::packageVersion("httr2")), @@ -110,6 +118,7 @@ default_ua <- function() { ua <- paste0(ua, "/", Sys.getenv("CUSTOM_DR_UA")) } + pkg.env$ua <- ua return(ua) } diff --git a/R/get_ogc_data.R b/R/get_ogc_data.R index e0a915ac1..89ebb6306 100644 --- a/R/get_ogc_data.R +++ b/R/get_ogc_data.R @@ -3,15 +3,16 @@ #' @param args arguments from individual functions #' @param output_id Name of id column to return #' @param service Endpoint name. -#' @param \dots Used to force users to fully name the details argument. -#' @param chunk_size Number of monitoring_location_ids to chunk requests into. 
#' #' @noRd #' @return data.frame with attributes -get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { - rlang::check_dots_empty() +get_ogc_data <- function(args, output_id, service) { + chunk_size <- args[["chunk_size"]] + args[["..."]] <- NULL - if (length(args[["monitoring_location_id"]]) > chunk_size) { + if ( + !is.na(chunk_size) & length(args[["monitoring_location_id"]]) > chunk_size + ) { ml_splits <- split( args[["monitoring_location_id"]], ceiling(seq_along(args[["monitoring_location_id"]]) / chunk_size) @@ -32,16 +33,7 @@ get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { ignore.attr = TRUE )) } else { - args[["chunk_sites_by"]] <- NULL - - args <- switch_arg_id(args, id_name = output_id, service = service) - - args <- check_limits(args) - - properties <- args[["properties"]] - args[["properties"]] <- switch_properties_id(properties, id = output_id) - convertType <- args[["convertType"]] - args[["convertType"]] <- NULL + args[["output_id"]] <- output_id args[["service"]] <- service req <- do.call(construct_api_requests, args) @@ -56,24 +48,18 @@ get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { return_list <- walk_pages(req) } - if (is.na(args[["skipGeometry"]])) { - skipGeometry <- FALSE - } else { - skipGeometry <- args[["skipGeometry"]] - } - return_list <- deal_with_empty( return_list, - properties, + args[["properties"]], service, - skipGeometry, - convertType, + isTRUE(args[["skipGeometry"]]), + args[["convertType"]], no_paging ) - return_list <- rejigger_cols(return_list, properties, output_id) + return_list <- rejigger_cols(return_list, args[["properties"]], output_id) - if (convertType) { + if (args[["convertType"]]) { return_list <- cleanup_cols(return_list, service) return_list <- order_results(return_list) @@ -96,11 +82,25 @@ get_ogc_data <- function(args, output_id, service, ..., chunk_size = 250) { return_list <- move_id_col(return_list, output_id) } - if 
(getOption("dataRetrieval.attach_request")) { + if (args[["attach_request"]]) { attr(return_list, "request") <- req } } + if ( + !isTRUE(args[["skipGeometry"]]) & + "geometry" %in% names(return_list) + ) { + if ( + all(sf::st_is_empty(return_list[["geometry"]])) & + !"geometry" %in% args[["properties"]] + ) { + return_list <- sf::st_drop_geometry(return_list) + } else { + return_list <- sf::st_as_sf(return_list) + } + } + attr(return_list, "queryTime") <- Sys.time() return(return_list) } @@ -179,3 +179,103 @@ switch_properties_id <- function(properties, id) { return(properties) } + +#' Check non-API arguments +#' +#' Function to check types and create parameter descriptions. +#' +#' @param convertType logical, defaults to `r getOption("dataRetrieval.convertType")`. +#' If `TRUE`, the function will convert the data to dates, any qualifiers to string +#' vector and reorder the returned data frame. +#' @param no_paging logical, defaults to `r getOption("dataRetrieval.no_paging")`. +#' If `TRUE`, the data will +#' be requested from a native csv format. This can be dangerous because the +#' data will cut off at 50,000 rows without indication that more data +#' is available. Use `TRUE` with caution. +#' @param limit numeric, The optional limit parameter is used to control the subset of the +#' selected features that should be returned in each page. The maximum allowable +#' limit is 50,000. It may be beneficial to set this number lower if your internet +#' connection is spotty. The default (`NA`) will set the limit to the maximum +#' allowable limit for the service. +#' @param attach_request logical, defaults to `r getOption("dataRetrieval.attach_request")`. +#' If set to `TRUE`, the full request sent to the Water Data API is attached +#' as an attribute to the data set. +#' @param chunk_size Number of monitoring_location_ids to chunk requests into. 
+#' The default for functions that don't generally return long-term data records +#' is `r getOption("dataRetrieval.site_chunk_size_meta")`, while +#' the default for time series functions is +#' `r getOption("dataRetrieval.site_chunk_size_data")`. +#' Setting to `NA` will eliminate site chunking, giving users full control. +#' @param \dots Not used. Included to help differentiate official Water Data API arguments +#' from more seldom used, optional dataRetrieval-specific arguments. +#' @keywords internal +check_arguments_non_api <- function( + convertType, + no_paging, + limit, + attach_request, + chunk_size, + ... +) { + if (!is.null(convertType)) { + if (!is.na(convertType) & !is.logical(convertType)) { + stop("convertType should be a logical TRUE/FALSE") + } + } + + if (!is.null(no_paging)) { + if (!is.na(no_paging) & !is.logical(no_paging)) { + stop("no_paging should be a logical TRUE/FALSE") + } + } + + if (!is.null(attach_request)) { + if (!is.na(attach_request) & !is.logical(attach_request)) { + stop("attach_request should be a logical TRUE/FALSE") + } + } + + if (!is.null(limit)) { + if (!is.na(limit) & !is.numeric(limit)) { + stop("limit should be an integer") + } + } + + if (!is.null(chunk_size)) { + if (!is.na(chunk_size) & !is.numeric(chunk_size)) { + stop("chunk_size should be an integer") + } + } +} + +#' Check other arguments +#' +#' Additional functions to check types and create parameter descriptions. +#' +#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +#' vector structured: c(xmin,ymin,xmax,ymax). +#' Another way to think of it is c(Western-most longitude, +#' Southern-most latitude, Eastern-most longitude, Northern-most latitude). 
+#' @param skipGeometry This parameter can be used to skip response geometries for +#' each feature. The returning object will be a data frame with no spatial +#' information. The default `NA` will not specify the argument in the request. +#' +#' @keywords internal +check_arguments_api <- function(bbox, skipGeometry) { + if (!is.null(skipGeometry)) { + if (!is.na(skipGeometry) & !is.logical(skipGeometry)) { + stop("skipGeometry should be a logical TRUE/FALSE") + } + } + + if (!is.null(bbox)) { + if (!all(is.na(bbox))) { + if (!length(bbox) %in% c(1, 4)) { + stop("bbox is not set up correctly") + } + } + } +} diff --git a/R/importWQP.R b/R/importWQP.R index 8f435a259..30e78c887 100644 --- a/R/importWQP.R +++ b/R/importWQP.R @@ -277,23 +277,3 @@ create_dateTime <- function(df, date_col, time_col, tz_col, tz) { return(df) } -post_url <- function(obs_url, csv = FALSE) { - split <- strsplit(obs_url, "?", fixed = TRUE) - - url <- split[[1]][1] - if (csv) { - url <- paste0(url, "?mimeType=csv") - } else { - url <- paste0(url, "?mimeType=tsv") - } - - if (grepl("sorted", split[[1]][2])) { - url <- paste0( - url, - "&sorted=", - strsplit(split[[1]][2], "sorted=", fixed = TRUE)[[1]][2] - ) - } - - return(url) -} diff --git a/R/readNGWMNdata.R b/R/readNGWMNdata.R index dc4a86aed..5c01cecdf 100644 --- a/R/readNGWMNdata.R +++ b/R/readNGWMNdata.R @@ -50,9 +50,6 @@ readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = "UTC") { match.arg(service, c("observation", "featureOfInterest")) if (service == "observation") { - allObs <- data.frame() - allAttrs <- data.frame() - # these attributes are pulled out and saved when doing binds to be reattached attrs <- c( "url", @@ -63,13 +60,17 @@ readNGWMNdata <- function(service, ..., asDateTime = TRUE, tz = "UTC") { ) featureID <- stats::na.omit(gsub(":", ".", dots[["siteNumbers"]])) - for (f in featureID) { - obsFID <- retrieveObservation(featureID = f, asDateTime, attrs, tz = tz) - obsFIDattr <- saveAttrs(attrs, obsFID) - obsFID 
<- removeAttrs(attrs, obsFID) - allObs <- r_bind_dr(allObs, obsFID) - allAttrs <- r_bind_dr(allAttrs, obsFIDattr) + obs_list <- vector("list", length(featureID)) + attr_list <- vector("list", length(featureID)) + for (idx in seq_along(featureID)) { + obsFID <- retrieveObservation( + featureID = featureID[idx], asDateTime, attrs, tz = tz + ) + attr_list[[idx]] <- saveAttrs(attrs, obsFID) + obs_list[[idx]] <- removeAttrs(attrs, obsFID) } + allObs <- Reduce(r_bind_dr, obs_list, init = data.frame()) + allAttrs <- Reduce(r_bind_dr, attr_list, init = data.frame()) allSites <- tryCatch( { diff --git a/R/readNWISunit.R b/R/readNWISunit.R index ae117ea13..fd6f5c88c 100644 --- a/R/readNWISunit.R +++ b/R/readNWISunit.R @@ -156,12 +156,12 @@ readNWISuv <- function( #' @seealso [constructNWISURL()], [importRDB1()] #' @export #' @examplesIf is_dataRetrieval_user() -#' site_ids <- c("01594440", "040851325") +#' #site_ids <- c("01594440", "040851325") #' \donttest{ -#' data <- readNWISpeak(site_ids) -#' data2 <- readNWISpeak(site_ids, asDateTime = FALSE) -#' stations <- c("06011000") -#' peakdata <- readNWISpeak(stations, convertType = FALSE) +#' #data <- readNWISpeak(site_ids) +#' #data2 <- readNWISpeak(site_ids, asDateTime = FALSE) +#' #stations <- c("06011000") +#' #peakdata <- readNWISpeak(stations, convertType = FALSE) #' } readNWISpeak <- function( siteNumbers, @@ -170,8 +170,11 @@ readNWISpeak <- function( asDateTime = TRUE, convertType = TRUE ) { - message(new_nwis_message()) - + .Deprecated( + new = "read_waterdata_peaks", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_peaks." 
+ ) # Doesn't seem to be a peak xml service url <- constructNWISURL( siteNumbers = siteNumbers, @@ -261,20 +264,22 @@ readNWISpeak <- function( #' @examplesIf is_dataRetrieval_user() #' site_id <- "01594440" #' \donttest{ -#' data <- readNWISrating(site_id, "base") -#' attr(data, "RATING") +#' #data <- readNWISrating(site_id, "base") +#' #attr(data, "RATING") #' } readNWISrating <- function(siteNumber, type = "base", convertType = TRUE) { - message(new_nwis_message()) + .Deprecated( + new = "read_waterdata_ratings", + package = "dataRetrieval", + msg = "NWIS servers are slated for decommission. Please begin to migrate to read_waterdata_ratings." + ) + # No rating xml service url <- constructNWISURL(siteNumber, service = "rating", ratingType = type) data <- importRDB1(url, asDateTime = FALSE, convertType = convertType) if ("current_rating_nu" %in% names(data)) { - intColumns <- intColumns[ - !("current_rating_nu" %in% names(data)[intColumns]) - ] data$current_rating_nu <- gsub(" ", "", data$current_rating_nu) } @@ -464,10 +469,3 @@ readNWISuse <- function( ) return(NULL) } - -.capitalALL <- function(input) { - if (any(grepl("(?i)all", input))) { - input <- toupper(input) - } - return(input) -} diff --git a/R/read_waterdata.R b/R/read_waterdata.R index 2a5df774b..815f5510c 100644 --- a/R/read_waterdata.R +++ b/R/read_waterdata.R @@ -10,6 +10,8 @@ #' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function #' will convert the data to dates and qualifier to string vector. #' @param \dots Additional arguments to send to the request. 
+#' @inheritParams check_arguments_non_api +#' #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -51,12 +53,26 @@ #' CQL = cql_huc_wildcard) #' #' } -read_waterdata <- function(service, CQL, ..., convertType = TRUE) { +read_waterdata <- function( + service, + CQL, + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) { match.arg(service, pkg.env$api_endpoints) args <- list(...) - output_id <- switch( + args[["convertType"]] <- convertType + args[["limit"]] <- limit + args[["attach_request"]] <- attach_request + args[["bbox"]] <- NA + args[["no_paging"]] <- FALSE # drops id if TRUE + args[["chunk_size"]] <- NA # Chunking doesn't make sense. + + args[["output_id"]] <- switch( service, "daily" = "daily_id", "latest-daily" = "latest_daily_id", @@ -72,12 +88,7 @@ read_waterdata <- function(service, CQL, ..., convertType = TRUE) { args[["properties"]] <- NA_character_ } - if (!"limit" %in% names(args)) { - args[["limit"]] <- NA_character_ - } - args[["service"]] <- service - args <- check_limits(args) data_req <- suppressWarnings(do.call(construct_api_requests, args)) @@ -87,28 +98,24 @@ read_waterdata <- function(service, CQL, ..., convertType = TRUE) { return_list <- walk_pages(data_req) - if (is.null(args[["skipGeometry"]])) { - skipGeometry <- FALSE - } else if (is.na(args[["skipGeometry"]])) { - skipGeometry <- FALSE - } else { - skipGeometry <- args[["skipGeometry"]] - } - return_list <- deal_with_empty( return_list, args[["properties"]], service, - skipGeometry, + isTRUE(args[["skipGeometry"]]), convertType ) - return_list <- rejigger_cols(return_list, args[["properties"]], output_id) + return_list <- rejigger_cols( + return_list, + args[["properties"]], + args[["output_id"]] + ) if (convertType) { return_list <- cleanup_cols(return_list, service) return_list <- order_results(return_list) - return_list <- move_id_col(return_list, output_id) 
+ return_list <- move_id_col(return_list, args[["output_id"]]) } return(return_list) diff --git a/R/read_waterdata_channel.R b/R/read_waterdata_channel.R index 62c0376c4..74e92840c 100644 --- a/R/read_waterdata_channel.R +++ b/R/read_waterdata_channel.R @@ -34,26 +34,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("channel-measurements", "channel_measurements_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -94,13 +76,17 @@ read_waterdata_channel <- function( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "channel-measurements" output_id <- "channel_measurements_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_combined_meta.R b/R/read_waterdata_combined_meta.R index d72d4471a..beb3f1189 100644 --- a/R/read_waterdata_combined_meta.R +++ b/R/read_waterdata_combined_meta.R @@ -82,27 +82,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("combined-metadata", "field_measurement_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. 
The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. -#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' @inherit read_waterdata_continuous details #' #' @@ -202,12 +183,16 @@ read_waterdata_combined_meta <- function( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "combined-metadata" output_id <- "combined_meta_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_continuous.R b/R/read_waterdata_continuous.R index c63e6d87f..d9291e560 100644 --- a/R/read_waterdata_continuous.R +++ b/R/read_waterdata_continuous.R @@ -32,18 +32,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("continuous", "continuous_id")`. #' The default (`NA`) will return all columns of the data. -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. 
The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector, and sepcifically -#' order the returning data frame by time and monitoring_location_id. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. +#' @param \dots Not used. Included to help differentiate official Water Data API arguments +#' from more seldom used, optional dataRetrieval-specific arguments. +#' @inheritParams check_arguments_non_api #' #' @details #' You can also use a vector of length 2 for any time queries (such as time @@ -129,12 +120,16 @@ read_waterdata_continuous <- function( value = NA, last_modified = NA_character_, time = NA_character_, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "continuous" output_id <- "continuous_id" + rlang::check_dots_empty() args <- mget(names(formals())) args[["skipGeometry"]] <- TRUE diff --git a/R/read_waterdata_daily.R b/R/read_waterdata_daily.R index a05186746..d13072c8f 100644 --- a/R/read_waterdata_daily.R +++ b/R/read_waterdata_daily.R @@ -25,26 +25,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("daily", "daily_id")`. #' The default (`NA`) will return all columns of the data. 
-#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -105,12 +88,16 @@ read_waterdata_daily <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "daily" output_id <- "daily_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_field_measurements.R b/R/read_waterdata_field_measurements.R index b4cb90f4a..25d822569 100644 --- a/R/read_waterdata_field_measurements.R +++ b/R/read_waterdata_field_measurements.R @@ -20,6 +20,12 @@ #' See also Details below for more information. #' @param qualifier `r get_ogc_params("field-measurements")$qualifier` #' @param field_visit_id `r get_ogc_params("field-measurements")$field_visit_id` +#' @param field_measurements_series_id A unique identifier representing a single +#' collection series. This corresponds to the `field_measurements_series_id` field in the +#' `read_waterdata_field_meta` endpoint. Collection series are defined as the +#' set of field measurements at a given monitoring location for a single parameter +#' code using a single reading type. +#' #' @param vertical_datum `r get_ogc_params("field-measurements")$vertical_datum` #' @param measuring_agency `r get_ogc_params("field-measurements")$measuring_agency` #' @param control_condition `r get_ogc_params("field-measurements")$control_condition` @@ -31,26 +37,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("field-measurements", "field_measurement_id")`. 
#' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -83,6 +71,9 @@ #' old_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", #' time = c("1980-01-01", NA)) #' +#' new_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", +#' time = c(NA, "2020-01-01")) +#' #' surface_water <- read_waterdata_field_measurements( #' monitoring_location_id = c("USGS-07069000", #' "USGS-07064000", @@ -98,6 +89,7 @@ read_waterdata_field_measurements <- function( observing_procedure_code = NA_character_, properties = NA_character_, field_visit_id = NA_character_, + field_measurements_series_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -111,12 +103,16 @@ read_waterdata_field_measurements <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "field-measurements" output_id <- "field_measurement_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_field_meta.R b/R/read_waterdata_field_meta.R index b4df45b6a..b45df0154 100644 --- a/R/read_waterdata_field_meta.R +++ b/R/read_waterdata_field_meta.R @@ -24,26 +24,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("field-measurements-metadata", "field_measurement_id")`. #' The default (`NA`) will return all columns of the data. 
-#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -95,11 +78,15 @@ read_waterdata_field_meta <- function( skipGeometry = NA, bbox = NA, limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "field-measurements-metadata" - output_id <- "field_series_id" + output_id <- "field_measurements_series_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_latest_continuous.R b/R/read_waterdata_latest_continuous.R index b015c9192..e450561ae 100644 --- a/R/read_waterdata_latest_continuous.R +++ b/R/read_waterdata_latest_continuous.R @@ -23,26 +23,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("latest-continuous", "latest_continuous_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. 
The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @examplesIf is_dataRetrieval_user() @@ -92,12 +74,16 @@ read_waterdata_latest_continuous <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "latest-continuous" output_id <- "latest_continuous_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_latest_daily.R b/R/read_waterdata_latest_daily.R index dfb5deb38..85bc6c11f 100644 --- a/R/read_waterdata_latest_daily.R +++ b/R/read_waterdata_latest_daily.R @@ -25,27 +25,9 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("latest-daily", "latest_daily_id")`. #' The default (`NA`) will return all columns of the data. 
-#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' @inherit read_waterdata_continuous details #' #' @examplesIf is_dataRetrieval_user() @@ -71,7 +53,7 @@ #' skipGeometry = TRUE) #' #' multi_site <- read_waterdata_latest_daily(monitoring_location_id = c("USGS-01491000", -#' "USGS-01645000"), +#' "USGS-01645000"), #' parameter_code = c("00060", "00010")) #' #' } @@ -89,12 +71,16 @@ read_waterdata_latest_daily <- function( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "latest-daily" output_id <- "latest_daily_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/read_waterdata_metadata.R b/R/read_waterdata_metadata.R index f2c673701..5021817d8 100644 --- a/R/read_waterdata_metadata.R +++ b/R/read_waterdata_metadata.R @@ -10,13 +10,16 @@ #' "coordinate-method-codes", "hydrologic-unit-codes", "medium-codes", #' "national-aquifer-codes", "reliability-codes", "site-types", "statistic-codes", #' "topographic-codes", "time-zone-codes". +#' @param \dots Optional arguments to pass to the query. Available parameters +#' can be found with the \code{get_ogc_params} function. #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable #' limit is 50000. It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. -#' @param \dots Optional arguments to pass to the query. 
Available parameters -#' can be found with the \code{get_ogc_params} function. +#' @param attach_request logical, defaults to `r getOption("dataRetrieval.attach_request")`. +#' If set to `TRUE`, the full request sent to the Water Data API is attached +#' as an attribute to the data set. #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -25,11 +28,16 @@ #' aquifer_codes <- read_waterdata_metadata("aquifer-codes") #' aquifer_types <- read_waterdata_metadata("aquifer-types") #' counties <- read_waterdata_metadata("counties") +#' countries <- read_waterdata_metadata("countries") #' us_counties <- read_waterdata_metadata("counties", country_code = "US") #' coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") #' coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") #' coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") #' huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") +#' methods <- read_waterdata_metadata("methods") +#' method_categories <- read_waterdata_metadata("method-categories") +#' method_citations <- read_waterdata_metadata("method-citations") +#' citations <- read_waterdata_metadata("citations") #' national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") #' parameter_codes <- read_waterdata_metadata("parameter-codes") #' reliability_codes <- read_waterdata_metadata("reliability-codes") @@ -42,7 +50,12 @@ #' time_zone_limited <- read_waterdata_metadata("time-zone-codes", #' time_zone_description = c("Alaska", "Hawaii", "Pacific North America")) #' } -read_waterdata_metadata <- function(collection, limit = NA, ...) 
{ +read_waterdata_metadata <- function( + collection, + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) { match.arg(collection, pkg.env$metadata) output_id <- names(pkg.env$metadata)[pkg.env$metadata == collection] @@ -57,12 +70,13 @@ read_waterdata_metadata <- function(collection, limit = NA, ...) { stop(paste0("Unknown argument: ", wrong_args)) } } - + args[["attach_request"]] <- attach_request args[["limit"]] <- limit args[["convertType"]] <- FALSE - args[["skipGeometry"]] <- TRUE + args[["skipGeometry"]] <- NA args[["bbox"]] <- NA args[["no_paging"]] <- FALSE # drops id if TRUE + args[["chunk_size"]] <- NA # Chunking doesn't make sense. return_list <- get_ogc_data( args = args, diff --git a/R/read_waterdata_monitoring_location.R b/R/read_waterdata_monitoring_location.R index 7ec6d4b25..8561f98a8 100644 --- a/R/read_waterdata_monitoring_location.R +++ b/R/read_waterdata_monitoring_location.R @@ -48,20 +48,8 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("monitoring-locations", "monitoring_location_id")`. #' The default (`NA`) will return all columns of the data. -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. 
The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -91,6 +79,8 @@ #' #' bbox_vals = c(-94.00, 35.0, -93.5, 35.5) #' multi_site <- read_waterdata_monitoring_location(bbox = bbox_vals) +#' +#' #' } read_waterdata_monitoring_location <- function( monitoring_location_id = NA_character_, @@ -135,11 +125,16 @@ read_waterdata_monitoring_location <- function( depth_source_code = NA_character_, properties = NA_character_, bbox = NA, - limit = NA, - skipGeometry = NA + skipGeometry = NA, + ..., + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "monitoring-locations" output_id <- "monitoring_location_id" + rlang::check_dots_empty() args <- mget(names(formals())) args[["convertType"]] <- FALSE diff --git a/R/read_waterdata_parameter_codes.R b/R/read_waterdata_parameter_codes.R index 6876c3197..c098e3999 100644 --- a/R/read_waterdata_parameter_codes.R +++ b/R/read_waterdata_parameter_codes.R @@ -18,11 +18,16 @@ #' Available options are: #' `r dataRetrieval:::get_properties_for_docs("parameter-codes", "parameter_code_id")`. #' The default (`NA`) will return all columns of the data. +#' @param \dots Not used. Included to help differentiate official Water Data API arguments +#' from more seldom used, optional dataRetrieval-specific arguments. #' @param limit The optional limit parameter is used to control the subset of the #' selected features that should be returned in each page. The maximum allowable #' limit is 50000. 
It may be beneficial to set this number lower if your internet #' connection is spotty. The default (`NA`) will set the limit to the maximum #' allowable limit for the service. +#' @param attach_request logical, defaults to `r getOption("dataRetrieval.attach_request")`. +#' If set to `TRUE`, the full request sent to the Water Data API is attached +#' as an attribute to the data set. #' @examplesIf is_dataRetrieval_user() #' #' \donttest{ @@ -56,16 +61,20 @@ read_waterdata_parameter_codes <- function( temperature_basis = NA_character_, epa_equivalence = NA_character_, properties = NA_character_, - limit = NA + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) { service <- "parameter-codes" output_id <- "parameter_code" + rlang::check_dots_empty() args <- mget(names(formals())) args[["convertType"]] <- FALSE - args[["skipGeometry"]] <- TRUE + args[["skipGeometry"]] <- NA args[["bbox"]] <- NA args[["no_paging"]] <- FALSE # drops id if TRUE + args[["chunk_size"]] <- NA return_list <- get_ogc_data( args = args, diff --git a/R/read_waterdata_peaks.R b/R/read_waterdata_peaks.R new file mode 100644 index 000000000..3837a4c9a --- /dev/null +++ b/R/read_waterdata_peaks.R @@ -0,0 +1,81 @@ +#' Get USGS Peak Data +#' +#' @description `r get_description("peaks")` +#' +#' @export +#' @param monitoring_location_id `r get_ogc_params("peaks")$monitoring_location_id` +#' Multiple monitoring_location_ids can be requested as a character vector. +#' @param parameter_code `r get_ogc_params("peaks")$parameter_code` +#' Multiple parameter_codes can be requested as a character vector. +#' @param time `r get_ogc_params("peaks")$time` +#' +#' See also Details below for more information. 
+#' @param value `r get_ogc_params("peaks")$value` +#' @param unit_of_measure `r get_ogc_params("peaks")$unit_of_measure` +#' @param time_series_id `r get_ogc_params("peaks")$time_series_id` +#' @param last_modified `r get_ogc_params("peaks")$last_modified` +#' +#' See also Details below for more information. +#' @param water_year `r get_ogc_params("peaks")$water_year` +#' @param year `r get_ogc_params("peaks")$year` +#' @param month `r get_ogc_params("peaks")$month` +#' @param day `r get_ogc_params("peaks")$day` +#' @param time_of_day `r get_ogc_params("peaks")$time_of_day` +#' @param peak_since `r get_ogc_params("peaks")$peak_since` +#' @param properties A vector of requested columns to be returned from the query. +#' Available options are: +#' `r dataRetrieval:::get_properties_for_docs("peaks", "peak_id")`. +#' The default (`NA`) will return all columns of the data. +#' +#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api +#' +#' @inherit read_waterdata_continuous details +#' +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' wi_peaks <- read_waterdata_combined_meta( +#' state_name = "Wisconsin", +#' data_type = "Peaks", +#' parameter_code = "00060") +#' +#' +#' dv_data_sf <- read_waterdata_peaks( +#' monitoring_location_id = wi_peaks$monitoring_location_id[1], +#' parameter_code = "00060") +#' +#' } +read_waterdata_peaks <- function( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + unit_of_measure = NA_character_, + value = NA, + last_modified = NA_character_, + water_year = NA_character_, + year = NA_character_, + month = NA_character_, + day = NA_character_, + time_of_day = NA_character_, + peak_since = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = 
getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) { + service <- "peaks" + output_id <- "peak_id" + rlang::check_dots_empty() + + args <- mget(names(formals())) + return_list <- get_ogc_data(args, output_id, service) + + return(return_list) +} diff --git a/R/read_waterdata_ratings.R b/R/read_waterdata_ratings.R new file mode 100644 index 000000000..3663592af --- /dev/null +++ b/R/read_waterdata_ratings.R @@ -0,0 +1,188 @@ +#' Get USGS Rating Curve Data +#' +#' Reads the current rating tables for active USGS streamgages. More information +#' can be found at https://api.waterdata.usgs.gov/docs/stac/. +#' +#' @param monitoring_location_id A unique identifier representing a single +#' monitoring location. Monitoring location IDs are created by combining the +#' agency code of the agency responsible for the monitoring location (e.g. USGS) +#' with the ID number of the monitoring location (e.g. 02238500), separated by +#' a hyphen (e.g. USGS-02238500). +#' Multiple monitoring_location_ids can be requested as a character vector. +#' @param file_type Rating file type. Could be any of "exsa", "corr", or "base". +#' If `file_type` is "base" then the columns are +#' INDEP, typically the gage height, in feet; DEP, typically the streamflow, +#' in cubic feet per second; and STOR, where "*" indicates that the pair are +#' a fixed point of the rating curve. If `file_type` is "exsa" then an +#' additional column, SHIFT, is included that indicates the current shift in +#' the rating for that value of INDEP. If `file_type` is "corr" then the +#' columns are INDEP, typically the gage height, in feet; CORR, the correction +#' for that value; and CORRINDEP, the corrected value for CORR. +#' @param file_path Path to save the rating curve rdb files. The +#' default is `tempdir()`, so the files are removed when the R session ends. +#' @param datetime Only return items that have a temporal property that +#' intersects this value. Either a date-time or an interval, open or closed.
+#' See Details below. +#' @param bbox Only features that have a geometry that intersects the bounding +#' box are selected. The bounding box is provided as four or six numbers, depending +#' on whether the coordinate reference system includes a vertical axis (height or +#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +#' vector structured: c(xmin,ymin,xmax,ymax). +#' Another way to think of it is c(Western-most longitude, +#' Southern-most latitude, Eastern-most longitude, Northern-most latitude). +#' @param \dots Not used. +#' @param limit Limits the number of results that are included in each page of +#' the response (capped at the default 10,000). +#' @param download_and_parse Logical to define whether or not to download, parse, +#' and return a list of data frames with rating curve data (`TRUE`), or to return +#' just a list of available rating curve files (`FALSE`). Default is `TRUE`. +#' @export +#' @inherit read_waterdata_continuous details +#' +#' @return List of data frames which contain the requested rating curves, or, +#' when `download_and_parse = FALSE`, the list of matching features returned by +#' the service.
+#' +#' @examplesIf is_dataRetrieval_user() +#' +#' \donttest{ +#' +#' monitoring_location_id <- c("USGS-01104475", "USGS-01104460") +#' ratings_exsa <- read_waterdata_ratings( +#' monitoring_location_id = monitoring_location_id, +#' file_type = "exsa") +#' +#' head(ratings_exsa[["USGS-01104475.exsa.rdb"]]) +#' comment(ratings_exsa[["USGS-01104475.exsa.rdb"]])[1:15] +#' +#' ratings_corr <- read_waterdata_ratings( +#' monitoring_location_id = monitoring_location_id, +#' file_type = "corr") +#' +#' head(ratings_corr[["USGS-01104460.corr.rdb"]]) +#' comment(ratings_corr[["USGS-01104460.corr.rdb"]])[1:15] +#' +#' rating_2 <- read_waterdata_ratings( +#' monitoring_location_id = monitoring_location_id, +#' file_type = c("corr", "exsa")) +#' names(rating_2) +#' +#' bbox <- c(-95.00, 40.0, -92.0, 42) +#' +#' bbox_query <- read_waterdata_ratings(bbox = bbox, +#' download_and_parse = FALSE) +#' length(bbox_query) +#' recent_query <- read_waterdata_ratings(bbox = bbox, +#' datetime = c(Sys.Date()-7, NA), +#' download_and_parse = FALSE) +#' length(recent_query) +#'} +read_waterdata_ratings <- function( + monitoring_location_id = NA_character_, + file_type = c("exsa", "base", "corr"), + file_path = tempdir(), + bbox = NA, + datetime = NA_character_, + ..., + limit = 10000, + download_and_parse = TRUE +) { + match.arg( + arg = file_type, + choices = c("exsa", "base", "corr"), + several.ok = TRUE + ) + rlang::check_dots_empty() + + request <- httr2::request("https://api.waterdata.usgs.gov/stac/v0/") |> + httr2::req_url_path_append("search") + + filter <- NA_character_ + + if (!all(is.na(monitoring_location_id))) { + if (length(monitoring_location_id) > 1) { + monitoring_location_id <- paste0( + monitoring_location_id, + collapse = "', '" + ) + } + + filter <- sprintf( + "monitoring_location_id IN ('%s')", + monitoring_location_id + ) + } + + if (length(file_type) == 1) { + # Join only the clauses that exist: sprintf("%s", NA) yields the text "NA", + # which used to send the filter "NA AND file_type = '...'" to the service. + type_clause <- sprintf("file_type = '%s'", file_type) + filter <- paste(c(filter[!is.na(filter)], type_clause), collapse = " AND ") + } + + if (!is.na(filter)) {
+ request <- request |> + httr2::req_url_query(filter = filter) + } + + if (!all(is.na(datetime))) { + if (any(grepl("P", datetime))) { + stop( + "Periods are not supported in datetime argument in the rating curve service." + ) + } + datetime <- format_api_dates(datetime, date = FALSE) + + request <- request |> + httr2::req_url_query(datetime = datetime) + } + + if (all(!is.na(bbox))) { + request <- httr2::req_url_query( + request, + bbox = as.numeric(bbox), + .multi = "comma" + ) + } + + request <- request |> + httr2::req_url_query(limit = limit) |> + basic_request() + + resp <- httr2::req_perform(request) + log_rate_limit(resp) + + features <- httr2::resp_body_json(resp)[["features"]] + + if (download_and_parse) { + return_list <- list() + for (feature in features) { + id <- feature$id + df <- download_convert(feature, file_path, file_type) + if (!is.null(df)) { + return_list[[id]] <- df + } + } + + return(return_list) + } else { + return(features) + } +} + +# Download one feature's rdb asset into file_path and parse it; returns NULL +# when the asset URL matches none of the requested file types. +download_convert <- function(feature, file_path, file_type) { + id <- feature$id + url <- feature$assets$data$href + + req <- httr2::request(url) |> + basic_request() + + if (any(vapply(file_type, function(x) grepl(x, url), logical(1)))) { + full_file_path <- file.path(file_path, id) + message("Requesting: \n", url) + httr2::req_perform(req, path = full_file_path) + rating <- importRDB1(full_file_path) + return(rating) + } + + return(NULL) +} diff --git a/R/read_waterdata_samples.R b/R/read_waterdata_samples.R index f18738b99..3ef74e805 100644 --- a/R/read_waterdata_samples.R +++ b/R/read_waterdata_samples.R @@ -66,7 +66,7 @@ #' @param recordIdentifierUserSupplied Record identifier, user supplied identifier. This #' information would be needed from the data supplier. #' @param siteTypeName Site type name query parameter. See available -#' options by running `check_param("sitetype")$typeName`.
+#' options by running `check_waterdata_sample_params("sitetype")$typeName`. #' @param usgsPCode USGS parameter code. See available options by running #' `check_waterdata_sample_params("characteristics")$parameterCode`. #' @param pointLocationLatitude Latitude for a point/radius query (decimal degrees). Must be used @@ -129,14 +129,8 @@ construct_waterdata_sample_request <- function( baseURL <- httr2::request("https://api.waterdata.usgs.gov") |> httr2::req_url_path_append("samples-data") |> - httr2::req_url_query(mimeType = "text/csv") - - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - baseURL <- baseURL |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + httr2::req_url_query(mimeType = "text/csv") |> + add_api_token() switch( dataType, @@ -387,14 +381,8 @@ check_waterdata_sample_params <- function( match.arg(service, choices = service_options, several.ok = FALSE) check_group_req <- httr2::request("https://api.waterdata.usgs.gov") |> - httr2::req_url_path_append("samples-data") - - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - check_group_req <- check_group_req |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + httr2::req_url_path_append("samples-data") |> + add_api_token() if (service != "reference-list") { check_group_req <- check_group_req |> @@ -521,10 +509,13 @@ read_waterdata_samples <- function( #' This function creates the call and gets the data for discrete water quality samples summary data #' service described at . #' -#' @param monitoringLocationIdentifier A monitoring location identifier has two parts, -#' separated by a dash (-): the agency code and the location number. Location identifiers should be separated with commas, -#' for example: AZ014-320821110580701, CAX01-15304600, USGS-040851385. Location -#' numbers without an agency prefix are assumed to have the prefix USGS. 
+#' @param monitoringLocationIdentifier A single monitoring location identifier +#' with two parts, separated by a dash (-): the agency code and the location +#' number. Examples: USGS-040851385, AZ014-320821110580701, CAX01-15304600. +#' The summary service accepts only one site at a time; supplying a vector of +#' length > 1 raises an error. The agency prefix is required: bare location +#' numbers (e.g. "040851385") are accepted by the service but return an empty +#' result. #' @export #' @return data frame with summary of data available based on the monitoringLocationIdentifier #' @rdname summarize_waterdata_samples @@ -544,14 +535,8 @@ summarize_waterdata_samples <- function(monitoringLocationIdentifier) { baseURL <- httr2::request("https://api.waterdata.usgs.gov") |> httr2::req_url_path_append("samples-data") |> httr2::req_url_path_append("summary", monitoringLocationIdentifier) |> - httr2::req_url_query(mimeType = "text/csv") - - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - baseURL <- baseURL |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + httr2::req_url_query(mimeType = "text/csv") |> + add_api_token() df <- importWQP(baseURL) @@ -567,66 +552,20 @@ summarize_waterdata_samples <- function(monitoringLocationIdentifier) { } -#' @rdname read_waterdata_samples +#' @title Deprecated: Use \code{read_waterdata_samples} instead +#' @description This function has been renamed to \code{\link{read_waterdata_samples}}. +#' @param ... Arguments passed to \code{\link{read_waterdata_samples}}. +#' @return data frame returned from web service call. 
#' @export -read_USGS_samples <- function( - monitoringLocationIdentifier = NA, - siteTypeCode = NA, - boundingBox = NA, - hydrologicUnit = NA, - activityMediaName = NA, - characteristicGroup = NA, - characteristic = NA, - characteristicUserSupplied = NA, - activityStartDateLower = NA, - activityStartDateUpper = NA, - countryFips = NA, - stateFips = NA, - countyFips = NA, - projectIdentifier = NA, - recordIdentifierUserSupplied = NA, - siteTypeName = NA, - usgsPCode = NA, - pointLocationLatitude = NA, - pointLocationLongitude = NA, - pointLocationWithinMiles = NA, - dataType = "results", - dataProfile = NA, - tz = "UTC", - convertType = TRUE -) { +#' @keywords internal +read_USGS_samples <- function(...) { .Deprecated( new = "read_waterdata_samples", package = "dataRetrieval", msg = "Function has been renamed. Please begin to migrate to read_waterdata_samples" ) - read_waterdata_samples( - monitoringLocationIdentifier = monitoringLocationIdentifier, - siteTypeCode = siteTypeCode, - boundingBox = boundingBox, - hydrologicUnit = hydrologicUnit, - activityMediaName = activityMediaName, - characteristicGroup = characteristicGroup, - characteristic = characteristic, - characteristicUserSupplied = characteristicUserSupplied, - activityStartDateLower = activityStartDateLower, - activityStartDateUpper = activityStartDateUpper, - countryFips = countryFips, - stateFips = stateFips, - countyFips = countyFips, - projectIdentifier = projectIdentifier, - recordIdentifierUserSupplied = recordIdentifierUserSupplied, - siteTypeName = siteTypeName, - usgsPCode = usgsPCode, - pointLocationLatitude = pointLocationLatitude, - pointLocationLongitude = pointLocationLongitude, - pointLocationWithinMiles = pointLocationWithinMiles, - dataType = dataType, - dataProfile = dataProfile, - tz = tz, - convertType = convertType - ) + read_waterdata_samples(...) 
} diff --git a/R/read_waterdata_stats.R b/R/read_waterdata_stats.R index 3fb0911fc..d7a96e8e4 100644 --- a/R/read_waterdata_stats.R +++ b/R/read_waterdata_stats.R @@ -39,6 +39,13 @@ #' supplied then statistics will be supplied for the entire period of record. #' @param end_date End Date Query Parameter. The logic is inclusive i.e., it will #' also return records that match the date. +#' @param normal_type Normal Type Query Parameter. If unspecified, all matching data +#' will be returned. Otherwise, it will filter the results to one of the following +#' normals: day-of-year, month-of-year. Available values: "DOY", "MOY". +#' @param interval_type Interval Type Query Parameter. If unspecified, all matching +#' data will be returned. Otherwise, it will filter the results to one or more of +#' the following intervals: month, calendar year, water year. +#' Available values: "M", "CY", "WY". #' @param monitoring_location_id Each monitoring location has been assigned a #' unique station number that places them in downstream order. Accepts #' multiple values in a character vector. @@ -69,6 +76,12 @@ #' monitoring_location_id = c("USGS-02319394", "USGS-02171500") #' ) #' +#' # Request only month-of-year statistics using normal_type arg +#' x1 <- read_waterdata_stats_por( +#' monitoring_location_id = c("USGS-02319394", "USGS-02171500"), +#' normal_type = "MOY" +#' ) +#' #' # Request temperature percentiles for specific month-day range #' # Returns: #' # - Day-of-year temperature percentiles for each day between June 1 through June 15. 
@@ -88,6 +101,12 @@ #' monitoring_location_id = c("USGS-02319394", "USGS-02171500") #' ) #' +#' # Request only calendar year statistics +#' x3 <- read_waterdata_stats_daterange( +#' monitoring_location_id = c("USGS-02319394", "USGS-02171500"), +#' interval_type = "CY" +#' ) +#' #' # Request specific gage height and discharge summaries for a limited date range #' # Returns: #' # - calendar month summaries for each month between January, 2010 through December, 2011 @@ -112,6 +131,7 @@ read_waterdata_stats_por <- function( county_code = NA_character_, start_date = NA_character_, end_date = NA_character_, + normal_type = NA_character_, monitoring_location_id = NA_character_, parent_time_series_id = NA_character_, site_type_code = NA_character_, @@ -134,6 +154,7 @@ read_waterdata_stats_daterange <- function( county_code = NA_character_, start_date = NA_character_, end_date = NA_character_, + interval_type = NA_character_, monitoring_location_id = NA_character_, parent_time_series_id = NA_character_, site_type_code = NA_character_, @@ -163,12 +184,7 @@ construct_statistics_request <- function(service = "Normals") { httr2::req_url_path_append(getOption("dataRetrieval.api_version_stat")) |> httr2::req_url_path_append(paste0("observation", service)) - token <- Sys.getenv("API_USGS_PAT") - - if (token != "") { - base_request <- base_request |> - httr2::req_headers_redacted(`X-Api-Key` = token) - } + base_request <- add_api_token(base_request) return(base_request) } @@ -338,11 +354,12 @@ deal_with_empty_stats <- function( "parameter_code", "unit_of_measure", "parent_time_series_id", + "parent_statistics_id", + "parent_statistics_name", "value", "percentile", "start_date", "end_date", - "interval_type", "sample_count", "approval_status", "computation_id", diff --git a/R/read_waterdata_ts_meta.R b/R/read_waterdata_ts_meta.R index dd9777841..b3f869dec 100644 --- a/R/read_waterdata_ts_meta.R +++ b/R/read_waterdata_ts_meta.R @@ -45,28 +45,8 @@ #' `r 
dataRetrieval:::get_properties_for_docs("time-series-metadata", "time_series_id")`. #' The default (`NA`) will return all columns of the data. #' @param time_series_id `r get_ogc_params("time-series-metadata")$id` -#' @param bbox Only features that have a geometry that intersects the bounding -#' box are selected.The bounding box is provided as four or six numbers, depending -#' on whether the coordinate reference system includes a vertical axis (height or -#' depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -#' vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, -#' Southern-most latitude, Eastern-most longitude, Northern-most longitude). -#' @param limit The optional limit parameter is used to control the subset of the -#' selected features that should be returned in each page. The maximum allowable -#' limit is 50000. It may be beneficial to set this number lower if your internet -#' connection is spotty. The default (`NA`) will set the limit to the maximum -#' allowable limit for the service. -#' @param max_results The optional maximum number of rows to return. This value -#' must be less than the requested limit. -#' @param convertType logical, defaults to `TRUE`. If `TRUE`, the function -#' will convert the data to dates and qualifier to string vector. -#' @param skipGeometry This option can be used to skip response geometries for -#' each feature. The returning object will be a data frame with no spatial -#' information. -#' @param no_paging logical, defaults to `FALSE`. If `TRUE`, the data will -#' be requested from a native csv format. This can be dangerous because the -#' data will cut off at 50,000 rows without indication that more data -#' is available. Use `TRUE` with caution. 
+#' @inheritParams check_arguments_api +#' @inheritParams check_arguments_non_api #' #' @inherit read_waterdata_continuous details #' @@ -77,7 +57,7 @@ #' meta_1 <- read_waterdata_ts_meta(monitoring_location_id = site) #' #' meta_multi <- read_waterdata_ts_meta(monitoring_location_id = c("USGS-01491000", -#' "USGS-01645000"), +#' "USGS-01645000"), #' parameter_code = c("00060", "00010"), #' properties = c("monitoring_location_id", #' "parameter_code", @@ -109,16 +89,19 @@ read_waterdata_ts_meta <- function( time_series_id = NA_character_, web_description = NA_character_, skipGeometry = NA, - limit = NA, - max_results = NA, bbox = NA, begin = NA_character_, end = NA_character_, - convertType = TRUE, - no_paging = FALSE + ..., + limit = getOption("dataRetrieval.limit"), + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) { - service = "time-series-metadata" + service <- "time-series-metadata" output_id <- "time_series_id" + rlang::check_dots_empty() args <- mget(names(formals())) return_list <- get_ogc_data(args, output_id, service) diff --git a/R/sysdata.rda b/R/sysdata.rda index d984e226a..76776c7db 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/R/walk_pages.R b/R/walk_pages.R index 7b00adb65..c703bd3af 100644 --- a/R/walk_pages.R +++ b/R/walk_pages.R @@ -12,6 +12,8 @@ walk_pages <- function(req) { on_error = "stop" ) + failures <- resps |> httr2::resps_failures() + return_list <- resps |> httr2::resps_successes() |> httr2::resps_data(\(resp) get_resp_data(resp)) @@ -39,19 +41,7 @@ get_resp_data <- function(resp) { return_df <- sf::read_sf(httr2::resp_body_string(resp)) - included_num_cols <- names(return_df)[names(return_df) %in% num_cols] - - if ( - !all(sapply( - sf::st_drop_geometry(return_df[, included_num_cols]), - is.numeric - )) - ) { - return_df[, 
included_num_cols] <- lapply( - sf::st_drop_geometry(return_df[, included_num_cols]), - as.numeric - ) - } + return_df <- coerce_num_cols(return_df, is_sf = TRUE) if ("qualifier" %in% names(return_df)) { return_df$qualifier <- as.character(vapply( @@ -97,14 +87,7 @@ next_req_url <- function(resp, req) { return(NULL) } - header_info <- httr2::resp_headers(resp) - if (Sys.getenv("API_USGS_PAT") != "") { - message( - "Remaining requests this hour:", - header_info$`x-ratelimit-remaining`, - " " - ) - } + log_rate_limit(resp) if ("links" %in% names(body)) { links <- body$links if (any(sapply(links, function(x) x$rel) == "next")) { @@ -126,30 +109,25 @@ get_csv <- function(req, limit) { skip_geo <- grepl("skipGeometry=true", req$url, ignore.case = TRUE) resp <- httr2::req_perform(req) - header_info <- httr2::resp_headers(resp) - if (Sys.getenv("API_USGS_PAT") != "") { - message( - "Remaining requests this hour:", - header_info$`x-ratelimit-remaining`, - " " - ) - } + log_rate_limit(resp) if (httr2::resp_has_body(resp)) { return_list <- httr2::resp_body_string(resp) - df <- data.table::fread(input = return_list, data.table = FALSE) - - included_num_cols <- names(df)[names(df) %in% num_cols] + df <- data.table::fread( + input = return_list, + data.table = FALSE, + colClasses = "character" + ) - if (!all(sapply(df[, included_num_cols], is.numeric))) { - df[, included_num_cols] <- lapply(df[, included_num_cols], as.numeric) - } + df <- coerce_num_cols(df) if (skip_geo) { df <- df[, names(df)[!names(df) %in% c("x", "y")]] } else { - df <- sf::st_as_sf(df, coords = c("x", "y")) - sf::st_crs(df) <- 4269 + if (all(c("x", "y") %in% names(df))) { + df <- sf::st_as_sf(df, coords = c("x", "y")) + sf::st_crs(df) <- 4269 + } } if (nrow(df) == limit) { @@ -164,3 +142,32 @@ ensure all requested data is returned." 
return(df) } + +coerce_num_cols <- function(df, is_sf = FALSE) { + included_num_cols <- names(df)[names(df) %in% num_cols] + if (length(included_num_cols) == 0) { + return(df) + } + + check_df <- if (is_sf) { + sf::st_drop_geometry(df[, included_num_cols, drop = FALSE]) + } else { + df[, included_num_cols, drop = FALSE] + } + + if (!all(vapply(check_df, is.numeric, logical(1)))) { + df[, included_num_cols] <- lapply(check_df, as.numeric) + } + df +} + +log_rate_limit <- function(resp) { + if (Sys.getenv("API_USGS_PAT") != "") { + header_info <- httr2::resp_headers(resp) + message( + "Remaining requests this hour:", + header_info$`x-ratelimit-remaining`, + " " + ) + } +} diff --git a/_pkgdown.yml b/_pkgdown.yml index f16382908..6dc8a7711 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -46,6 +46,8 @@ navbar: href: articles/daily_data_statistics.html - text: Continuous Data href: articles/continuous_pr.html + - text: USGS Reference Lists + href: articles/Reference_Lists.html - text: Changes to QW href: articles/qwdata_changes.html - text: Background @@ -87,6 +89,8 @@ reference: - read_waterdata_channel - read_waterdata_field_meta - read_waterdata_combined_meta + - read_waterdata_ratings + - read_waterdata_peaks - title: National Water Information System (NWIS) desc: Functions to retrieve (USGS) NWIS data. These will be slowly phased out and replaced with the read_waterdata family of functions. 
contents: diff --git a/docker/Dockerfile b/docker/Dockerfile index 404040c33..34cf659fb 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,31 +1,48 @@ -FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.4 +FROM code.chs.usgs.gov:5001/ctek/docker/r-lang/r-base:4.6 -# Necessary R libraries -RUN apt-get update -qq && apt-get -y --no-install-recommends install \ - r-cran-oce \ - r-cran-devtools \ - r-cran-here \ - r-cran-rmarkdown \ - r-cran-knitr \ - r-cran-dt \ - r-cran-data.table \ - r-cran-gridextra \ - r-cran-tidyverse \ - r-cran-jsonlite \ - r-cran-readr \ - r-cran-xml2 \ - r-cran-httr2 \ - r-cran-rsconnect \ - r-cran-connectapi \ - r-cran-covr \ - r-cran-sf \ - r-cran-zoo \ - r-cran-patchwork \ - r-cran-maps \ - r-cran-leaflet \ - r-cran-readxl \ - r-cran-whisker \ - r-cran-ggplot2 \ +# Change the name of this environment to something which pleases you, if you +# so please. But the name will not be relevant for most cases, as reticulate +# will be pointed to the environment no matter what it is named. 
+ARG CONDA_ENVIRONMENT_NAME=dataretrieval + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + wget \ && rm -rf /var/lib/apt/lists/* + +RUN wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" && \ + bash Miniforge3.sh -b -p "${HOME}/conda" && \ + rm Miniforge3.sh +ENV CONDA_DIR="/root/conda" +ENV PATH=$CONDA_DIR/bin:$PATH +COPY environment.yml / +RUN conda env create -n ${CONDA_ENVIRONMENT_NAME} -f /environment.yml && conda clean -a -y +# Necessary R libraries +RUN apt-get update -qq && apt-get -y --no-install-recommends install \ + r-cran-rcmdcheck \ + r-cran-testthat \ + r-cran-pkgdown \ + r-cran-rmarkdown \ + r-cran-knitr \ + r-cran-dt \ + r-cran-data.table \ + r-cran-dplyr \ + r-cran-purrr \ + r-cran-lubridate \ + r-cran-tidyr \ + r-cran-jsonlite \ + r-cran-readr \ + r-cran-xml2 \ + r-cran-httr2 \ + r-cran-covr \ + r-cran-sf \ + r-cran-patchwork \ + r-cran-leaflet \ + r-cran-readxl \ + r-cran-whisker \ + r-cran-ggplot2 \ + r-cran-reticulate \ + && rm -rf /var/lib/apt/lists/* +ENV RETICULATE_PYTHON=/root/conda/envs/${CONDA_ENVIRONMENT_NAME}/bin/python diff --git a/inst/CITATION b/inst/CITATION index 0a45adf0b..a77feb353 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -37,9 +37,9 @@ bibentry(bibtype = "Manual", title = "dataRetrieval: R packages for discovering and retrieving water data available from U.S. federal hydrologic web services", publisher = "U.S. Geological Survey", address="Reston, VA", - version = "2.7.24", + version = "2.7.25", institution = "U.S. 
Geological Survey", year = 2026, doi = "10.5066/P9X4L3GE", - textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., Blodgett, D.L., Hinman, E.D., Zemmels, J., 2026, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.24, doi:10.5066/P9X4L3GE" + textVersion = "De Cicco, L.A., Hirsch, R.M., Lorenz, D., Watkins, W.D., Johnson, M., Blodgett, D.L., Hinman, E.D., Zemmels, J., 2026, dataRetrieval: R packages for discovering and retrieving water data available from Federal hydrologic web services, v.2.7.25, doi:10.5066/P9X4L3GE" ) diff --git a/man/check_arguments_api.Rd b/man/check_arguments_api.Rd new file mode 100644 index 000000000..d53e12ce6 --- /dev/null +++ b/man/check_arguments_api.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_ogc_data.R +\name{check_arguments_api} +\alias{check_arguments_api} +\title{Check other arguments} +\usage{ +check_arguments_api(bbox, skipGeometry) +} +\arguments{ +\item{bbox}{Only features that have a geometry that intersects the bounding +box are selected.The bounding box is provided as four or six numbers, depending +on whether the coordinate reference system includes a vertical axis (height or +depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, +Southern-most latitude, Eastern-most longitude, Northern-most longitude).} + +\item{skipGeometry}{This parameter can be used to skip response geometries for +each feature. The returning object will be a data frame with no spatial +information. The default \code{NA} will not specify the argument in the request.} +} +\description{ +Additional functions to check types and create parameter descriptions. 
+} +\keyword{internal} diff --git a/man/check_arguments_non_api.Rd b/man/check_arguments_non_api.Rd new file mode 100644 index 000000000..193ab821a --- /dev/null +++ b/man/check_arguments_non_api.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_ogc_data.R +\name{check_arguments_non_api} +\alias{check_arguments_non_api} +\title{Check non-API arguments} +\usage{ +check_arguments_non_api( + convertType, + no_paging, + limit, + attach_request, + chunk_size, + ... +) +} +\arguments{ +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{\dots}{Not used. 
Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} +} +\description{ +Function to check types and create parameter descriptions. +} +\keyword{internal} diff --git a/man/construct_api_requests.Rd b/man/construct_api_requests.Rd index 3e691d751..cbad05506 100644 --- a/man/construct_api_requests.Rd +++ b/man/construct_api_requests.Rd @@ -6,30 +6,54 @@ \usage{ construct_api_requests( service, - properties = NA_character_, + output_id, + ..., bbox = NA, - skipGeometry = FALSE, - no_paging = FALSE, - ... + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ \item{service}{Which service available on \url{https://api.waterdata.usgs.gov/ogcapi/v0/}.} -\item{properties}{The properties that should be included for each feature. The -parameter value is a comma-separated list of property names which depend on the -service being called.} +\item{output_id}{Name of id column to return} + +\item{...}{Extra parameters from the specific services.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth).} -\item{skipGeometry}{This option can be used to skip response geometries for -each feature. The returning object will be a data frame with no spatial -information.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{...}{Extra parameters from the specific services.} +\item{no_paging}{logical, defaults to FALSE. 
+If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Main documentation: \url{https://api.waterdata.usgs.gov/ogcapi/v0/}, @@ -39,10 +63,12 @@ Swagger docs: \url{https://api.waterdata.usgs.gov/ogcapi/v0/openapi?f=html}. 
site <- "USGS-02238500" pcode <- "00060" req_dv <- construct_api_requests("daily", + output_id = "daily_id", monitoring_location_id = site, parameter_code = "00060") req_dv <- construct_api_requests("daily", + output_id = "daily_id", monitoring_location_id = site, parameter_code = c("00060", "00065")) @@ -50,9 +76,10 @@ sites <- c("USGS-01491000", "USGS-01645000") start_date <- "2018-01-01" end_date <- "2022-01-01" req_dv <- construct_api_requests("daily", - monitoring_location_id = sites, - parameter_code = c("00060", "00065"), - datetime = c(start_date, end_date)) + output_id = "daily_id", + monitoring_location_id = sites, + parameter_code = c("00060", "00065"), + datetime = c(start_date, end_date)) } \keyword{internal} diff --git a/man/construct_waterdata_sample_request.Rd b/man/construct_waterdata_sample_request.Rd index e6489e925..0ab3f968b 100644 --- a/man/construct_waterdata_sample_request.Rd +++ b/man/construct_waterdata_sample_request.Rd @@ -100,7 +100,7 @@ would be needed from prior project information.} information would be needed from the data supplier.} \item{siteTypeName}{Site type name query parameter. See available -options by running \code{check_param("sitetype")$typeName}.} +options by running \code{check_waterdata_sample_params("sitetype")$typeName}.} \item{usgsPCode}{USGS parameter code. See available options by running \code{check_waterdata_sample_params("characteristics")$parameterCode}.} diff --git a/man/readNWISpeak.Rd b/man/readNWISpeak.Rd index f36a4007b..c07c8a96a 100644 --- a/man/readNWISpeak.Rd +++ b/man/readNWISpeak.Rd @@ -77,12 +77,12 @@ R Date objects. 
} \examples{ \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} -site_ids <- c("01594440", "040851325") +#site_ids <- c("01594440", "040851325") \donttest{ -data <- readNWISpeak(site_ids) -data2 <- readNWISpeak(site_ids, asDateTime = FALSE) -stations <- c("06011000") -peakdata <- readNWISpeak(stations, convertType = FALSE) +#data <- readNWISpeak(site_ids) +#data2 <- readNWISpeak(site_ids, asDateTime = FALSE) +#stations <- c("06011000") +#peakdata <- readNWISpeak(stations, convertType = FALSE) } \dontshow{\}) # examplesIf} } diff --git a/man/readNWISrating.Rd b/man/readNWISrating.Rd index 0aaafcb90..cf520780a 100644 --- a/man/readNWISrating.Rd +++ b/man/readNWISrating.Rd @@ -48,8 +48,8 @@ relate flow to stage. \dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} site_id <- "01594440" \donttest{ -data <- readNWISrating(site_id, "base") -attr(data, "RATING") +#data <- readNWISrating(site_id, "base") +#attr(data, "RATING") } \dontshow{\}) # examplesIf} } diff --git a/man/read_USGS_samples.Rd b/man/read_USGS_samples.Rd new file mode 100644 index 000000000..9729aac1e --- /dev/null +++ b/man/read_USGS_samples.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_samples.R +\name{read_USGS_samples} +\alias{read_USGS_samples} +\title{Deprecated: Use \code{read_waterdata_samples} instead} +\usage{ +read_USGS_samples(...) +} +\arguments{ +\item{...}{Arguments passed to \code{\link{read_waterdata_samples}}.} +} +\value{ +data frame returned from web service call. +} +\description{ +This function has been renamed to \code{\link{read_waterdata_samples}}. 
+} +\keyword{internal} diff --git a/man/read_waterdata.Rd b/man/read_waterdata.Rd index 6b78e0bbf..942c5dcfd 100644 --- a/man/read_waterdata.Rd +++ b/man/read_waterdata.Rd @@ -4,7 +4,14 @@ \alias{read_waterdata} \title{Generalized USGS Water Data API retrieval function} \usage{ -read_waterdata(service, CQL, ..., convertType = TRUE) +read_waterdata( + service, + CQL, + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) } \arguments{ \item{service}{character, can be any existing collection.} @@ -15,6 +22,16 @@ read_waterdata(service, CQL, ..., convertType = TRUE) \item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function will convert the data to dates and qualifier to string vector.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Function that allows complex CQL queries. 
diff --git a/man/read_waterdata_channel.Rd b/man/read_waterdata_channel.Rd index 537a7fdf6..e9bee3b98 100644 --- a/man/read_waterdata_channel.Rd +++ b/man/read_waterdata_channel.Rd @@ -33,9 +33,12 @@ read_waterdata_channel( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -108,8 +111,7 @@ Examples: Only features that have a \code{last_modified} that intersects the value of datetime are selected. -\if{html}{\out{
<div class="sourceCode">}}\preformatted{ field_visit_id = NA_character_, -}\if{html}{\out{</div>
}}} +field_visit_id = NA_character_,} \item{channel_measurement_type}{The channel measurement type.} @@ -118,30 +120,47 @@ Available options are: geometry, channel_measurements_id, monitoring_location_id, field_visit_id, measurement_number, time, channel_name, channel_flow, channel_flow_unit, channel_width, channel_width_unit, channel_area, channel_area_unit, channel_velocity, channel_velocity_unit, channel_location_distance, channel_location_distance_unit, channel_stability, channel_material, channel_evenness, horizontal_velocity_description, vertical_velocity_description, longitudinal_velocity_description, measurement_type, last_modified, channel_measurement_type, channel_location_direction. The default (\code{NA}) will return all columns of the data.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. 
The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Channel measurements taken as part of streamflow field measurements. 
diff --git a/man/read_waterdata_combined_meta.Rd b/man/read_waterdata_combined_meta.Rd index 5ce5b3193..efab47c84 100644 --- a/man/read_waterdata_combined_meta.Rd +++ b/man/read_waterdata_combined_meta.Rd @@ -64,9 +64,12 @@ read_waterdata_combined_meta( properties = NA_character_, skipGeometry = NA, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -234,33 +237,50 @@ See also Details below for more information.} \item{properties}{A vector of requested columns to be returned from the query. Available options are: -geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code, field_measurement_id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, data_type, computation_identifier, thresholds, sublocatio +geometry, monitoring_location_id, agency_code, agency_name, monitoring_location_number, monitoring_location_name, district_code, country_code, country_name, 
state_code, state_name, county_code, county_name, minor_civil_division_code, site_type_code, site_type, hydrologic_unit_code, basin_code, altitude, altitude_accuracy, altitude_method_code, altitude_method_name, vertical_datum, vertical_datum_name, horizontal_positional_accuracy_code, horizontal_positional_accuracy, horizontal_position_method_code, horizontal_position_method_name, original_horizontal_datum, original_horizontal_datum_name, drainage_area, contributing_drainage_area, time_zone_abbreviation, uses_daylight_savings, construction_date, aquifer_code, national_aquifer_code, aquifer_type_code, well_constructed_depth, hole_constructed_depth, depth_source_code, field_measurement_id, unit_of_measure, parameter_name, parameter_code, statistic_id, last_modified, begin, end, data_type, computation_identifier, thresholds, sublocation_identifier, primary, web_description, parameter_description, parent_time_series_id. The default (\code{NA}) will return all columns of the data.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). 
+Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} - -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. 
+If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ This endpoint combines metadata from timeseries and field measurements collections by site. diff --git a/man/read_waterdata_continuous.Rd b/man/read_waterdata_continuous.Rd index 8b66cbb29..b35db8666 100644 --- a/man/read_waterdata_continuous.Rd +++ b/man/read_waterdata_continuous.Rd @@ -15,9 +15,12 @@ read_waterdata_continuous( value = NA, last_modified = NA_character_, time = NA_character_, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -73,20 +76,35 @@ Only features that have a \code{time} that intersects the value of datetime are See also Details below for more information.} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{\dots}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{convertType}{logical, defaults to \code{TRUE}. 
If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector, and sepcifically -order the returning data frame by time and monitoring_location_id.} - -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Continuous data are collected via automated sensors installed at a monitoring location. They are collected at a high frequency and often at a fixed 15-minute interval. Depending on the specific monitoring location, the data may be transmitted automatically via telemetry and be available on WDFN within minutes of collection, while other times the delivery of data may be delayed if the monitoring location does not have the capacity to automatically transmit data. Continuous data are described by parameter name and parameter code (pcode). These data might also be referred to as "instantaneous values" or "IV". 
diff --git a/man/read_waterdata_daily.Rd b/man/read_waterdata_daily.Rd index 9f3fa8a33..ef3d7eede 100644 --- a/man/read_waterdata_daily.Rd +++ b/man/read_waterdata_daily.Rd @@ -18,9 +18,12 @@ read_waterdata_daily( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -67,9 +70,9 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -88,22 +91,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). 
+Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. 
The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Daily data provide one data value to represent water conditions for the day. Throughout much of the history of the USGS, the primary water data available was daily data collected manually at the monitoring location once each day. With improved availability of computer storage and automated transmission of data, the daily data published today are generally a statistical summary or metric of the continuous data collected each day, such as the daily mean, minimum, or maximum value. Daily data are automatically calculated from the continuous data of the same parameter code and are described by parameter code and a statistic code. These data have also been referred to as “daily values” or “DV”. diff --git a/man/read_waterdata_field_measurements.Rd b/man/read_waterdata_field_measurements.Rd index 8242d19e6..b9543526c 100644 --- a/man/read_waterdata_field_measurements.Rd +++ b/man/read_waterdata_field_measurements.Rd @@ -10,6 +10,7 @@ read_waterdata_field_measurements( observing_procedure_code = NA_character_, properties = NA_character_, field_visit_id = NA_character_, + field_measurements_series_id = NA_character_, approval_status = NA_character_, unit_of_measure = NA_character_, qualifier = NA_character_, @@ -23,9 +24,12 @@ read_waterdata_field_measurements( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_data"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -46,6 +50,12 @@ The default 
(\code{NA}) will return all columns of the data.} \item{field_visit_id}{A universally unique identifier (UUID) for the field visit. Multiple measurements may be made during a single field visit.} +\item{field_measurements_series_id}{A unique identifier representing a single +collection series. This corresponds to the \code{field_measurements_series_id} field in the +\code{read_waterdata_field_meta} endpoint. Collection series are defined as the +set of field measurements at a given monitoring location for a single parameter +code using a single reading type.} + \item{approval_status}{Some of the data that you have obtained from this U.S. Geological Survey database may not have received Director's approval. Any such data values are qualified as provisional and are subject to revision. Provisional data are released on the condition that neither the USGS nor the United States Government may be held liable for any damages resulting from its use. This field reflects the approval status of each record, and is either "Approved", meaining processing review has been completed and the data is approved for publication, or "Provisional" and subject to revision. For more information about provisional data, go to \url{https://waterdata.usgs.gov/provisional-data-statement/}.} \item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} @@ -83,9 +93,9 @@ What and where the control of flow is for the gage pool.} Rated measurement based on the hydrologic/hydraulic conditions in which the measurement was made (excellent (2 percent), good (5 percent), fair (8 percent), or poor (more than 8 percent). percent)} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. 
The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -104,22 +114,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. 
This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Field measurements are physically measured values collected during a visit to the monitoring location. Field measurements consist of measurements of gage height and discharge, and readings of groundwater levels, and are primarily used as calibration readings for the automated sensors collecting continuous data. They are collected at a low frequency, and delivery of the data in WDFN may be delayed due to data processing time. 
@@ -169,6 +196,9 @@ multi_site <- read_waterdata_field_measurements( old_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", time = c("1980-01-01", NA)) +new_df <- read_waterdata_field_measurements(monitoring_location_id = "USGS-425957088141001", + time = c(NA, "2020-01-01")) + surface_water <- read_waterdata_field_measurements( monitoring_location_id = c("USGS-07069000", "USGS-07064000", diff --git a/man/read_waterdata_field_meta.Rd b/man/read_waterdata_field_meta.Rd index 83d6bc752..1ecfe588f 100644 --- a/man/read_waterdata_field_meta.Rd +++ b/man/read_waterdata_field_meta.Rd @@ -16,8 +16,11 @@ read_waterdata_field_meta( skipGeometry = NA, bbox = NA, limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -83,30 +86,47 @@ Available options are: geometry, field_measurement_id, monitoring_location_id, parameter_code, parameter_name, parameter_description, begin, end, last_modified. The default (\code{NA}) will return all columns of the data.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). 
Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. 
+If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ This endpoint provides metadata about field measurement collections, including when the earliest and most recent observations for a parameter occurred at a monitoring location and its units. diff --git a/man/read_waterdata_latest_continuous.Rd b/man/read_waterdata_latest_continuous.Rd index 14b211de5..e980a60e6 100644 --- a/man/read_waterdata_latest_continuous.Rd +++ b/man/read_waterdata_latest_continuous.Rd @@ -17,9 +17,12 @@ read_waterdata_latest_continuous( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -62,9 +65,9 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -83,22 +86,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. 
The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. 
It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ This endpoint provides the most recent observation for each time series of continuous data. Continuous data are collected via automated sensors installed at a monitoring location. They are collected at a high frequency and often at a fixed 15-minute interval. Depending on the specific monitoring location, the data may be transmitted automatically via telemetry and be available on WDFN within minutes of collection, while other times the delivery of data may be delayed if the monitoring location does not have the capacity to automatically transmit data. Continuous data are described by parameter name and parameter code. These data might also be referred to as "instantaneous values" or "IV" diff --git a/man/read_waterdata_latest_daily.Rd b/man/read_waterdata_latest_daily.Rd index a50f727e9..6eb5a386b 100644 --- a/man/read_waterdata_latest_daily.Rd +++ b/man/read_waterdata_latest_daily.Rd @@ -18,9 +18,12 @@ read_waterdata_latest_daily( skipGeometry = NA, time = NA_character_, bbox = NA, - limit = NA, - convertType = TRUE, - no_paging = FALSE + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + limit = getOption("dataRetrieval.limit"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -67,9 +70,9 @@ Only features that have a \code{last_modified} that intersects the value of date See also Details below for more information.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip 
response geometries for each feature. The returning object will be a data frame with no spatial -information.} +information. The default \code{NA} will not specify the argument in the request.} \item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). Examples: @@ -88,22 +91,39 @@ See also Details below for more information.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{no_paging}{logical, defaults to FALSE. 
+If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Daily data provide one data value to represent water conditions for the day. Throughout much of the history of the USGS, the primary water data available was daily data collected manually at the monitoring location once each day. With improved availability of computer storage and automated transmission of data, the daily data published today are generally a statistical summary or metric of the continuous data collected each day, such as the daily mean, minimum, or maximum value. Daily data are automatically calculated from the continuous data of the same parameter code and are described by parameter code and a statistic code. These data have also been referred to as “daily values” or “DV”. 
@@ -148,7 +168,7 @@ dv_data <- read_waterdata_latest_daily(monitoring_location_id = site, skipGeometry = TRUE) multi_site <- read_waterdata_latest_daily(monitoring_location_id = c("USGS-01491000", - "USGS-01645000"), + "USGS-01645000"), parameter_code = c("00060", "00010")) } diff --git a/man/read_waterdata_metadata.Rd b/man/read_waterdata_metadata.Rd index 868df0652..54f50ce01 100644 --- a/man/read_waterdata_metadata.Rd +++ b/man/read_waterdata_metadata.Rd @@ -4,7 +4,12 @@ \alias{read_waterdata_metadata} \title{Generalized USGS Water Meta Data API retrieval function} \usage{ -read_waterdata_metadata(collection, limit = NA, ...) +read_waterdata_metadata( + collection, + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) } \arguments{ \item{collection}{character, can be any existing collection such @@ -14,14 +19,18 @@ as "parameter-codes", "agency-codes", "altitude-datums", "aquifer-codes", "national-aquifer-codes", "reliability-codes", "site-types", "statistic-codes", "topographic-codes", "time-zone-codes".} +\item{\dots}{Optional arguments to pass to the query. Available parameters +can be found with the \code{get_ogc_params} function.} + \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{\dots}{Optional arguments to pass to the query. Available parameters -can be found with the \code{get_ogc_params} function.} +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Function to get metadata from Water Data API. 
These are useful to get the @@ -36,11 +45,16 @@ altitude_datums <- read_waterdata_metadata("altitude-datums") aquifer_codes <- read_waterdata_metadata("aquifer-codes") aquifer_types <- read_waterdata_metadata("aquifer-types") counties <- read_waterdata_metadata("counties") +countries <- read_waterdata_metadata("countries") us_counties <- read_waterdata_metadata("counties", country_code = "US") coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") +methods <- read_waterdata_metadata("methods") +method_categories <- read_waterdata_metadata("method-categories") +method_citations <- read_waterdata_metadata("method-citations") +citations <- read_waterdata_metadata("citations") national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") parameter_codes <- read_waterdata_metadata("parameter-codes") reliability_codes <- read_waterdata_metadata("reliability-codes") diff --git a/man/read_waterdata_monitoring_location.Rd b/man/read_waterdata_monitoring_location.Rd index 178cad7a9..5072a644f 100644 --- a/man/read_waterdata_monitoring_location.Rd +++ b/man/read_waterdata_monitoring_location.Rd @@ -47,8 +47,12 @@ read_waterdata_monitoring_location( depth_source_code = NA_character_, properties = NA_character_, bbox = NA, - limit = NA, - skipGeometry = NA + skipGeometry = NA, + ..., + limit = getOption("dataRetrieval.limit"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -143,18 +147,39 @@ The default (\code{NA}) will return all columns of the data.} box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system 
includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} -\item{limit}{The optional limit parameter is used to control the subset of the +\item{skipGeometry}{This parameter can be used to skip response geometries for +each feature. The returning object will be a data frame with no spatial +information. The default \code{NA} will not specify the argument in the request.} + +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable -limit is 50000. It may be beneficial to set this number lower if your internet +limit is 50,000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} -\item{skipGeometry}{This option can be used to skip response geometries for -each feature. The returning object will be a data frame with no spatial -information.} +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. 
+Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Location information is basic information about the monitoring location including the name, identifier, agency responsible for data collection, and the date the location was established. It also includes information about the type of location, such as stream, lake, or groundwater, and geographic information about the location, such as state, county, latitude and longitude, and hydrologic unit code (HUC). @@ -189,6 +214,8 @@ site_info_no_sf <- read_waterdata_monitoring_location( bbox_vals = c(-94.00, 35.0, -93.5, 35.5) multi_site <- read_waterdata_monitoring_location(bbox = bbox_vals) + + } \dontshow{\}) # examplesIf} } diff --git a/man/read_waterdata_parameter_codes.Rd b/man/read_waterdata_parameter_codes.Rd index 95f9d0901..69cca65f2 100644 --- a/man/read_waterdata_parameter_codes.Rd +++ b/man/read_waterdata_parameter_codes.Rd @@ -17,7 +17,9 @@ read_waterdata_parameter_codes( temperature_basis = NA_character_, epa_equivalence = NA_character_, properties = NA_character_, - limit = NA + ..., + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -48,11 +50,18 @@ Available options are: geometry, parameter_code_id, parameter_name, unit_of_measure, parameter_group_code, parameter_description, medium, statistical_basis, time_basis, weight_basis, particle_size_basis, sample_fraction, temperature_basis, epa_equivalence. The default (\code{NA}) will return all columns of the data.} +\item{\dots}{Not used. 
Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + \item{limit}{The optional limit parameter is used to control the subset of the selected features that should be returned in each page. The maximum allowable limit is 50000. It may be beneficial to set this number lower if your internet connection is spotty. The default (\code{NA}) will set the limit to the maximum allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Parameter codes are 5-digit codes and associated descriptions used to identify the constituent measured and the units of measure. Some parameter code definitions include information about the sampling matrix, fraction, and methods used to measure the constituent. Some parameters are fixed-value (fxd) numeric codes having textual meaning (for example: parameter code 00041 is a weather code parameter, code of 60 means rain), but more commonly represent a numeric value for chemical, physical, or biological data. 
diff --git a/man/read_waterdata_peaks.Rd b/man/read_waterdata_peaks.Rd new file mode 100644 index 000000000..b809a24c6 --- /dev/null +++ b/man/read_waterdata_peaks.Rd @@ -0,0 +1,168 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_peaks.R +\name{read_waterdata_peaks} +\alias{read_waterdata_peaks} +\title{Get USGS Peak Data} +\usage{ +read_waterdata_peaks( + monitoring_location_id = NA_character_, + parameter_code = NA_character_, + properties = NA_character_, + time_series_id = NA_character_, + unit_of_measure = NA_character_, + value = NA, + last_modified = NA_character_, + water_year = NA_character_, + year = NA_character_, + month = NA_character_, + day = NA_character_, + time_of_day = NA_character_, + peak_since = NA_character_, + skipGeometry = NA, + time = NA_character_, + bbox = NA, + ..., + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + limit = getOption("dataRetrieval.limit"), + attach_request = getOption("dataRetrieval.attach_request") +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single monitoring location. This corresponds to the \code{id} field in the \code{monitoring-locations} endpoint. Monitoring location IDs are created by combining the agency code of the agency responsible for the monitoring location (e.g. USGS) with the ID number of the monitoring location (e.g. 02238500), separated by a hyphen (e.g. USGS-02238500). + +Multiple monitoring_location_ids can be requested as a character vector.} + +\item{parameter_code}{Parameter codes are 5-digit codes used to identify the constituent measured and the units of measure. A complete list of parameter codes and associated groupings can be found at \url{https://api.waterdata.usgs.gov/ogcapi/v0/collections/parameter-codes/items}. 
+ +Multiple parameter_codes can be requested as a character vector.} + +\item{properties}{A vector of requested columns to be returned from the query. +Available options are: +geometry, time_series_id, monitoring_location_id, parameter_code, peak_id, unit_of_measure, value, last_modified, time, water_year, year, month, day, time_of_day, peak_since. +The default (\code{NA}) will return all columns of the data.} + +\item{time_series_id}{A unique identifier representing a single time series. This corresponds to the \code{id} field in the \code{time-series-metadata} endpoint.} + +\item{unit_of_measure}{A human-readable description of the units of measurement associated with an observation.} + +\item{value}{The value of the observation. Values are transmitted as strings in the JSON response format in order to preserve precision.} + +\item{last_modified}{The last time a record was refreshed in our database. This may happen due to regular operational processes and does not necessarily indicate anything about the measurement has changed. +You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{last_modified} that intersects the value of datetime are selected. + +See also Details below for more information.} + +\item{water_year}{The water year (running from October 1st to September 30th) a peak occurred.} + +\item{year}{The calendar year a peak occurred.} + +\item{month}{The calendar month a peak occurred. 
If null, the month a peak occurred is unknown.} + +\item{day}{The day of the month a peak occurred. If null, the day a peak occurred is unknown.} + +\item{time_of_day}{The time of day a peak occurred. If null, the time of day a peak occurred is unknown.} + +\item{peak_since}{If not null, this record represents the peak value for the parameter code since the year contained in "peak_since".} + +\item{skipGeometry}{This parameter can be used to skip response geometries for +each feature. The returning object will be a data frame with no spatial +information. The default \code{NA} will not specify the argument in the request.} + +\item{time}{The date an observation represents. You can query this field using date-times or intervals, adhering to RFC 3339, or using ISO 8601 duration objects. Intervals may be bounded or half-bounded (double-dots at start or end). +Examples: +\itemize{ +\item A date-time: "2018-02-12T23:20:50Z" +\item A bounded interval: "2018-02-12T00:00:00Z/2018-03-18T12:31:12Z" +\item Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z" +\item Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours +} + +Only features that have a \code{time} that intersects the value of datetime are selected. If a feature has multiple temporal properties, it is the decision of the server whether only a single temporal property is used to determine the extent or all relevant temporal properties. + +See also Details below for more information.} + +\item{bbox}{Only features that have a geometry that intersects the bounding +box are selected. The bounding box is provided as four or six numbers, depending +on whether the coordinate reference system includes a vertical axis (height or +depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +vector structured: c(xmin,ymin,xmax,ymax).
+Another way to think of it is c(Western-most longitude, +Southern-most latitude, Eastern-most longitude, Northern-most latitude).} + +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will +be requested from a native csv format. This can be dangerous because the +data will cut off at 50,000 rows without indication that more data +is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} +} +\description{ +Annual peak flow values are the maximum instantaneous streamflow values recorded at a particular site for the entire water year from October 1 to September 30. Note that the annual peak flow value may not occur at the same time the maximum water level occurs due to conditions such as backwater, tidal fluctuations, etc.
+} +\details{ +You can also use a vector of length 2 for any time queries (such as time +or last_modified). The first value is the starting date (or datetime), +the second value is the ending date (or datetime). +NA's within the vector indicate a half-bound date. +For example, \code{time = c("2024-01-01", NA)} will return all data starting +at 2024-01-01. +\code{time = c(NA, "2024-01-01")} will return all data from the beginning of +the timeseries until 2024-01-01. +By default, time is assumed UTC, although time zone attributes +will be accommodated. As an example, setting \code{time = as.POSIXct(c("2021-01-01 12:00:00", +"2021-01-01 14:00"), tz = "America/New_York")} will request data that is between +noon and 2pm eastern time on 2021-01-01. +All time values RETURNED from the service are UTC with the exception of +daily data, which returns time values in local dates. +} +\examples{ +\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} + +\donttest{ +wi_peaks <- read_waterdata_combined_meta( + state_name = "Wisconsin", + data_type = "Peaks", + parameter_code = "00060") + + +dv_data_sf <- read_waterdata_peaks( + monitoring_location_id = wi_peaks$monitoring_location_id[1], + parameter_code = "00060") + +} +\dontshow{\}) # examplesIf} +} diff --git a/man/read_waterdata_ratings.Rd b/man/read_waterdata_ratings.Rd new file mode 100644 index 000000000..4b516e345 --- /dev/null +++ b/man/read_waterdata_ratings.Rd @@ -0,0 +1,118 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_waterdata_ratings.R +\name{read_waterdata_ratings} +\alias{read_waterdata_ratings} +\title{Get USGS Rating Curve Data} +\usage{ +read_waterdata_ratings( + monitoring_location_id = NA_character_, + file_type = c("exsa", "base", "corr"), + file_path = tempdir(), + bbox = NA, + datetime = NA_character_, + ..., + limit = 10000, + download_and_parse = TRUE +) +} +\arguments{ +\item{monitoring_location_id}{A unique identifier representing a single
+monitoring location. Monitoring location IDs are created by combining the +agency code of the agency responsible for the monitoring location (e.g. USGS) +with the ID number of the monitoring location (e.g. 02238500), separated by +a hyphen (e.g. USGS-02238500).} + +\item{file_type}{Rating file type. Could be any of "exsa", "corr", or "base". +If \code{file_type} is "base" then the columns are +INDEP, typically the gage height, in feet; DEP, typically the streamflow, +in cubic feet per second; and STOR, where "*" indicates that the pair are +a fixed point of the rating curve. If \code{file_type} is "exsa" then an +additional column, SHIFT, is included that indicates the current shift in +the rating for that value of INDEP. If \code{file_type} is "corr" then the +columns are INDEP, typically the gage height, in feet; CORR, the correction +for that value; and CORRINDEP, the corrected value for CORR.} + +\item{file_path}{Path to save the rating curve rdb files. The +default is \code{tempdir()}, which will wipe out the files.} + +\item{bbox}{Only features that have a geometry that intersects the bounding +box are selected. The bounding box is provided as four or six numbers, depending +on whether the coordinate reference system includes a vertical axis (height or +depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, +Southern-most latitude, Eastern-most longitude, Northern-most latitude).} + +\item{datetime}{Only return items that have a temporal property that +intersects this value. Either a date-time or an interval, open or closed.
+See Details below.} + +\item{\dots}{Not used.} + +\item{limit}{Limits the number of results that are included in each page of +the response (capped at the default 10,000).} + +\item{download_and_parse}{Logical to define whether or not to download, parse, +and return a list of data frames with rating curve data (\code{TRUE}), or to return +just a list of available rating curve files (\code{FALSE}). Default is \code{TRUE}.} +} +\value{ +List of data frames which contain the requested rating curves. +} +\description{ +Reads current rating tables for active USGS streamgages. More information +can be found at https://api.waterdata.usgs.gov/docs/stac/. +} +\details{ +You can also use a vector of length 2 for any time queries (such as time +or last_modified). The first value is the starting date (or datetime), +the second value is the ending date (or datetime). +NA's within the vector indicate a half-bound date. +For example, \code{time = c("2024-01-01", NA)} will return all data starting +at 2024-01-01. +\code{time = c(NA, "2024-01-01")} will return all data from the beginning of +the timeseries until 2024-01-01. +By default, time is assumed UTC, although time zone attributes +will be accommodated. As an example, setting \code{time = as.POSIXct(c("2021-01-01 12:00:00", +"2021-01-01 14:00"), tz = "America/New_York")} will request data between +noon and 2pm eastern time on 2021-01-01. +All time values RETURNED from the service are UTC with the exception of +daily data, which returns time values in local dates.
+} +\examples{ +\dontshow{if (is_dataRetrieval_user()) withAutoprint(\{ # examplesIf} + +\donttest{ + +monitoring_location_id <- c("USGS-01104475", "USGS-01104460") +ratings_exsa <- read_waterdata_ratings( + monitoring_location_id = monitoring_location_id, + file_type = "exsa") + +head(ratings_exsa[["USGS-01104475.exsa.rdb"]]) +comment(ratings_exsa[["USGS-01104475.exsa.rdb"]])[1:15] + +ratings_corr <- read_waterdata_ratings( + monitoring_location_id = monitoring_location_id, + file_type = "corr") + +head(ratings_corr[["USGS-01104460.corr.rdb"]]) +comment(ratings_corr[["USGS-01104460.corr.rdb"]])[1:15] + +rating_2 <- read_waterdata_ratings( + monitoring_location_id = monitoring_location_id, + file_type = c("corr", "exsa")) +names(rating_2) + +bbox <- c(-95.00, 40.0, -92.0, 42) + +bbox_query <- read_waterdata_ratings(bbox = bbox, + download_and_parse = FALSE) +length(bbox_query) +recent_query <- read_waterdata_ratings(bbox = bbox, + datetime = c(Sys.Date()-7, NA), + download_and_parse = FALSE) +length(recent_query) +} +\dontshow{\}) # examplesIf} +} diff --git a/man/read_waterdata_samples.Rd b/man/read_waterdata_samples.Rd index 9a6f502b6..224ca4fc8 100644 --- a/man/read_waterdata_samples.Rd +++ b/man/read_waterdata_samples.Rd @@ -2,7 +2,6 @@ % Please edit documentation in R/read_waterdata_samples.R \name{read_waterdata_samples} \alias{read_waterdata_samples} -\alias{read_USGS_samples} \title{USGS Samples Data} \usage{ read_waterdata_samples( @@ -31,33 +30,6 @@ read_waterdata_samples( tz = "UTC", convertType = TRUE ) - -read_USGS_samples( - monitoringLocationIdentifier = NA, - siteTypeCode = NA, - boundingBox = NA, - hydrologicUnit = NA, - activityMediaName = NA, - characteristicGroup = NA, - characteristic = NA, - characteristicUserSupplied = NA, - activityStartDateLower = NA, - activityStartDateUpper = NA, - countryFips = NA, - stateFips = NA, - countyFips = NA, - projectIdentifier = NA, - recordIdentifierUserSupplied = NA, - siteTypeName = NA, - usgsPCode = NA, - 
pointLocationLatitude = NA, - pointLocationLongitude = NA, - pointLocationWithinMiles = NA, - dataType = "results", - dataProfile = NA, - tz = "UTC", - convertType = TRUE -) } \arguments{ \item{monitoringLocationIdentifier}{A monitoring location identifier has two parts: the agency code @@ -130,7 +102,7 @@ would be needed from prior project information.} information would be needed from the data supplier.} \item{siteTypeName}{Site type name query parameter. See available -options by running \code{check_param("sitetype")$typeName}.} +options by running \code{check_waterdata_sample_params("sitetype")$typeName}.} \item{usgsPCode}{USGS parameter code. See available options by running \code{check_waterdata_sample_params("characteristics")$parameterCode}.} diff --git a/man/read_waterdata_stats.Rd b/man/read_waterdata_stats.Rd index ead6991bc..b016b69c8 100644 --- a/man/read_waterdata_stats.Rd +++ b/man/read_waterdata_stats.Rd @@ -13,6 +13,7 @@ read_waterdata_stats_por( county_code = NA_character_, start_date = NA_character_, end_date = NA_character_, + normal_type = NA_character_, monitoring_location_id = NA_character_, parent_time_series_id = NA_character_, site_type_code = NA_character_, @@ -29,6 +30,7 @@ read_waterdata_stats_daterange( county_code = NA_character_, start_date = NA_character_, end_date = NA_character_, + interval_type = NA_character_, monitoring_location_id = NA_character_, parent_time_series_id = NA_character_, site_type_code = NA_character_, @@ -65,6 +67,10 @@ supplied then statistics will be supplied for the entire period of record.} \item{end_date}{End Date Query Parameter. The logic is inclusive i.e., it will also return records that match the date.} +\item{normal_type}{Normal Type Query Parameter. If unspecified, all matching data +will be returned. Otherwise, it will filter the results to one of the following +normals: day-of-year, month-of-year. 
Available values: "DOY", "MOY".} + \item{monitoring_location_id}{Each monitoring location has been assigned a unique station number that places them in downstream order. Accepts multiple values in a character vector.} @@ -91,6 +97,11 @@ returned. All statistics within the period of record will be returned if no parameter code or monitoring location identifier are specified.} \item{page_size}{Return a defined number of results (default: 1000).} + +\item{interval_type}{Interval Type Query Parameter. If unspecified, all matching +data will be returned. Otherwise, it will filter the results to one or more of +the following intervals: month, calendar year, water year. +Available values: "M", "CY", "WY".} } \description{ This service provides endpoints for access to computations on the historical @@ -117,6 +128,12 @@ x1 <- read_waterdata_stats_por( monitoring_location_id = c("USGS-02319394", "USGS-02171500") ) +# Request only month-of-year statistics using normal_type arg +x1 <- read_waterdata_stats_por( + monitoring_location_id = c("USGS-02319394", "USGS-02171500"), + normal_type = "MOY" +) + # Request temperature percentiles for specific month-day range # Returns: # - Day-of-year temperature percentiles for each day between June 1 through June 15. 
@@ -136,6 +153,12 @@ x3 <- read_waterdata_stats_daterange( monitoring_location_id = c("USGS-02319394", "USGS-02171500") ) +# Request only calendar year statistics +x3 <- read_waterdata_stats_daterange( + monitoring_location_id = c("USGS-02319394", "USGS-02171500"), + interval_type = "CY" +) + # Request specific gage height and discharge summaries for a limited date range # Returns: # - calendar month summaries for each month between January, 2010 through December, 2011 diff --git a/man/read_waterdata_ts_meta.Rd b/man/read_waterdata_ts_meta.Rd index 7a878d240..3188d4b8b 100644 --- a/man/read_waterdata_ts_meta.Rd +++ b/man/read_waterdata_ts_meta.Rd @@ -25,13 +25,15 @@ read_waterdata_ts_meta( time_series_id = NA_character_, web_description = NA_character_, skipGeometry = NA, - limit = NA, - max_results = NA, bbox = NA, begin = NA_character_, end = NA_character_, - convertType = TRUE, - no_paging = FALSE + ..., + limit = getOption("dataRetrieval.limit"), + convertType = getOption("dataRetrieval.convertType"), + no_paging = getOption("dataRetrieval.no_paging"), + chunk_size = getOption("dataRetrieval.site_chunk_size_meta"), + attach_request = getOption("dataRetrieval.attach_request") ) } \arguments{ @@ -127,37 +129,51 @@ this system for 120 days.} \item{web_description}{An optional description of the time series. WDFN and other USGS data dissemination products use this field, in combination with sublocation_identifier, to distinguish the differences between multiple time series for the same parameter code, statistic code, and monitoring location.} -\item{skipGeometry}{This option can be used to skip response geometries for +\item{skipGeometry}{This parameter can be used to skip response geometries for each feature. The returning object will be a data frame with no spatial -information.} - -\item{limit}{The optional limit parameter is used to control the subset of the -selected features that should be returned in each page. The maximum allowable -limit is 50000. 
It may be beneficial to set this number lower if your internet -connection is spotty. The default (\code{NA}) will set the limit to the maximum -allowable limit for the service.} - -\item{max_results}{The optional maximum number of rows to return. This value -must be less than the requested limit.} +information. The default \code{NA} will not specify the argument in the request.} \item{bbox}{Only features that have a geometry that intersects the bounding box are selected.The bounding box is provided as four or six numbers, depending on whether the coordinate reference system includes a vertical axis (height or depth). Coordinates are assumed to be in crs 4326. The expected format is a numeric -vector structured: c(xmin,ymin,xmax,ymax). Another way to think of it is c(Western-most longitude, +vector structured: c(xmin,ymin,xmax,ymax). +Another way to think of it is c(Western-most longitude, Southern-most latitude, Eastern-most longitude, Northern-most longitude).} \item{begin}{This field contains the same information as "begin_utc", but in the local time of the monitoring location. It is retained for backwards compatibility, but will be removed in V1 of these APIs.} \item{end}{This field contains the same information as "end_utc", but in the local time of the monitoring location. It is retained for backwards compatibility, but will be removed in V1 of these APIs.} -\item{convertType}{logical, defaults to \code{TRUE}. If \code{TRUE}, the function -will convert the data to dates and qualifier to string vector.} +\item{...}{Not used. Included to help differentiate official Water Data API arguments +from more seldom used, optional dataRetrieval-specific arguments.} + +\item{limit}{numeric, The optional limit parameter is used to control the subset of the +selected features that should be returned in each page. The maximum allowable +limit is 50,000. It may be beneficial to set this number lower if your internet +connection is spotty. 
The default (\code{NA}) will set the limit to the maximum +allowable limit for the service.} -\item{no_paging}{logical, defaults to \code{FALSE}. If \code{TRUE}, the data will +\item{convertType}{logical, defaults to TRUE. +If \code{TRUE}, the function will convert the data to dates, any qualifiers to string +vector and reorder the returned data frame.} + +\item{no_paging}{logical, defaults to FALSE. +If \code{TRUE}, the data will be requested from a native csv format. This can be dangerous because the data will cut off at 50,000 rows without indication that more data is available. Use \code{TRUE} with caution.} + +\item{chunk_size}{Number of monitoring_location_ids to chunk requests into. +The default for functions that don't generally return long-term data records +is 250, while +the default for time series functions is +10. +Setting to \code{NA} will eliminate site chunking, giving users full control.} + +\item{attach_request}{logical, defaults to TRUE. +If set to \code{TRUE}, the full request sent to the Water Data API is attached +as an attribute to the data set.} } \description{ Daily data and continuous measurements are grouped into time series, which represent a collection of observations of a single parameter, potentially aggregated using a standard statistic, at a single monitoring location. This endpoint provides metadata about those time series, including their operational thresholds, units of measurement, and when the earliest and most recent observations in a time series occurred. 
@@ -186,7 +202,7 @@ site <- "USGS-02238500" meta_1 <- read_waterdata_ts_meta(monitoring_location_id = site) meta_multi <- read_waterdata_ts_meta(monitoring_location_id = c("USGS-01491000", - "USGS-01645000"), + "USGS-01645000"), parameter_code = c("00060", "00010"), properties = c("monitoring_location_id", "parameter_code", diff --git a/man/summarize_waterdata_samples.Rd b/man/summarize_waterdata_samples.Rd index 3b5b7f17c..6afda5abd 100644 --- a/man/summarize_waterdata_samples.Rd +++ b/man/summarize_waterdata_samples.Rd @@ -10,10 +10,13 @@ summarize_waterdata_samples(monitoringLocationIdentifier) summarize_USGS_samples(monitoringLocationIdentifier) } \arguments{ -\item{monitoringLocationIdentifier}{A monitoring location identifier has two parts, -separated by a dash (-): the agency code and the location number. Location identifiers should be separated with commas, -for example: AZ014-320821110580701, CAX01-15304600, USGS-040851385. Location -numbers without an agency prefix are assumed to have the prefix USGS.} +\item{monitoringLocationIdentifier}{A single monitoring location identifier +with two parts, separated by a dash (-): the agency code and the location +number. Examples: USGS-040851385, AZ014-320821110580701, CAX01-15304600. +The summary service accepts only one site at a time; supplying a vector of +length > 1 raises an error. The agency prefix is required: bare location +numbers (e.g. 
"040851385") are accepted by the service but return an empty +result.} } \value{ data frame with summary of data available based on the monitoringLocationIdentifier diff --git a/tests/testthat/test_waterdata_stats.R b/tests/testthat/test_waterdata_stats.R index 792abf3bf..9da5ab901 100644 --- a/tests/testthat/test_waterdata_stats.R +++ b/tests/testthat/test_waterdata_stats.R @@ -284,6 +284,24 @@ test_that("read_waterdata_stats_por returns data", { expect_true(nrow(out) > 0) }) +test_that("normal_type arg works in read_waterdata_stats_por", { + skip_on_cran() + skip_if_offline() + + out <- read_waterdata_stats_por( + monitoring_location_id = "USGS-01646500", + parameter_code = "00060", + computation_type = "median", + page_size = 5, + normal_type = "MOY" + ) + + expect_s3_class(out, "sf") + expect_true(nrow(out) > 0) + # time_of_year should have 12 months of data + expect_true(length(out$time_of_year) == 12) +}) + test_that("read_waterdata_stats_daterange returns data", { skip_on_cran() skip_if_offline() @@ -297,4 +315,15 @@ test_that("read_waterdata_stats_daterange returns data", { expect_s3_class(out, "sf") expect_true(nrow(out) > 0) + + # setting interval_type arg returns fewer rows than unset + out1 <- read_waterdata_stats_daterange( + monitoring_location_id = "USGS-01646500", + parameter_code = "00060", + computation_type = "maximum", + interval_type = "CY", + page_size = 5 + ) + + expect_true(nrow(out1) < nrow(out)) }) diff --git a/tests/testthat/tests_general.R b/tests/testthat/tests_general.R index 43ff223bc..25d272e57 100644 --- a/tests/testthat/tests_general.R +++ b/tests/testthat/tests_general.R @@ -97,41 +97,6 @@ test_that("General NWIS retrievals working", { expect_is(timeseriesInfo$begin, "POSIXct") # nolint start: line_length_linter - url <- httr2::request( - "https://waterservices.usgs.gov/nwis/dv/?site=09037500&format=rdb&ParameterCd=00060&StatCd=00003&startDT=1985-10-02&endDT=2012-09-06" - ) - dv <- importRDB1(url, asDateTime = FALSE) - # nolint 
end - dailyStat <- readNWISdata( - site = c("03112500", "03111520", "02319394"), - service = "stat", - statReportType = "daily", - statType = c("p25", "p50", "p75", "min", "max"), - parameterCd = "00065", - convertType = FALSE - ) - expect_true(length(dailyStat$min_va) > 1) - expect_is(dailyStat$p25_va, "character") - - waterYearStat <- readNWISdata( - site = c("01646500"), - service = "stat", - statReportType = "annual", - statYearType = "water", - missingData = "on" - ) - expect_is(waterYearStat$mean_va, "numeric") - expect_is(waterYearStat$parameter_cd, "character") - - # Empty data - # note....not empty anymore! - # nolint start: line_length_linter - urlTest <- httr2::request( - "https://nwis.waterservices.usgs.gov/nwis/iv/?site=11447650&format=waterml,1.1&ParameterCd=63680&startDT=2016-12-13&endDT=2016-12-13" - ) - x <- importWaterML1(urlTest) - expect_true(all(c("agency_cd", "site_no", "dateTime", "tz_cd") %in% names(x))) - # nolint end # Test list: args <- list( @@ -190,37 +155,20 @@ test_that("General NWIS retrievals working", { AS <- read_waterdata_monitoring_location(state_name = "American Samoa") expect_gt(nrow(AS), 0) - site_id <- "01594440" - rating_curve <- readNWISdata( - service = "rating", - site_no = site_id, + site_id <- "USGS-01594440" + rating_curve <- read_waterdata_ratings( + monitoring_location_id = site_id, file_type = "base" ) - rating_curve2 <- readNWISrating( - siteNumber = site_id, - type = "base" - ) - expect_equal( - attr(rating_curve, "url"), - "https://waterdata.usgs.gov/nwisweb/get_ratings/?site_no=01594440&file_type=base" - ) - expect_equal(rating_curve$INDEP, rating_curve2$INDEP) - state_rating_list <- readNWISdata( - service = "rating", - file_type = "base", - period = 24 + expect_equal(names(rating_curve), "USGS-01594440.base.rdb") + + state_rating_list <- read_waterdata_ratings( + datetime = c(Sys.Date() - 1, NA), + download_and_parse = FALSE ) - expect_true(all( - names(state_rating_list) %in% - c( - "agency_cd", - "site_no", 
- "type", - "update_time", - "url" - ) - )) + + expect_true(length(state_rating_list) > 0) multi_hucs <- c("07130007", "07130011") multi_huc_sites <- read_waterdata_monitoring_location( diff --git a/tests/testthat/tests_imports.R b/tests/testthat/tests_imports.R index 509eede51..a8916640d 100644 --- a/tests/testthat/tests_imports.R +++ b/tests/testthat/tests_imports.R @@ -1,55 +1,3 @@ -context("importRDB_noCRAN") - -test_that("External importRDB1 tests", { - testthat::skip_on_cran() - - siteNumber <- "02177000" - startDate <- "2012-09-01" - endDate <- "2012-10-01" - offering <- "00003" - property <- "00060" - - obs_url <- constructNWISURL( - siteNumber, - property, - startDate, - endDate, - "dv", - format = "tsv" - ) - data <- importRDB1(obs_url) - expect_is(data$datetime, "Date") - - urlMultiPcodes <- constructNWISURL( - "04085427", - c("00060", "00010"), - startDate, - endDate, - "dv", - statCd = c("00003", "00001"), - "tsv" - ) - multiData <- importRDB1(urlMultiPcodes) - pCodeCols <- grep("X", colnames(multiData)) - expect_true(length(pCodeCols) / 2 > 2) - - unitDataURL <- constructNWISURL( - siteNumber, - property, - "2013-11-03", - "2013-11-03", - "uv", - format = "tsv" - ) # includes timezone switch - unitData <- importRDB1(unitDataURL, asDateTime = TRUE) - - # Need to think of a way to automatically check timezone conversion: - # expect_that(as.numeric(unitData[which(unitData$tz_cd == "EST")[1],"datetime"]), - # equals(as.numeric(as.POSIXct("2013-11-03 01:00:00", tz="UTC")+60*60*5))) - - site <- "05427850" -}) - context("importRDB") test_that("CRAN-friendly importRDB test", { filePath <- system.file("extdata", package = "dataRetrieval") @@ -71,146 +19,6 @@ test_that("CRAN-friendly importWaterML1 test", { expect_is(importUserWML1$dateTime, "POSIXct") }) -test_that("External importWaterML1 test", { - testthat::skip_on_cran() - - siteNumber <- "02177000" - startDate <- "2012-09-01" - endDate <- "2012-10-01" - offering <- "00003" - property <- "00060" - obs_url 
<- constructNWISURL(siteNumber, property, startDate, endDate, "dv") - - data <- importWaterML1(obs_url, TRUE) - expect_is(data$dateTime, "POSIXct") - - unitDataURL <- constructNWISURL( - siteNumber, - property, - "2020-10-30", - "2020-11-01", - "uv" - ) - unitData <- importWaterML1(unitDataURL, TRUE) - expect_is(unitData$dateTime, "POSIXct") - - # Two sites, two pcodes, one site has two data descriptors - siteNumber <- c("01480015", "04085427") # one site seems to have lost it"s 2nd dd - obs_url <- constructNWISURL( - siteNumber, - c("00060", "00010"), - startDate, - endDate, - "dv" - ) - data <- importWaterML1(obs_url) - - expect_true(length(unique(data$site_no)) == 2) - expect_true(ncol(data) == 8) # 3 data, 3 remark codes, and 4 (agency, site, dateTime, tz) - - inactiveSite <- "05212700" - inactiveSite <- constructNWISURL( - inactiveSite, - "00060", - "2014-01-01", - "2014-01-10", - "dv" - ) - inactiveSite <- importWaterML1(inactiveSite) - expect_true(nrow(inactiveSite) == 0) - - inactiveAndActive <- c("07334200", "05212700") - inactiveAndActive <- constructNWISURL( - inactiveAndActive, - "00060", - "2014-01-01", - "2014-12-31", - "dv" - ) - inactiveAndActive <- importWaterML1(inactiveAndActive) - # - # The inactive site became active, need a new test. 
- - # raw XML - url <- constructNWISURL( - service = "dv", - siteNumber = "02319300", - parameterCd = "00060", - startDate = "2014-01-01", - endDate = "2014-01-01" - ) - raw <- httr2::req_perform(url) - raw <- httr2::resp_body_xml(raw) - rawParsed <- importWaterML1(raw) - expect_true(nrow(rawParsed) > 0) - expect_true(data.class(rawParsed$X_00060_00003) == "numeric") - - # no data - url <- constructNWISURL( - "05212700", - "00060", - "2014-01-01", - "2014-01-10", - "dv", - statCd = "00001" - ) - noData <- importWaterML1(url) - expect_true(class(attr(noData, "url")) == "character") - expect_true(all(dim(noData) == c(0, 4))) - - url <- constructNWISURL( - service = "iv", - site = c("02319300", "02171500"), - startDate = "2015-04-04", - endDate = "2015-04-05" - ) - data <- importWaterML1(url, tz = "America/New_York", asDateTime = TRUE) - expect_true(data.class(data$dateTime) == "POSIXct") - expect_true(nrow(data) > 0) - - # expect_error(readNWISdata( - # sites = "05114000", - # service = "iv", - # parameterCd = "00060", - # startDate = "2014-05-01T00:00", - # endDate = "2014-05-01T12:00", - # tz = "blah" - # )) - # - # arg.list <- list( - # sites = "05114000", - # parameterCd = "00060", - # startDate = "2014-05-01T00:00", - # endDate = "2014-05-01T12:00" - # ) - # - # chi_iv <- readNWISdata(arg.list, - # service = "iv", - # tz = "America/Chicago" - # ) - # - # expect_true(all(chi_iv$tz_cd == "America/Chicago")) - # expect_equal(chi_iv$dateTime[1], as.POSIXct("2014-05-01T00:00", - # format = "%Y-%m-%dT%H:%M", - # tz = "America/Chicago" - # )) - # expect_equal(chi_iv$dateTime[nrow(chi_iv)], as.POSIXct("2014-05-01T12:00", - # format = "%Y-%m-%dT%H:%M", - # tz = "America/Chicago" - # )) - - # Time over daylight saving switch: - tzURL <- constructNWISURL( - "04027000", - c("00300", "63680"), - "2011-11-05", - "2011-11-07", - "uv" - ) - tzIssue <- importWaterML1(tzURL, asDateTime = TRUE, tz = "America/Chicago") - expect_false(any(duplicated(tzIssue$dateTime))) -}) - 
context("importWaterML2") test_that("importWaterML2 internal test", { @@ -228,27 +36,7 @@ context("importWQP_noCRAN") test_that("External WQP tests", { testthat::skip_on_cran() - rawSampleURL <- constructWQPURL( - "USGS-01594440", - "01075", - "", - "", - legacy = FALSE - ) - # rawSample <- importWQP(rawSampleURL) - # expect_is(rawSample$Activity_StartDateTime, "POSIXct") - url2 <- constructWQPURL("USGS-01594440", "01075", "", "", legacy = TRUE) rawSample2 <- suppressWarnings(importWQP(url2)) expect_is(rawSample2$ActivityStartDateTime, "POSIXct") - - STORETex <- constructWQPURL( - "WIDNR_WQX-10032762", - "Specific conductance", - "", - "", - legacy = FALSE - ) - # STORETdata <- importWQP(STORETex) - # expect_is(STORETdata$Activity_StartDateTime, "POSIXct") }) diff --git a/tests/testthat/tests_userFriendly_fxns.R b/tests/testthat/tests_userFriendly_fxns.R index 68e24645f..393c13b4b 100644 --- a/tests/testthat/tests_userFriendly_fxns.R +++ b/tests/testthat/tests_userFriendly_fxns.R @@ -44,10 +44,38 @@ test_that("Unit value data returns correct types", { ) # nolint start: line_length_linter - expect_equal( - attr(rawData, "request")[["url"]], - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z&limit=50000" + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = "monitoring_location_id=USGS-05114000" + ) + ) + + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = "time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z" + ) ) + + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = "parameter_code=00060" + ) + ) + + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = paste0( + "https://api.waterdata.usgs.gov/ogcapi/", + getOption("dataRetrieval.api_version"), + "/collections/continuous/items" + ) + ) + ) + # nolint 
end timeZoneChange <- read_waterdata_continuous( monitoring_location_id = c("04024430", "04024000"), @@ -58,9 +86,11 @@ test_that("Unit value data returns correct types", { expect_is(rawData$time, "POSIXct") expect_is(rawData$value, "numeric") # nolint start: line_length_linter - expect_equal( - attr(rawData, "request")[["url"]], - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&monitoring_location_id=USGS-05114000¶meter_code=00060&time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z&limit=50000" + expect_true( + grepl( + x = attr(rawData, "request")[["url"]], + pattern = "time=2014-10-10T00%3A00%3A00Z%2F2014-10-10T00%3A00%3A00Z" + ) ) # nolint end site <- "USGS-04087170" @@ -83,14 +113,17 @@ context("Peak, rating, meas, site") test_that("peak, rating curves, surface-water measurements", { testthat::skip_on_cran() testthat::skip_on_ci() - siteNumbers <- c("01594440", "040851325") - data <- readNWISpeak(siteNumbers) - expect_is(data$agency_cd, "character") + siteNumbers <- c("USGS-01594440", "USGS-040851325") + data <- read_waterdata_peaks(monitoring_location_id = siteNumbers) + expect_true(ncol(data) > 10) - # Rating curvs: - siteNumber <- "01594440" - data <- readNWISrating(siteNumber, "base") - expect_that(length(attr(data, "RATING")), equals(7)) + # Rating curves: + siteNumber <- "USGS-01594440" + data <- read_waterdata_ratings( + monitoring_location_id = siteNumber, + file_type = "base" + ) + expect_gt(length(comment(data[[1]])), 1) # Surface meas: siteNumbers <- c("USGS-01594440", "USGS-040851325") @@ -120,6 +153,8 @@ test_that("peak, rating curves, surface-water measurements", { )), 0 ) + # This does come back empty because 50268 isn't at this site + expect_equal( ncol(read_waterdata_ts_meta( monitoring_location_id = "USGS-10312000", @@ -129,11 +164,6 @@ test_that("peak, rating curves, surface-water measurements", { 4 ) - url <- httr2::request( - 
"https://waterservices.usgs.gov/nwis/site/?format=rdb&seriesCatalogOutput=true&sites=05114000" - ) - x <- importRDB1(url) - siteID <- "USGS-263819081585801" gwl_1 <- read_waterdata_field_measurements(monitoring_location_id = siteID) expect_equal(unique(gwl_1$monitoring_location_id), siteID) @@ -406,6 +436,7 @@ test_that("Construct USGS urls", { url_daily <- construct_api_requests( service = "daily", + output_id = "daily_id", monitoring_location_id = siteNumber, parameter_code = pCode, time = c(startDate, endDate), @@ -414,9 +445,8 @@ test_that("Construct USGS urls", { ) # nolint start: line_length_linter - expect_equal( - url_daily$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/daily/items?f=json&lang=en-US&skipGeometry=FALSE&monitoring_location_id=USGS-01594440¶meter_code=00060,00010&time=2024-01-01%2F..&statistic_id=00003,00001&limit=10000" + expect_true( + grepl(x = url_daily$url, pattern = "parameter_code=00060,00010") ) url_works <- dataRetrieval:::walk_pages(url_daily) @@ -424,14 +454,17 @@ test_that("Construct USGS urls", { url_ts_meta <- construct_api_requests( monitoring_location_id = siteNumber, + output_id = "time_series_id", parameter_code = pCode, service = "time-series-metadata", limit = 10000 ) - expect_equal( - url_ts_meta$url, - "https://api.waterdata.usgs.gov/ogcapi/v0/collections/time-series-metadata/items?f=json&lang=en-US&skipGeometry=FALSE&monitoring_location_id=USGS-01594440¶meter_code=00060,00010&limit=10000" + expect_true( + grepl( + x = url_ts_meta$url, + pattern = "collections/time-series-metadata/items" + ) ) url_works_ts <- dataRetrieval:::walk_pages(url_ts_meta) @@ -439,13 +472,13 @@ test_that("Construct USGS urls", { url_ml <- construct_api_requests( id = siteNumber, + output_id = "monitoring_location_id", service = "monitoring-locations", limit = 50000 ) - expect_equal( - url_ml$url, - 
"https://api.waterdata.usgs.gov/ogcapi/v0/collections/monitoring-locations/items?f=json&lang=en-US&skipGeometry=FALSE&id=USGS-01594440&limit=50000" + expect_true( + grepl(x = url_ml$url, pattern = "id=USGS-01594440") ) url_works_ml <- dataRetrieval:::walk_pages(url_ml) @@ -585,6 +618,18 @@ test_that("bad_properties", { time = c("2021-01-01", "2022-01-01"), properties = c("value", "time", "blah") )) + + # No paging + dv_data_quick <- read_waterdata_daily( + monitoring_location_id = "USGS-02238500", + parameter_code = "00060", + no_paging = TRUE + ) + + expect_type(dv_data_quick$parameter_code, "character") + expect_is(dv_data_quick$time, "Date") + expect_equal(dv_data_quick$parameter_code[1], "00060") + # Empty result: expect_message(read_waterdata_daily( monitoring_location_id = "USGS-02238500", diff --git a/tutorials/basic_slides_deck.qmd b/tutorials/basic_slides_deck.qmd index 180967958..990c450a4 100644 --- a/tutorials/basic_slides_deck.qmd +++ b/tutorials/basic_slides_deck.qmd @@ -15,12 +15,15 @@ title-slide-attributes: data-background-size: 15% data-background-position: 2% 2% editor: source +engine: knitr editor_options: chunk_output_type: console execute: echo: true warning: false message: false +params: + run_python: true --- ```{r} @@ -29,30 +32,43 @@ execute: # library(dataRetrieval) library(ggplot2) library(dplyr) +library(reticulate) +py_require("dataretrieval") +py_require("panda") +py_require("matplotlib") options(dplyr.summarise.inform = FALSE) -dt_me <- function(x, - page_length = 8, - paging = TRUE, - font = "0.7em", - escape = TRUE){ - DT::datatable(x, - rownames = FALSE, - options = list(pageLength = page_length, - info = FALSE, - searching = FALSE, - paging = paging, - lengthChange = FALSE, - initComplete = htmlwidgets::JS( - "function(settings, json) {", - paste0("$(this.api().table().container()).css({'font-size': '", - font, "'});"), - "}")), escape = escape) +evaluate_python <- params$run_python + +dt_me <- function( + x, + page_length = 8, + 
paging = TRUE, + font = "0.7em", + escape = TRUE +) { + DT::datatable( + x, + rownames = FALSE, + options = list( + pageLength = page_length, + info = FALSE, + searching = FALSE, + paging = paging, + lengthChange = FALSE, + initComplete = htmlwidgets::JS( + "function(settings, json) {", + paste0( + "$(this.api().table().container()).css({'font-size': '", + font, + "'});" + ), + "}" + ) + ), + escape = escape + ) } - -theme_set(theme_grey(base_size = 24)) -update_geom_defaults("point", list(size = 3)) - ``` @@ -151,13 +167,16 @@ Go to Tools -> Global Options -> Appearances to change style. ## Installation +::: {.panel-tabset} + +### R + `dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: ```{r} #| echo: true #| eval: false install.packages("dataRetrieval") - ``` Then each time you open R, you'll need to load the library: @@ -167,6 +186,33 @@ Then each time you open R, you'll need to load the library: library(dataRetrieval) ``` +### Python + +Whether you are a user or developer we recommend installing `dataretrieval` in a virtual environment. This can be done using something like virtualenv or conda. 
+ +```{bash} +#| echo: true +#| eval: false +pip install dataretrieval +``` + +or + +```{bash} +#| echo: true +#| eval: false +conda install conda-forge::dataretrieval +``` + +Then each time you open Python, you'll need to load the library: + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata +``` + +::: + ::: footer ::: @@ -194,19 +240,23 @@ library(dataRetrieval) ## Documentation within R: function help pages {.smaller} +::: {.panel-tabset} + +### R + Within R, you can call help files for any `dataRetrieval` function: ```{r} #| echo: true #| eval: false -?readWQPdata +?read_waterdata_daily ``` :::: {.columns} ::: {.column width="50%"} -Click here to open a new window: +Click here to open a new window in RStudio: ![](images/help_file_2.png) @@ -221,20 +271,30 @@ Examples ```{r} #| eval: false -# Legacy: -nameToUse <- "pH" -pHData <- readWQPdata(siteid = "USGS-04024315", - characteristicName = nameToUse) -ncol(pHData) -attr(pHData, "siteInfo") -attr(pHData, "queryTime") -attr(pHData, "url") +site <- "USGS-02238500" +dv_data_sf <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = "00060", + time = c("2021-01-01", "2022-01-01") +) ``` ::: :::: +### Python + +Within Python, you can call help for any `dataretrieval` function: + +```{python} +#| eval: !expr evaluate_python +help(waterdata.get_daily) +``` + +::: + + ::: footer ::: @@ -447,9 +507,13 @@ We're going walk through 3 retrievals: ::: -## Workflow 1: Daily data for known site +## Workflow 1: Daily data for known site {.smaller} + +Let's pull daily mean discharge data for site "USGS-09405500", getting all the data from the last year. -Let's pull daily mean discharge data for site "USGS-0940550", getting all the data from October 10, 2024 onward.
+::: {.panel-tabset} + +### R ```{r} #| message: true @@ -457,15 +521,38 @@ library(dataRetrieval) site <- "USGS-09405500" pcode <- "00060" # Discharge stat_cd <- "00003" # Mean -range <- c("2024-10-01", NA) -df <- read_waterdata_daily(monitoring_location_id = site, - parameter_code = pcode, - statistic_id = stat_cd, - time = range) +df <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = pcode, + statistic_id = stat_cd, + time = "P365D" +) +nrow(df) +``` + +### Python + +```{python} +#| eval: !expr evaluate_python +from dataretrieval import waterdata + +site = "USGS-09405500" +pcode = "00060" # Discharge +stat_cd = "00003" # Mean + +df, md = waterdata.get_daily( + monitoring_location_id=site, + parameter_code=pcode, + statistic_id=stat_cd, + time="P365D", +) +df.shape[0] ``` +::: + ::: footer ::: @@ -476,12 +563,11 @@ In RStudio, click on the data frame in the upper right Environment tab to open a ```{r} #| echo: false - -dt_me(df |> - sf::st_drop_geometry(), - page_length = 3) - - +dt_me( + df |> + sf::st_drop_geometry(), + page_length = 3 +) ``` ::: footer @@ -490,27 +576,56 @@ dt_me(df |> ## Workflow 1: Plot Daily Data +::: {.panel-tabset} + +### R + Let's use `ggplot2` to visualize the data. ```{r} #| echo: true #| output-location: column library(ggplot2) +theme_set(theme_bw(base_size = 24)) +update_geom_defaults("point", list(size = 3, color = "steelblue")) +options(ggplot2.discrete.colour = "viridis") ggplot(data = df) + - geom_point(aes(x = time, - y = value, - color = approval_status)) + geom_point(aes(x = time, y = value, color = approval_status)) +``` + +### Python +Let's use `matplotlib` to visualize the data. 
+ +```{python} +#| echo: true +#| output-location: column +import matplotlib.pyplot as plt +import pandas as pd + +plt.rcParams["font.size"] = 20 + +levels, categories = pd.factorize(df["approval_status"]) + +fig, ax = plt.subplots() +scatter = ax.scatter(x=df.time, y=df.value, c=levels) +fig.legend(scatter.legend_elements()[0], categories, title="Status") ``` -## Water Data API Notes: Argument input +::: + +::: footer + +::: + +## Water Data API Notes: Argument input Use your "tab" key! ![](images/autocomplete.png) -## Water Data API Notes: Arguments +## Water Data API Notes: Arguments {.smaller} * When you look at the help file for the new functions, you’ll notice there are lots of possible inputs (arguments). @@ -521,9 +636,10 @@ Use your "tab" key! ```{r} #| eval: false #| echo: true -discharge <- read_waterdata_daily(parameter_code = "00060", - statistic_id = "00003") - +discharge <- read_waterdata_daily( + parameter_code = "00060", + statistic_id = "00003" +) ``` ::: {.fragment} @@ -564,14 +680,16 @@ The "time" argument has a few options: Here are a bunch of valid inputs: ```{r} -#| code-line-numbers: "1-7|8-9|10-13|14-17" +#| code-line-numbers: "1-9|10-11|12-15|16-19" # Ask for exact times: time = "2025-01-01" time = as.Date("2025-01-01") time = "2025-01-01T23:20:50Z" -time = as.POSIXct("2025-01-01T23:20:50Z", - format = "%Y-%m-%dT%H:%M:%S", - tz = "UTC") +time = as.POSIXct( + "2025-01-01T23:20:50Z", + format = "%Y-%m-%dT%H:%M:%S", + tz = "UTC" +) # Ask for specific range time = c("2024-01-01", "2025-01-01") # or Dates or POSIXs # Asking beginning of record to specific end: @@ -594,22 +712,47 @@ Use your "tab" key! ![](images/autocomplete_samples.png) -## Workflow 2: Discrete data for known site +## Workflow 2: Discrete data for known site {.smaller} Let's get orthophosphate ("00660") data from the Shenandoah River at Front Royal, VA ("USGS-01631000"). 
+::: {.panel-tabset} + +### R + ```{r} #| message: true site <- "USGS-01631000" pcode <- "00660" -qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, - usgsPCode = pcode, - dataType = "results", - dataProfile = "basicphyschem") +qw_data <- read_waterdata_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + dataType = "results", + dataProfile = "basicphyschem" +) ncol(qw_data) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +site = "USGS-01631000" +pcode = "00660" + +qw_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier = site, + usgsPCode = pcode, + service = "results", + profile = "basicphyschem", +) + +qw_data.shape[1] +``` + +::: + That's a LOT of columns returned. We won't look at them here, but you can use `View` in RStudio to explore on your own. ::: footer @@ -626,48 +769,94 @@ That's a LOT of columns returned. We won't look at them here, but you can use `V ```{r} #| echo: false - -df <- tibble(dataType = c("results", "locations", "activities", "projects", "organizations"), - Description = c("Results data and metadata for measures and observations matching your query", - "Find monitoring locations that have data matching your query", - "Information about the monitoring activities conducted that produced data", - "Information on the projects that have results matching your data query", - "Information about the organizations that have provided data that matches your query"), - dataProfile = c('fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', - 'site
count', - 'sampact
actmetric
actgroup
ncount', - 'project
projectmonitoringlocationweight', - 'organization
count')) +df <- tibble( + dataType = c( + "results", + "locations", + "activities", + "projects", + "organizations" + ), + Description = c( + "Results data and metadata for measures and observations matching your query", + "Find monitoring locations that have data matching your query", + "Information about the monitoring activities conducted that produced data", + "Information on the projects that have results matching your data query", + "Information about the organizations that have provided data that matches your query" + ), + dataProfile = c( + 'fullphyschem
basicphyschem
fullbio
basicbio
narrow
resultdetectionquantitationlimit
labsampleprep
count', + 'site
count', + 'sampact
actmetric
actgroup
ncount', + 'project
projectmonitoringlocationweight', + 'organization
count' + ) +) dt_me(df, escape = FALSE, paging = FALSE) - ``` ::: footer ::: -## Workflow 2: Discrete data censoring +## Workflow 2: Discrete data censoring {.smaller} Let's pull a few columns out and look at those: +::: {.panel-tabset} + +### R + ```{r} library(dplyr) -qw_data_slim <- qw_data |> - select(Date = Activity_StartDate, - Result_Measure, - DL_cond = Result_ResultDetectionCondition, - DL_val = DetectionLimit_MeasureA, - DL_type = DetectionLimit_TypeA) |> - mutate(Result = if_else(!is.na(DL_cond), DL_val, Result_Measure), - Detected = if_else(!is.na(DL_cond), "Not Detected", "Detected")) |> +qw_data_slim <- qw_data |> + select( + Date = Activity_StartDate, + Result_Measure, + DL_cond = Result_ResultDetectionCondition, + DL_val = DetectionLimit_MeasureA, + DL_type = DetectionLimit_TypeA + ) |> + mutate( + Result = if_else(!is.na(DL_cond), DL_val, Result_Measure), + Detected = if_else(!is.na(DL_cond), "Not Detected", "Detected") + ) |> arrange(Detected) - ``` * What is `|>`? It's a pipe! It says take 'this thing' and put it in 'that thing'. You'll also see `%>%` in code, it is also a pipe - they are basically the same. 
+### Python + +```{python} +#| eval: !expr evaluate_python +import numpy as np + +qw_data_slim = ( + qw_data.rename( + columns={ + "Activity_StartDate": "Date", + "Result_ResultDetectionCondition": "DL_cond", + "DetectionLimit_MeasureA": "DL_val", + "DetectionLimit_TypeA": "DL_type", + } + )[["Date", "Result_Measure", "DL_cond", "DL_val", "DL_type"]] + .assign( + Result=lambda x: np.where( + x["DL_cond"].notna(), x["DL_val"], x["Result_Measure"] + ) + ) + .assign( + Detected=lambda x: np.where(x["DL_cond"].notna(), "Not Detected", "Detected") + ) + .sort_values(by="Detected", ascending=False) +) +``` + +::: + ::: footer ::: @@ -676,8 +865,7 @@ qw_data_slim <- qw_data |> ```{r} #| echo: false - -dt_me(qw_data_slim, page_length = 8, font = "0.7em") +dt_me(qw_data_slim, page_length = 8, font = "0.7em") ``` ::: footer @@ -696,7 +884,11 @@ dt_me(qw_data_slim, page_length = 8, font = "0.7em") ::: -## Step 1: Get the data +## Step 1: Get the data {.smaller} + +::: {.panel-tabset} + +### R ```{r} site <- "USGS-04183500" @@ -706,33 +898,91 @@ p_code_qw <- "00665" start_date <- "2015-07-03" end_date <- "2025-07-03" -qw_data <- read_waterdata_samples(monitoringLocationIdentifier = site, - usgsPCode = p_code_qw, - activityStartDateLower = start_date, - activityStartDateUpper = end_date, - dataProfile = "basicphyschem") +qw_data <- read_waterdata_samples( + monitoringLocationIdentifier = site, + usgsPCode = p_code_qw, + activityStartDateLower = start_date, + activityStartDateUpper = end_date, + dataProfile = "basicphyschem" +) + +dv_data <- read_waterdata_daily( + monitoring_location_id = site, + parameter_code = p_code_dv, + statistic_id = stat_cd, + time = c(start_date, end_date) +) +``` + +### Python + +```{python} +#| eval: !expr evaluate_python +site = "USGS-04183500" +p_code_dv = "00060" +stat_cd = "00003" +p_code_qw = "00665" +start_date = "2015-07-03" +end_date = "2025-07-03" + +qw_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier=site, + 
usgsPCode=p_code_qw, + activityStartDateLower=start_date, + activityStartDateUpper=end_date, + profile="basicphyschem", +) -dv_data <- read_waterdata_daily(monitoring_location_id = site, - parameter_code = p_code_dv, - statistic_id = stat_cd, - time = c(start_date, end_date)) +dv_data, md_dv = waterdata.get_daily( + monitoring_location_id=site, + parameter_code=p_code_dv, + statistic_id=stat_cd, + time=(start_date + "/" + end_date), +) ``` -## Step 2: Join -```{r} -library(dplyr) +::: +::: footer + +::: + +## Step 2: Join + +::: {.panel-tabset} + +### R + +```{r} little_dv <- dv_data |> select(time, Flow = value, monitoring_location_id) -qw_data_joined <- qw_data |> - left_join(little_dv, - by = c("Activity_StartDate" = "time")) +qw_data_joined <- qw_data |> + left_join(little_dv, by = c("Activity_StartDate" = "time")) ``` + + +### Python + +```{python} +#| eval: !expr evaluate_python +little_dv = dv_data.rename(columns={"value": "Flow"})[ + ["time", "Flow", "monitoring_location_id"] +] + +qw_data["Activity_StartDate"] = pd.to_datetime( + qw_data["Activity_StartDate"], format="%Y-%m-%d" +) + +qw_data_joined = pd.merge( + qw_data, little_dv, left_on="Activity_StartDate", right_on="time", how="left" +) +``` + +::: * "Activity_StartDate" (on the left side data frame) and "time" (on the right side data frame) need to be the same type (in this case, both are Date objects). - ::: footer @@ -742,17 +992,39 @@ qw_data_joined <- qw_data |> * You could join on multiple columns: +::: {.panel-tabset} + +### R + ```{r} #| eval: false -qw_data <- qw_data |> - left_join(little_dv, - by = c("Activity_StartDate" = "time", - "Location_Identifier" = "monitoring_location_id")) - +qw_data <- qw_data |> + left_join( + little_dv, + by = c( + "Activity_StartDate" = "time", + "Location_Identifier" = "monitoring_location_id" + ) + ) ``` See `dplyr` documentation for lots of joining options, but I find `left_join` my "go-to" for straightforward joins. 
+### Python + +```{python} +#| eval: !expr evaluate_python +qw_data = pd.merge( + qw_data, + little_dv, + left_on=["Activity_StartDate", "Location_Identifier"], + right_on=["time", "monitoring_location_id"], + how="left", +) +``` + +::: + ::: footer ::: @@ -761,15 +1033,28 @@ See `dplyr` documentation for lots of joining options, but I find `left_join` my Let's take a quick peak: +::: {.panel-tabset} + +### R + ```{r} #| output-location: column ggplot(data = qw_data_joined) + - geom_point(aes(x = Flow, - y = Result_Measure)) + geom_point(aes(x = Flow, y = Result_Measure)) +``` +### Python + +```{python} +#| eval: !expr evaluate_python +#| output-location: column +plt.figure() +plt.scatter(x=qw_data_joined.Flow, y=qw_data_joined.Result_Measure) ``` +::: + ## Exercise 2: Joins {.smaller} ::: {.panel-tabset} @@ -794,12 +1079,11 @@ band_instruments <- band_instruments ```{r} -band_members |> +band_members |> left_join(band_instruments, by = "name") -band_instruments |> +band_instruments |> left_join(band_members, by = "name") - ``` @@ -816,67 +1100,156 @@ band_instruments |> * We'll look at Suisun Bay a Van Sickle Island NR Pittsburg CA ("USGS-11455508"), with parameter code "99133" which is Nitrate plus Nitrite. 
-## Workflow 4: Continuous data for known site +## Workflow 4: Continuous data for known site {.smaller} + +::: {.panel-tabset} + +### R :::: {.columns} -::: {.column width="70%"} +::: {.column width="65%"} ```{r} -#| results: markup +#| eval: false site_id <- "USGS-11455508" p_code_rt <- "99133" start_date <- "2024-01-01" end_date <- "2024-06-01" -continuous_data <- read_waterdata_continuous(monitoring_location_id = site_id, - parameter_code = p_code_rt, - time = c(start_date, end_date)) - +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) names(continuous_data) ``` ::: -::: {.column width="30%"} +::: {.column width="35%"} +```{r} +#| results: markup +#| echo: false +options(width = 30) +site_id <- "USGS-11455508" +p_code_rt <- "99133" +start_date <- "2024-01-01" +end_date <- "2024-06-01" + +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) +names(continuous_data) ``` - [4] "time" "unit_of_measure" "parameter_code" - [7] "statistic_id" "value" "approval_status" -[10] "last_modified" "qualifier" -``` + ::: :::: +### Python + +:::: {.columns} + +::: {.column width="65%"} + +```{python} +#| eval: false +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = "2024-01-01/2024-06-01" + +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range +) ``` -Requesting: -https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-11455508¶meter_code=99133&time=2024-01-01T00%3A00%3A00Z%2F2024-06-01T00%3A00%3A00Z + +::: + +::: {.column width="35%"} + +```{python} +#| eval: !expr evaluate_python +#| results: markup +#| echo: false +pd.set_option("display.width", 30) +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = 
"2024-01-01/2024-06-01" + +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range +) +continuous_data.columns ``` +::: + +:::: + +::: + + ## Workflow 4: Inspect +::: {.panel-tabset} + +### R + ```{r} #| output-location: column ggplot(data = continuous_data) + - geom_point(aes(x = time, - y = value)) + geom_point(aes(x = time, y = value)) +``` + +### Python + +```{python} +#| eval: !expr evaluate_python +#| output-location: column +plt.figure() +plt.scatter(x=continuous_data.time, y=continuous_data.value) ``` +::: + ## Workflow 5: Join Discrete and Continuous That same site also measures discrete Nitrate plus Nitrite, which is parameter code "00631". Let's first grab that data: +::: {.panel-tabset} + +### R + ```{r} #| message: true -discrete_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-11455508", - usgsPCode = "00631", - activityStartDateLower = start_date, - activityStartDateUpper = end_date, - dataProfile = "basicphyschem") +discrete_data <- read_waterdata_samples( + monitoringLocationIdentifier = "USGS-11455508", + usgsPCode = "00631", + activityStartDateLower = start_date, + activityStartDateUpper = end_date, + dataProfile = "basicphyschem" +) +``` +### Python + +```{python} +#| eval: !expr evaluate_python +discrete_data, md_qw = waterdata.get_samples( + monitoringLocationIdentifier = "USGS-11455508", + usgsPCode = "00631", + activityStartDateLower = "2024-01-01", + activityStartDateUpper = "2024-06-01", + profile = "basicphyschem" +) ``` +::: + ## Workflow 5: Join Discrete and Continuous * We now want to join the **closest** continuous sensor time with the discrete sample time. 
@@ -889,17 +1262,26 @@ discrete_data <- read_waterdata_samples(monitoringLocationIdentifier = "USGS-114 ## Workflow 5: Join Discrete and Continuous +::: {.panel-tabset} + +### R ```{r} -#| code-line-numbers: "1|2-3|5|6|1-6" +#| code-line-numbers: "1|2-3|5-10" library(data.table) setDT(discrete_data)[, join_date := Activity_StartDateTime] setDT(continuous_data)[, join_date := time] - -closest_dt <- continuous_data[discrete_data, on = .(join_date), roll = "nearest"] + +closest_dt <- continuous_data[ + discrete_data, + on = .(join_date), + roll = "nearest" +] closest_dt <- data.frame(closest_dt) ``` +::: + ::: footer ::: @@ -909,13 +1291,11 @@ closest_dt <- data.frame(closest_dt) ```{r} #| output-location: column ggplot(data = closest_dt) + - geom_point(aes(x = Result_Measure, - y = value)) + + geom_point(aes(x = Result_Measure, y = value)) + geom_abline() + expand_limits(x = 0, y = 0) + xlab("Discrete") + ylab("Continuous") - ``` @@ -931,19 +1311,39 @@ The next slides will demo how to use those. 
## Data Discovery: Time Series {.smaller} +::: {.panel-tabset} + +### R ```{r} ts_available <- read_waterdata_ts_meta(monitoring_location_id = "USGS-04183500") ``` -```{r} -#| echo: false +### Python + +```{python} +#| eval: false +ts_avail, ts_me = waterdata.get_time_series_metadata( + monitoring_location_id="USGS-04183500" +) +``` -dt_me(ts_available |> - sf::st_drop_geometry() |> - select(parameter_name, - parameter_code, statistic_id, begin, end, - computation_identifier), page_length = 6) +::: +```{r} +#| echo: false +dt_me( + ts_available |> + sf::st_drop_geometry() |> + select( + parameter_name, + parameter_code, + statistic_id, + begin, + end, + computation_identifier + ), + page_length = 6 +) ``` ::: footer @@ -952,20 +1352,32 @@ dt_me(ts_available |> ## Data Discovery: Discrete {.smaller} -```{r} -discrete_available <- summarize_waterdata_samples(monitoringLocationIdentifier = "USGS-04183500") +::: {.panel-tabset} -``` +### R ```{r} -#| echo: false +discrete_available <- summarize_waterdata_samples( + monitoringLocationIdentifier = "USGS-04183500" +) +``` + -dt_me(discrete_available |> - select(characteristicUserSupplied, - resultCount, activityCount, - firstActivity, mostRecentActivity), - page_length = 6) +::: +```{r} +#| echo: false +dt_me( + discrete_available |> + select( + characteristicUserSupplied, + resultCount, + activityCount, + firstActivity, + mostRecentActivity + ), + page_length = 6 +) ``` ::: footer @@ -976,21 +1388,41 @@ dt_me(discrete_available |> * characteristicUserSupplied can be an input to `read_waterdata_sample` +::: {.panel-tabset} + +### R + ```{r} -discrete1 <- read_waterdata_samples(characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", - monitoringLocationIdentifier = "USGS-04183500") +discrete1 <- read_waterdata_samples( + characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", + monitoringLocationIdentifier = "USGS-04183500" +) nrow(discrete1) ``` +### Python +```{python} +#| eval: 
!expr evaluate_python +discrete1, discrete1_me = waterdata.get_samples( + characteristicUserSupplied = "Phosphorus as phosphorus, water, unfiltered", + monitoringLocationIdentifier = "USGS-04183500" +) +discrete1.shape[0] +``` + +::: ## More Information {.smaller} -- dataRetrieval repository: +- dataRetrieval R repository: - - [Documentation](https://doi-usgs.github.io/dataRetrieval) - [dataRetrieval New Features](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html) - - [General Tutorial](https://rconnect.usgs.gov/NMC_dataRetrieval_1/dataRetrieval_1.html) + +- dataretrieval Python repository: + - + - [Documentation](https://doi-usgs.github.io/dataretrieval-python/) - Contact: - Computational Tools Email: comptools@usgs.gov diff --git a/tutorials/images/help_file_2.png b/tutorials/images/help_file_2.png index 232e10289..52af3b94c 100644 Binary files a/tutorials/images/help_file_2.png and b/tutorials/images/help_file_2.png differ diff --git a/tutorials/quick_intro_deck.qmd b/tutorials/quick_intro_deck.qmd index 125507745..771b61091 100644 --- a/tutorials/quick_intro_deck.qmd +++ b/tutorials/quick_intro_deck.qmd @@ -33,6 +33,7 @@ params: library(ggplot2) library(dplyr) library(reticulate) + py_require("dataretrieval") py_require("panda") py_require("matplotlib") @@ -70,9 +71,6 @@ dt_me <- function( escape = escape ) } - -theme_set(theme_grey(base_size = 24)) -update_geom_defaults("point", list(size = 3)) ``` @@ -129,12 +127,12 @@ In this ~45 minute introduction, the goal is: ## Installation -`dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository. To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: - ::: {.panel-tabset} ### R +`dataRetrieval` is available on the Comprehensive R Archive Network (CRAN) repository.
To install `dataRetrieval` on your computer, open RStudio and run this line of code in the Console: + ```{r} #| echo: true #| eval: false @@ -147,14 +145,19 @@ Then each time you open R, you'll need to load the library: #| message: true library(dataRetrieval) ``` +### Python -### Python ```{bash} #| echo: true #| eval: false pip install dataretrieval +``` +```{bash} +#| echo: true +#| eval: false +conda install conda-forge::dataretrieval ``` Then each time you open Python, you'll need to load the library: @@ -205,22 +208,11 @@ Within R, you can call help files for any `dataRetrieval` function: ?read_waterdata_daily ``` -### Python - -Within Python, you can call help for any `dataRetrieval` function: - -```{python} -#| eval: !expr evaluate_python -help(waterdata.get_daily) -``` - -::: - :::: {.columns} ::: {.column width="50%"} -Click here to open a new window: +Click here to open a new window in RStudio: ![](images/help_file_2.png) @@ -233,10 +225,6 @@ Scroll down to the "Examples" to see how each function can be run. Examples -::: {.panel-tabset} - -### R - ```{r} #| eval: false site <- "USGS-02238500" @@ -247,23 +235,21 @@ dv_data_sf <- read_waterdata_daily( ) ``` +::: + +:::: + ### Python -```{python} -#| eval: false -df, md = waterdata.get_daily( +Within Python, you can call help for any `dataretrieval` function: - monitoring_location_id="USGS-02238500", - parameter_code="00060", - time="2021-01-01T00:00:00Z/2022-01-01T00:00:00Z", -) +```{python} +#| eval: !expr evaluate_python +help(waterdata.get_daily) ``` ::: -::: - -:::: ::: footer @@ -525,6 +511,11 @@ Let's use `ggplot2` to visualize the data. 
#| output-location: column library(ggplot2) +theme_set(theme_bw(base_size = 24)) +update_geom_defaults("point", list(size = 3, color = "steelblue")) +options(ggplot2.discrete.colour = "viridis") +options(ggplot2.discrete.fill = "viridis") + ggplot(data = df) + geom_point(aes(x = time, y = value, color = approval_status)) ``` @@ -539,9 +530,13 @@ Let's use `matplotlib` to visualize the data. import matplotlib.pyplot as plt import pandas as pd -df["approval_status"] = pd.Categorical(df["approval_status"]).codes +plt.rcParams["font.size"] = 20 -plt.scatter(x=df.time, y=df.value, c=df.approval_status) +levels, categories = pd.factorize(df["approval_status"]) + +fig, ax = plt.subplots() +scatter = ax.scatter(x=df.time, y=df.value, c=levels) +fig.legend(scatter.legend_elements()[0], categories, title="Status") ``` @@ -608,7 +603,7 @@ The "time" argument has a few options: Here are a bunch of valid inputs: ```{r} -#| code-line-numbers: "1-7|8-9|10-13|14-17" +#| code-line-numbers: "1-9|10-11|12-15|16-19" # Ask for exact times: time = "2025-01-01" time = as.Date("2025-01-01") @@ -738,16 +733,37 @@ dt_me(df, escape = FALSE, paging = FALSE) ## Workflow 3: Continuous data for known site {.smaller} +::: {.panel-tabset} + +### R + :::: {.columns} ::: {.column width="65%"} -::: {.panel-tabset} +```{r} +#| eval: false +site_id <- "USGS-11455508" +p_code_rt <- "99133" +start_date <- "2024-01-01" +end_date <- "2024-06-01" -### R +continuous_data <- read_waterdata_continuous( + monitoring_location_id = site_id, + parameter_code = p_code_rt, + time = c(start_date, end_date) +) +names(continuous_data) +``` + +::: + +::: {.column width="35%"} ```{r} #| results: markup +#| echo: false +options(width = 30) site_id <- "USGS-11455508" p_code_rt <- "99133" start_date <- "2024-01-01" @@ -758,61 +774,79 @@ continuous_data <- read_waterdata_continuous( parameter_code = p_code_rt, time = c(start_date, end_date) ) -nrow(continuous_data) +names(continuous_data) ``` +::: + +:::: + ### Python +:::: 
{.columns} + +::: {.column width="65%"} + ```{python} -#| eval: !expr evaluate_python +#| eval: false site_id = "USGS-11455508" p_code_rt = "99133" date_range = "2024-01-01/2024-06-01" continuous_data, md_cont = waterdata.get_continuous( - monitoring_location_id = site_id, - parameter_code = p_code_rt, - time = date_range + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range ) -continuous_data.shape[0] - ``` ::: -::: - ::: {.column width="35%"} +```{python} +#| eval: !expr evaluate_python +#| results: markup +#| echo: false +pd.set_option("display.width", 30) +site_id = "USGS-11455508" +p_code_rt = "99133" +date_range = "2024-01-01/2024-06-01" + +continuous_data, md_cont = waterdata.get_continuous( + monitoring_location_id=site_id, parameter_code=p_code_rt, time=date_range +) +continuous_data.columns ``` - [1] "monitoring_location_id" - [2] "parameter_code" - [3] "statistic_id" - [4] "time" - [5] "value" - [6] "unit_of_measure" - [7] "approval_status" - [8] "last_modified" - [9] "qualifier" -[10] "time_series_id" -``` + ::: :::: -``` -Requesting: -https://api.waterdata.usgs.gov/ogcapi/v0/collections/continuous/items?f=json&lang=en-US&skipGeometry=TRUE&limit=50000&monitoring_location_id=USGS-11455508¶meter_code=99133&time=2024-01-01T00%3A00%3A00Z%2F2024-06-01T00%3A00%3A00Z -``` +::: + ## Workflow 3: Inspect +::: {.panel-tabset} + +### R + ```{r} #| output-location: column ggplot(data = continuous_data) + geom_point(aes(x = time, y = value)) ``` +### Python + +```{python} +#| eval: !expr evaluate_python +#| output-location: column +plt.figure() +plt.scatter(x=continuous_data.time, y=continuous_data.value) +``` + +::: + ## Data Discovery @@ -827,6 +861,10 @@ ggplot(data = continuous_data) + The next slides will demo how to use those. 
+::: footer + +::: + ## Data Discovery: Time Series {.smaller} ```{r} @@ -892,11 +930,14 @@ nrow(discrete1) ## More Information {.smaller} -- dataRetrieval repository: +- dataRetrieval R repository: - - [Documentation](https://doi-usgs.github.io/dataRetrieval) - [dataRetrieval New Features](https://doi-usgs.github.io/dataRetrieval/articles/read_waterdata_functions.html) - - [General Tutorial](https://rconnect.usgs.gov/NMC_dataRetrieval_1/dataRetrieval_1.html) + +- dataretrieval Python repository: + - + - [Documentation](https://doi-usgs.github.io/dataretrieval-python/) - Contact: - Computational Tools Email: comptools@usgs.gov diff --git a/vignettes/Contributing.Rmd b/vignettes/Contributing.Rmd index 97e4be399..1c528253f 100644 --- a/vignettes/Contributing.Rmd +++ b/vignettes/Contributing.Rmd @@ -15,9 +15,7 @@ editor_options: ```{r setup, include=FALSE} library(knitr) -opts_chunk$set(echo = TRUE, - warning = FALSE, - message = FALSE) +opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) ``` @@ -388,6 +386,45 @@ git pull codeusgs main In general, create a pull request to push to the DOI-USGS/dataRetrieval main branch or a merge request to push to the main branch of code.usgs.gov/water/dataRetrieval. +# Setting up Quarto Slides + +Slide decks are produced using Quarto in the "tutorials" folder. They require both R and Python to be installed. To install, download and install Miniforge. On Windows, open the "Miniforge Prompt"; on macOS and Linux, open a terminal. Navigate to the dataRetrieval directory and type the following: + +``` +mamba env create -f environment.yml +``` + +Then activate the environment (still in Miniforge) using `conda`: +``` +conda activate pyclass +``` +The slides will use the R package `reticulate` to manage flipping back and forth between R and Python. To help `reticulate` know where Python is installed, you will need to add an environment variable, "RETICULATE_PYTHON", to your .Renviron file.
Run `usethis::edit_r_environ()`, then add the path to your Python installation, and restart R. RStudio can render both the R and Python in the Quarto slides. However, if you want to do troubleshooting on individual code chunks, you might want to switch to Positron which allows seamless transition between R and Python consoles. + +Let's say you need to update to a new version of dataretrieval: + +1. Open the Miniforge prompt + +2. Navigate to dataRetrieval directory + +3. Activate pyclass: +``` +conda activate pyclass +``` + +4. Update package: +``` +conda install conda-forge::dataretrieval +``` + +5. Restart your python session. + + +Let's say you want to update all packages. In step 4 above, run: +``` +mamba update +``` + + # References diff --git a/vignettes/Reference_Lists.Rmd b/vignettes/Reference_Lists.Rmd new file mode 100644 index 000000000..4614bb105 --- /dev/null +++ b/vignettes/Reference_Lists.Rmd @@ -0,0 +1,243 @@ +--- +title: "USGS Reference Lists" +editor_options: + chunk_output_type: console +output: + rmarkdown::html_vignette: + toc: true + number_sections: false +vignette: > + %\VignetteIndexEntry{USGS Reference Lists} + \usepackage[utf8]{inputenc} + %\VignetteEngine{knitr::rmarkdown} +--- + + +```{r setup, include=FALSE, message=FALSE} +library(knitr) +library(dataRetrieval) + +options(continue = " ", + width = 50) + +knitr::opts_chunk$set( + echo = TRUE, + message = FALSE, + warning = FALSE, + fig.height = 4, + fig.width = 7 +) +``` + +## USGS Reference Lists + +### Agency Codes + +`r dataRetrieval:::get_description("agency-codes")` + +```{r} +agency_codes <- read_waterdata_metadata("agency-codes") +head(agency_codes) +``` + +### Altitude Datums + +`r dataRetrieval:::get_description("altitude-datums")` + +```{r} +altitude_datums <- read_waterdata_metadata("altitude-datums") +head(altitude_datums) +``` + + +### Aquifer Codes + +`r dataRetrieval:::get_description("aquifer-codes")` + +```{r} +aquifer_codes <- read_waterdata_metadata("aquifer-codes") 
+head(aquifer_codes) +``` + +### Aquifer Types + +`r dataRetrieval:::get_description("aquifer-types")` + +```{r} +aquifer_types <- read_waterdata_metadata("aquifer-types") +head(aquifer_types) +``` + +### Coordinate Accuracy Codes + +`r dataRetrieval:::get_description("coordinate-accuracy-codes")` + +```{r} +coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") +head(coordinate_accuracy_codes) +``` + +### Coordinate Datum Codes + +`r dataRetrieval:::get_description("coordinate-datum-codes")` + +```{r} +coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") +head(coordinate_datum_codes) +``` + +### Coordinate Method Codes + +`r dataRetrieval:::get_description("coordinate-method-codes")` + +```{r} +coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") +head(coordinate_method_codes) +``` + +### Country Identifiers + +`r dataRetrieval:::get_description("countries")` + +```{r} +countries <- read_waterdata_metadata("countries") +head(countries) +``` + +### County Identifiers + +`r dataRetrieval:::get_description("counties")` + +```{r} +counties <- read_waterdata_metadata("counties") +head(counties) +``` + +### Hydrologic Unit Codes + +`r dataRetrieval:::get_description("hydrologic-unit-codes")` + +```{r} +huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") +head(huc_codes) +``` + + +### Medium Codes + +`r dataRetrieval:::get_description("medium-codes")` + +```{r} +medium_codes <- read_waterdata_metadata("medium-codes") +head(medium_codes) +``` + +### Methods + +`r dataRetrieval:::get_description("methods")` + +```{r} +methods <- read_waterdata_metadata("methods") +head(methods) +``` + +### Method Categories + +`r dataRetrieval:::get_description("method-categories")` + +```{r} +method_categories <- read_waterdata_metadata("method-categories") +head(method_categories) +``` + +### Method Citations + +`r dataRetrieval:::get_description("method-citations")` + +```{r} +method_citations <- 
read_waterdata_metadata("method-citations") +head(method_citations) +``` + +### Citations + +`r dataRetrieval:::get_description("citations")` + +```{r} +citations <- read_waterdata_metadata("citations") +head(citations) +``` + + +### National Aquifer Codes + +`r dataRetrieval:::get_description("national-aquifer-codes")` + +```{r} +national_aquifer_codes <- read_waterdata_metadata("national-aquifer-codes") +head(national_aquifer_codes) +``` + +### Parameter Codes + +`r dataRetrieval:::get_description("parameter-codes")` + +```{r} +parameter_codes <- read_waterdata_metadata("parameter-codes") +head(parameter_codes) +``` + +### Reliability Codes + +`r dataRetrieval:::get_description("reliability-codes")` + +```{r} +reliability_codes <- read_waterdata_metadata("reliability-codes") +head(reliability_codes) +``` + +### Site Types + +`r dataRetrieval:::get_description("site-types")` + +```{r} +site_types <- read_waterdata_metadata("site-types") +head(site_types) +``` + +### State Identifiers + +`r dataRetrieval:::get_description("states")` + +```{r} +states <- read_waterdata_metadata("states") +head(states) +``` + +### Statistic Codes + +`r dataRetrieval:::get_description("statistic-codes")` + +```{r} +statistic_codes <- read_waterdata_metadata("statistic-codes") +head(statistic_codes) +``` + + +### Topographic Codes + +`r dataRetrieval:::get_description("topographic-codes")` + +```{r} +topographic_codes <- read_waterdata_metadata("topographic-codes") +head(topographic_codes) +``` + +### Time Zone Codes + +`r dataRetrieval:::get_description("time-zone-codes")` + +```{r} +time_zone_codes <- read_waterdata_metadata("time-zone-codes") +head(time_zone_codes) +``` diff --git a/vignettes/Status.Rmd b/vignettes/Status.Rmd index f0fcd3a21..f6aab2006 100644 --- a/vignettes/Status.Rmd +++ b/vignettes/Status.Rmd @@ -60,18 +60,20 @@ df <- data.frame( "`read_waterdata_field_measurements`, `read_waterdata_channel`", "`read_waterdata`", "`read_waterdata_continuous`", - "", #rating + "`read_waterdata_ratings`", 
#rating "`read_waterdata_stats_por`, `read_waterdata_stats_daterange`", - rep("", 2), + "`read_waterdata_peaks`", + "", "`read_waterdata_ts_meta`, `read_waterdata_field_meta`, `read_waterdata_combined_meta`" ), "Available on (branch)" = c(rep("main (CRAN)", 6), "main (CRAN)", "main (CRAN)", "main (CRAN)", - "", + "develop", "main (CRAN)", - rep("", 2), + "develop", + "", "main (CRAN)") ) diff --git a/vignettes/daily_data_statistics.Rmd b/vignettes/daily_data_statistics.Rmd index 15f7819ab..7ee0fa9fb 100644 --- a/vignettes/daily_data_statistics.Rmd +++ b/vignettes/daily_data_statistics.Rmd @@ -45,12 +45,12 @@ Note that the `start_date` and `end_date` are set in `month-day` format to descr ```{r} jan_por_mean <- read_waterdata_stats_por( - monitoring_location_id = site1, - parameter_code = "00060", - computation = "arithmetic_mean", - start_date = "01-01", - end_date = "01-02" -) + monitoring_location_id = site1, + parameter_code = "00060", + computation = "arithmetic_mean", + start_date = "01-01", + end_date = "01-02" + ) jan_por_mean ``` @@ -59,12 +59,20 @@ The first two rows show the average discharge values, aggregated across all Janu But wait: what's in that third row? Looking at the `time_of_year` and `time_of_year_type` columns, we see the third row represents the average discharge value aggregated across all *Januarys*. This illustrates one quirk of the modern statistics API: any time the `start_date` to `end_date` range overlaps with the first day of a month (e.g., `"01-01"`), we will get month-of-year as well as the day-of-year summary statistics. -You can filter these rows out of the data if you don't want them in downstream analyses: + +You can use the `normal_type` argument to get only day-of-year or month-of-year statistics. 
```{r} -jan_por_mean[jan_por_mean$time_of_year_type != "month_of_year",] +read_waterdata_stats_por( + monitoring_location_id = site1, + parameter_code = "00060", + computation = "arithmetic_mean", + normal_type = "MOY" # or "DOY" for day-of-year +) ``` +### Percentile band plot example + Let's now look at an example that illustrates the benefits of the statistics API. In the example below, we pull all day-of-year discharge percentiles for our site. Keep in mind that doing so *without* the statistics API would require us to download the **entire** daily period of record for this site and hand-compute these percentiles ourselves, a time- and resource-intensive process indeed. @@ -86,7 +94,10 @@ full_por_percentiles |> filter(time_of_year == "01-01" & time_of_year_type == "day_of_year") |> arrange(percentile) |> select( - time_of_year, computation, percentile, value + time_of_year, + computation, + percentile, + value ) ``` @@ -100,19 +111,24 @@ doy_perc_bands <- dplyr::filter(time_of_year_type == "day_of_year") |> select(time_of_year, percentile, value) |> mutate(time_of_year = as.Date(time_of_year, format = "%m-%d")) |> - pivot_wider(names_from = percentile, values_from = value) + pivot_wider(names_from = percentile, values_from = value) pcode_info <- read_waterdata_parameter_codes(parameter_code = "00060") -bins <- c("95 - Max", "90 - 95", "75 - 90", "25 - 75", - "10 - 25", "5 - 10", "Min - 5") -bins <- factor(bins,levels = bins) +bins <- c( + "95 - Max", + "90 - 95", + "75 - 90", + "25 - 75", + "10 - 25", + "5 - 10", + "Min - 5" +) +bins <- factor(bins, levels = bins) -update_geom_defaults("ribbon", - list(alpha = 0.6)) +update_geom_defaults("ribbon", list(alpha = 0.6)) -ggplot(data = doy_perc_bands, - aes(x = time_of_year)) + +ggplot(data = doy_perc_bands, aes(x = time_of_year)) + geom_ribbon(aes(ymin = `95`, ymax = `100`, fill = bins[1])) + geom_ribbon(aes(ymin = `90`, ymax = `95`, fill = bins[2])) + geom_ribbon(aes(ymin = `75`, ymax = `90`, fill = bins[3])) + 
@@ -120,11 +136,16 @@ ggplot(data = doy_perc_bands, geom_ribbon(aes(ymin = `10`, ymax = `25`, fill = bins[5])) + geom_ribbon(aes(ymin = `5`, ymax = `10`, fill = bins[6])) + geom_ribbon(aes(ymin = `0`, ymax = `5`, fill = bins[7])) + - scale_x_date(date_labels = "%b", date_breaks = "1 month", - expand = expand_scale(mult = c(0, 0))) + - scale_y_log10(breaks = scales::breaks_log(base = 10), - labels = scales::label_log(base = 10), - minor_breaks = scales::minor_breaks_log()) + + scale_x_date( + date_labels = "%b", + date_breaks = "1 month", + expand = expand_scale(mult = c(0, 0)) + ) + + scale_y_log10( + breaks = scales::breaks_log(base = 10), + labels = scales::label_log(base = 10), + minor_breaks = scales::minor_breaks_log() + ) + annotation_logticks(sides = "lr") + labs( x = "Month-day", @@ -132,22 +153,27 @@ ggplot(data = doy_perc_bands, title = "Day-of-year percentile bands" ) + theme_bw() + - scale_fill_manual("Historical Percentiles", - values = c("95 - Max" = "#292f6b", - "90 - 95" = "#5699c0", - "75 - 90" = "#aacee0", - "25 - 75" = "#e9e9e9", - "10 - 25" = "#ebd6ab", - "5 - 10" = "#dcb668", - "Min - 5" = "#8f4f1f"), - labels = c("95th Percentile - Max", - "90th - 95th Percentile", - "75th - 90th Percentile", - "25th - 75th Percentile", - "10th - 25th Percentile", - "5th - 10th Percentile", - "Min - 5th Percentile")) - + scale_fill_manual( + "Historical Percentiles", + values = c( + "95 - Max" = "#292f6b", + "90 - 95" = "#5699c0", + "75 - 90" = "#aacee0", + "25 - 75" = "#e9e9e9", + "10 - 25" = "#ebd6ab", + "5 - 10" = "#dcb668", + "Min - 5" = "#8f4f1f" + ), + labels = c( + "95th Percentile - Max", + "90th - 95th Percentile", + "75th - 90th Percentile", + "25th - 75th Percentile", + "10th - 25th Percentile", + "5th - 10th Percentile", + "Min - 5th Percentile" + ) + ) ``` Finally, let's overlay daily mean data onto the plot: @@ -155,22 +181,24 @@ Finally, let's overlay daily mean data onto the plot: ```{r} range <- as.Date(c("2025-01-01", "2026-03-02")) 
-complete_df <- data.frame(time = seq.Date(from = range[1], - to = as.Date("2026-12-30"), - by = "day")) |> +complete_df <- data.frame( + time = seq.Date(from = range[1], to = as.Date("2026-12-30"), by = "day") +) |> mutate(time_of_year = as.Date(format(time, "%m-%d"), format = "%m-%d")) -daily_data <- complete_df |> - left_join(read_waterdata_daily( - monitoring_location_id = site1, - parameter_code = "00060", - statistic_id = "00003", - time = range), - by = "time") |> +daily_data <- complete_df |> + left_join( + read_waterdata_daily( + monitoring_location_id = site1, + parameter_code = "00060", + statistic_id = "00003", + time = range + ), + by = "time" + ) |> left_join(doy_perc_bands, by = "time_of_year") -ggplot(data = daily_data, - aes(x = time)) + +ggplot(data = daily_data, aes(x = time)) + geom_ribbon(aes(ymin = `95`, ymax = `100`, fill = bins[1])) + geom_ribbon(aes(ymin = `90`, ymax = `95`, fill = bins[2])) + geom_ribbon(aes(ymin = `75`, ymax = `90`, fill = bins[3])) + @@ -179,43 +207,55 @@ ggplot(data = daily_data, geom_ribbon(aes(ymin = `5`, ymax = `10`, fill = bins[6])) + geom_ribbon(aes(ymin = `0`, ymax = `5`, fill = bins[7])) + geom_line(aes(y = value, color = approval_status), linewidth = 1) + - scale_x_date(date_labels = "%b %Y", - date_breaks = "3 month", - expand = expand_scale(mult = c(0, 0)))+ - scale_y_log10(breaks = scales::breaks_log(base = 10), - labels = scales::label_log(base = 10), - minor_breaks = scales::minor_breaks_log()) + + scale_x_date( + date_labels = "%b %Y", + date_breaks = "3 month", + expand = expand_scale(mult = c(0, 0)) + ) + + scale_y_log10( + breaks = scales::breaks_log(base = 10), + labels = scales::label_log(base = 10), + minor_breaks = scales::minor_breaks_log() + ) + annotation_logticks(sides = "lr") + - scale_color_manual("Status", - values = c("Approved" = "black", - "Provisional" = "red"), - breaks = c("Approved", "Provisional"), - limits = force, - drop = TRUE) + - scale_fill_manual("Historical Percentiles", - 
values = c("95 - Max" = "#292f6b", - "90 - 95" = "#5699c0", - "75 - 90" = "#aacee0", - "25 - 75" = "#e9e9e9", - "10 - 25" = "#ebd6ab", - "5 - 10" = "#dcb668", - "Min - 5" = "#8f4f1f"), - labels = c("95th Percentile - Max", - "90th - 95th Percentile", - "75th - 90th Percentile", - "25th - 75th Percentile", - "10th - 25th Percentile", - "5th - 10th Percentile", - "Min - 5th Percentile")) + + scale_color_manual( + "Status", + values = c("Approved" = "black", "Provisional" = "red"), + breaks = c("Approved", "Provisional"), + limits = force, + drop = TRUE + ) + + scale_fill_manual( + "Historical Percentiles", + values = c( + "95 - Max" = "#292f6b", + "90 - 95" = "#5699c0", + "75 - 90" = "#aacee0", + "25 - 75" = "#e9e9e9", + "10 - 25" = "#ebd6ab", + "5 - 10" = "#dcb668", + "Min - 5" = "#8f4f1f" + ), + labels = c( + "95th Percentile - Max", + "90th - 95th Percentile", + "75th - 90th Percentile", + "25th - 75th Percentile", + "10th - 25th Percentile", + "5th - 10th Percentile", + "Min - 5th Percentile" + ) + ) + labs( x = "", y = pcode_info$unit_of_measure, - title = paste0("January 1, 2025 - December 31, 2026\n", - pcode_info$parameter_description) + title = paste0( + "January 1, 2025 - December 31, 2026\n", + pcode_info$parameter_description + ) ) + theme_bw() - ``` As also seen on the WDFN pages: @@ -230,12 +270,12 @@ Notice that the `start_date` and `end_date` arguments are given in `YYYY-MM-DD` ```{r} jan_daterange_mean <- read_waterdata_stats_daterange( - monitoring_location_id = site1, - parameter_code = "00060", - computation = "arithmetic_mean", - start_date = "2024-01-01", - end_date = "2024-01-31" -) + monitoring_location_id = site1, + parameter_code = "00060", + computation = "arithmetic_mean", + start_date = "2024-01-01", + end_date = "2024-01-31" + ) jan_daterange_mean ``` @@ -247,16 +287,31 @@ Annual statistics will be returned for any calendar/water years than intersect w ```{r} multiyear_daterange_mean <- read_waterdata_stats_daterange( + 
monitoring_location_id = site1, + parameter_code = "00060", + computation = "arithmetic_mean", + start_date = "2023-09-30", + end_date = "2024-01-01" + ) + +multiyear_daterange_mean +``` + +You can set the `interval_type` argument to limit the output to specific intervals. + +```{r} +read_waterdata_stats_daterange( monitoring_location_id = site1, parameter_code = "00060", computation = "arithmetic_mean", start_date = "2023-09-30", - end_date = "2024-01-01" + end_date = "2024-01-01", + interval_type = c("CY", "WY") ) - -multiyear_daterange_mean ``` +### Monthly average table example + Before we move on, consider the following example where we create a Monthly mean statistics table similar to what you'd find in the [Water Year Summaries](https://rconnect.chs.usgs.gov/water-year-summaries-dev/?_inputs_&render_button=1&site_no_select=%2205428500%22&wateryear_select=%222024%22). Note that the values reported here are slightly different from what you'll find in the Water Year Summary because of differences in how values are rounded. 
```{r, message=FALSE, warning=FALSE} diff --git a/vignettes/dataRetrieval.Rmd b/vignettes/dataRetrieval.Rmd index a89c25566..d5b3c69ca 100644 --- a/vignettes/dataRetrieval.Rmd +++ b/vignettes/dataRetrieval.Rmd @@ -71,9 +71,9 @@ Functions <- c( "read_waterdata", "read_waterdata_daily", "read_waterdata_continuous", - "readNWISrating", + "read_waterdata_ratings", "read_waterdata_field_measurements", - "readNWISpeak", + "read_waterdata_peaks", "read_waterdata_field_measurements", "readNWISuse", "read_waterdata_stats_por, read_waterdata_stats_daterange", @@ -81,7 +81,7 @@ Functions <- c( "read_waterdata_monitoring_location", "read_waterdata_samples", "summarize_waterdata_samples", - "whatNWISsites", + "read_waterdata_combined_meta", "read_waterdata_ts_meta", "readWQPdata", "readWQPqw", @@ -119,16 +119,17 @@ Description <- c( Source <- c("USGS Water Data API", "USGS Water Data API", "USGS Water Data API", - "NWIS", + "USGS Water Data API", + "USGS Water Data API", + "USGS Water Data API", "USGS Water Data API", "NWIS", "USGS Water Data API", - rep("NWIS",2), "USGS Water Data API", "USGS Water Data API", "USGS Samples Data", "USGS Samples Data", - "NWIS", + "USGS Water Data API", "USGS Water Data API", rep("WQP", 7)) @@ -139,7 +140,7 @@ data.df <- data.frame( Source, stringsAsFactors = FALSE ) -kable(data.df, +kable(data.df, col.names = c("Name", "Data Returned", "Source"), caption = "Table 1: dataRetrieval functions" ) ``` @@ -381,11 +382,11 @@ groundWater <- read_waterdata_field_measurements(monitoring_location_id = siteNu ## Peak Flow Data -Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the `readNWISpeak` function. 
Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. +Peak flow data are instantaneous discharge or stage data that record the maximum values of these variables during a flood event. They include the annual peak flood event but can also include records of other peaks that are lower than the annual maximum. Peak discharge measurements can be obtained with the `read_waterdata_peaks` function. ```{r peakexample, echo=TRUE, eval=FALSE} -siteNumber <- "01594440" -peakData <- readNWISpeak(siteNumber) +siteNumber <- "USGS-01594440" +peakData <- read_waterdata_peaks(monitoring_location_id = siteNumber) ``` @@ -393,11 +394,12 @@ peakData <- readNWISpeak(siteNumber) Rating curves are the calibration curves that are used to convert measurements of stage to discharge. Because of changing hydrologic conditions these rating curves change over time. Information on the returned data can be found with the `comment` function and attached attributes as described in the [metadata](#embedded-metadata) section. -Rating curves can be obtained with the `readNWISrating` function. +Rating curves can be obtained with the `read_waterdata_ratings` function. ```{r ratingexample, echo=TRUE, eval=FALSE} -ratingData <- readNWISrating(siteNumber, "base") -attr(ratingData, "RATING") +ratingData <- read_waterdata_ratings(monitoring_location_id = siteNumber, + file_type = "base") +comment(ratingData[[1]]) ``` diff --git a/vignettes/read_waterdata_functions.Rmd b/vignettes/read_waterdata_functions.Rmd index 5cb9c37de..cb2e94cfa 100644 --- a/vignettes/read_waterdata_functions.Rmd +++ b/vignettes/read_waterdata_functions.Rmd @@ -445,171 +445,7 @@ leaflet(data = what_huc_sites |> There is a new function `read_waterdata_metadata` that gives access to a wide variety of tables that have metadata information. Any returned column can also be filtered on, similar to the time series functions above. 
-### Agency Codes - -`r dataRetrieval:::get_description("agency-codes")` - -```{r} -#| eval: false -agency_codes <- read_waterdata_metadata("agency-codes") -``` - -### Altitude Datums - -`r dataRetrieval:::get_description("altitude-datums")` - -```{r} -#| eval: false -altitude_datums <- read_waterdata_metadata("altitude-datums") -``` - - -### Aquifer Codes - -`r dataRetrieval:::get_description("aquifer-codes")` - -```{r} -#| eval: false -aquifer_codes <- read_waterdata_metadata("aquifer-codes") -``` - -### Aquifer Types - -`r dataRetrieval:::get_description("aquifer-types")` - -```{r} -#| eval: false -aquifer_types <- read_waterdata_metadata("aquifer-types") -``` - -### Coordinate Accuracy Codes - -`r dataRetrieval:::get_description("coordinate-accuracy-codes")` - -```{r} -#| eval: false -coordinate_accuracy_codes <- read_waterdata_metadata("coordinate-accuracy-codes") -``` - -### Coordinate Datum Codes - -`r dataRetrieval:::get_description("coordinate-accuracy-codes")` - -```{r} -#| eval: false -coordinate_datum_codes <- read_waterdata_metadata("coordinate-datum-codes") -``` - -### Coordinate Method Codes - -`r dataRetrieval:::get_description("coordinate-method-codes")` - -```{r} -#| eval: false -coordinate_method_codes <- read_waterdata_metadata("coordinate-method-codes") -``` - -### County Identifiers - -`r dataRetrieval:::get_description("counties")` - -```{r} -#| eval: false -counties <- read_waterdata_metadata("counties") -``` - -### Hydrologic Unit Codes - -`r dataRetrieval:::get_description("hydrologic-unit-codes")` - -```{r} -#| eval: false -huc_codes <- read_waterdata_metadata("hydrologic-unit-codes") -``` - - -### Medium Codes - -`r dataRetrieval:::get_description("medium-codes")` - -```{r} -#| eval: false -medium_codes <- read_waterdata_metadata("medium-codes") -``` - -### National Aquifer Codes - -`r dataRetrieval:::get_description("national-aquifer-codes")` - -```{r} -#| eval: false -medium_codes <- read_waterdata_metadata("medium-codes") -``` - -### 
Parameter Codes - -`r dataRetrieval:::get_description("parameter-codes")` - -```{r} -#| eval: false -parameter_codes <- read_waterdata_metadata("parameter-codes") -``` - -### Reliability Codes - -`r dataRetrieval:::get_description("reliability-codes")` - -```{r} -#| eval: false -reliability_codes <- read_waterdata_metadata("reliability-codes") -``` - -### Site Types - -`r dataRetrieval:::get_description("site-types")` - -```{r} -#| eval: false -site_types <- read_waterdata_metadata("site-types") -``` - -### State Identifiers - -`r dataRetrieval:::get_description("states")` - -```{r} -#| eval: false -states <- read_waterdata_metadata("states") -``` - -### Statistic Codes - -`r dataRetrieval:::get_description("statistic-codes")` - -```{r} -#| eval: false -statistic_codes <- read_waterdata_metadata("statistic-codes") -``` - - -### Topographic Codes - -`r dataRetrieval:::get_description("topographic-codes")` - -```{r} -#| eval: false -topographic_codes <- read_waterdata_metadata("topographic-codes") -``` - -### Time Zone Codes - -`r dataRetrieval:::get_description("time-zone-codes")` - -```{r} -#| eval: false -time_zone_codes <- read_waterdata_metadata("time-zone-codes") -``` - +See [USGS Reference Lists](articles/Reference_Lists.html) for more detail. ## Discrete Samples diff --git a/vignettes/samples_data.Rmd b/vignettes/samples_data.Rmd index 8895130da..e3928f873 100644 --- a/vignettes/samples_data.Rmd +++ b/vignettes/samples_data.Rmd @@ -202,7 +202,7 @@ Let's say we don't know a USGS site number, but we do have an area of interest. North and south are latitude values; east and west are longitude values. A vector of 4 (west, south, east, north) is expected. 
-```{r} +```{r eval=FALSE} bbox <- c(-90.8, 44.2, -89.9, 45.0) user_char <- "Phosphorus as phosphorus, water, unfiltered" @@ -214,9 +214,6 @@ bbox_sites <- read_waterdata_samples(boundingBox = bbox, ``` -```{r echo=FALSE, message=FALSE} -map_it(bbox_sites) -``` ### Hydrologic Unit Codes (HUCs) diff --git a/vignettes/tutorial.Rmd b/vignettes/tutorial.Rmd index 19651ee65..e97fa1cf5 100644 --- a/vignettes/tutorial.Rmd +++ b/vignettes/tutorial.Rmd @@ -112,8 +112,8 @@ There are many types of data available from `dataRetrieval`. To understand how t |site|`readNWISsite`|Site metadata| `read_waterdata_monitoring_location` | |pcode|`readNWISpCode`|Parameter code metadata | `read_waterdata_parameter_codes` | |stat|`readNWISstat`| Site statistics | `read_waterdata_stats_por`, `read_waterdata_stats_daterange` | -|rating|`readNWISrating`| Rating curves| None yet | -|peak|`readNWISpeak`|Peak flow| None yet | +|rating|`readNWISrating`| Rating curves| `read_waterdata_ratings` | +|peak|`readNWISpeak`|Peak flow| `read_waterdata_peaks` | |use|`readNWISuse`|Water Use| None yet | |meas|`readNWISmeas`|Discrete surface water| `read_waterdata_field_measurements`| | | `readNWISdata` | General data import | `read_waterdata` |