Skip to content

Commit 434602a

Browse files
Merge pull request #16 from ropensci/factors_as_formats
Add factors_as_strings argument to sas_from_r()
2 parents 3a7af14 + 7c6b863 commit 434602a

File tree

5 files changed

+131
-29
lines changed

5 files changed

+131
-29
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# sasquatch (development version)
2+
13
# sasquatch 0.1.0
24

35
## Features

R/from-r.R

Lines changed: 88 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#' @param x `data.frame`; R table.
99
#' @param table_name string; Name of table to be created in SAS.
1010
#' @param libref string; Name of libref to store SAS table within.
11+
#' @param factors_as_strings logical; If `TRUE`, factors will become SAS strings.
12+
#' Else, factors will become formatted numerics.
1113
#'
1214
#' @details
1315
#' SAS only has two data types (numeric and character). Data types are converted
@@ -31,43 +33,57 @@
3133
#' sas_connect()
3234
#' sas_from_r(mtcars, "mtcars")
3335
#' }
34-
sas_from_r <- function(x, table_name, libref = "WORK") {
36+
sas_from_r <- function(
37+
x,
38+
table_name,
39+
libref = "WORK",
40+
factors_as_strings = TRUE
41+
) {
3542
check_session()
3643
check_data_frame(x)
3744
check_has_sas_valid_datatypes(x)
3845
check_has_sas_valid_colnames(x)
3946
check_has_rownames(x)
4047
check_string(table_name)
4148
check_string(libref)
49+
check_logical(factors_as_strings)
4250

43-
x_data <- from_r_data(x)
44-
x_datetypes <- from_r_datetypes(x)
45-
date_dict <- do.call(what = reticulate::dict, x_datetypes)
51+
x_data <- from_r_data(x, factors_as_strings)
52+
date_dict <- from_r_datedict(x)
53+
factor_dict <- reticulate::dict()
54+
if (!factors_as_strings) {
55+
factor_dict <- from_r_factordict(x, libref)
56+
}
4657

4758
execute_if_connection_active(
4859
reticulate::py_capture_output(
49-
.sas_from_r(x_data, table_name, libref, date_dict)
60+
.sas_from_r(x_data, table_name, libref, date_dict, factor_dict)
5061
)
5162
)
5263

5364
invisible(x)
5465
}
5566

56-
.sas_from_r <- function(x, table_name, libref, date_dict) {
67+
.sas_from_r <- function(x, table_name, libref, date_dict, factor_dict) {
5768
.pkgenv$session$dataframe2sasdata(
5869
x,
5970
table_name,
6071
libref,
61-
datetimes = date_dict
72+
datetimes = date_dict,
73+
outfmts = factor_dict
6274
)
6375
}
6476

65-
from_r_data <- function(x) {
77+
from_r_data <- function(x, factors_as_strings) {
6678
numeric_cols <- vapply(x, is.integer, FUN.VALUE = logical(1)) |
6779
vapply(x, is.logical, FUN.VALUE = logical(1))
6880
x[numeric_cols] <- lapply(x[numeric_cols], as.double)
6981
factor_cols <- vapply(x, is.factor, FUN.VALUE = logical(1))
70-
x[factor_cols] <- lapply(x[factor_cols], as.character)
82+
if (factors_as_strings) {
83+
x[factor_cols] <- lapply(x[factor_cols], as.character)
84+
} else {
85+
x[factor_cols] <- lapply(x[factor_cols], as.numeric)
86+
}
7187
date_cols <- vapply(
7288
x,
7389
\(col) identical(class(col), "Date"),
@@ -86,7 +102,7 @@ from_r_data <- function(x) {
86102
x
87103
}
88104

89-
from_r_datetypes <- function(x) {
105+
from_r_datedict <- function(x) {
90106
date_cols <- vapply(
91107
x,
92108
\(col) identical(class(col), "Date"),
@@ -97,7 +113,68 @@ from_r_datetypes <- function(x) {
97113
date_list <- as.list(rep("date", length(date_colnames)))
98114
names(date_list) <- date_colnames
99115

100-
date_list
116+
do.call(reticulate::dict, date_list)
117+
}
118+
119+
from_r_factordict <- function(x, libref) {
120+
is_factor_col <- vapply(x, is.factor, logical(1))
121+
factor_col_names <- names(x)[is_factor_col]
122+
123+
format_dataframe <- lapply(
124+
factor_col_names,
125+
function(col_name) {
126+
generate_format_string(x, col_name, libref)
127+
}
128+
) |>
129+
do.call(what = rbind.data.frame)
130+
131+
format_statements <- paste0(format_dataframe$statement, collapse = "\n\n")
132+
133+
execute_if_connection_active(
134+
reticulate::py_capture_output(
135+
.sas_run_string(format_statements)
136+
)
137+
)
138+
139+
reticulate::py_dict(
140+
factor_col_names,
141+
paste0(format_dataframe$name, ".")
142+
)
143+
}
144+
145+
generate_format_string <- function(x, colname, libref) {
146+
col <- x[[colname]]
147+
148+
proc_statement <- paste0("proc format library = ", libref, ";")
149+
150+
rand_string <- paste(sample(c(LETTERS, 0:9), 6), collapse = "")
151+
format_name <- paste0(colname, "_", rand_string)
152+
values_start <- paste0("value ", format_name)
153+
154+
col_levels <- levels(col)
155+
format_values <- vapply(
156+
seq_along(col_levels),
157+
function(i) {
158+
paste(i, "=", sas_quote(col_levels[i]))
159+
},
160+
character(1)
161+
)
162+
163+
list(
164+
name = format_name,
165+
statement = paste(
166+
proc_statement,
167+
values_start,
168+
paste(format_values, collapse = "\n"),
169+
";",
170+
"run;",
171+
sep = "\n"
172+
)
173+
)
174+
}
175+
176+
sas_quote <- function(x) {
177+
paste0("'", gsub("'", "''", x), "'")
101178
}
102179

103180
check_has_sas_valid_datatypes <- function(x, call = rlang::caller_env()) {

man/sas_from_r.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/sas_run_string.Rd

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-from-r.R

Lines changed: 36 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,54 +2,54 @@
22
test_that("double should not be altered", {
33
x <- data.frame(a = runif(1000, min = -1, max = 1))
44

5-
expect_equal(x, from_r_data(x))
5+
expect_equal(x, from_r_data(x, factors_as_strings = FALSE))
66

77
x$a[sample(seq_len(nrow(x)), 100)] <- NA
88

9-
expect_equal(from_r_data(x), x)
9+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x)
1010
})
1111

1212
test_that("integer should become a double", {
1313
x <- data.frame(a = sample(-1000:1000, 1000, replace = TRUE))
1414
x_expected <- x
1515
x_expected$a <- as.double(x_expected$a)
1616

17-
expect_equal(from_r_data(x), x_expected)
17+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
1818

1919
x$a[sample(seq_len(nrow(x)), 100)] <- NA
2020
x_expected <- x
2121
x_expected$a <- as.double(x_expected$a)
2222

23-
expect_equal(from_r_data(x), x_expected)
23+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
2424
})
2525

2626
test_that("logical should become a double", {
2727
x <- data.frame(a = sample(c(TRUE, FALSE), 1000, replace = TRUE))
2828
x_expected <- x
2929
x_expected$a <- as.double(x_expected$a)
3030

31-
expect_equal(from_r_data(x), x_expected)
31+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
3232

3333
x$a[sample(seq_len(nrow(x)), 100)] <- NA
3434
x_expected <- x
3535
x_expected$a <- as.double(x_expected$a)
3636

37-
expect_equal(from_r_data(x), x_expected)
37+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
3838
})
3939

4040
test_that("character should not be altered", {
4141
x <- data.frame(
4242
a = sample(c("apple", "pear", "orange", "cherry"), 1000, replace = TRUE)
4343
)
4444

45-
expect_equal(from_r_data(x), x)
45+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x)
4646

4747
x$a[sample(seq_len(nrow(x)), 100)] <- NA
4848

49-
expect_equal(from_r_data(x), x)
49+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x)
5050
})
5151

52-
test_that("factor should become a character", {
52+
test_that("factor as character", {
5353
x <- data.frame(
5454
a = as.factor(sample(
5555
c("apple", "pear", "orange", "cherry"),
@@ -60,13 +60,33 @@ test_that("factor should become a character", {
6060
x_expected <- x
6161
x_expected$a <- as.character(x_expected$a)
6262

63-
expect_equal(from_r_data(x), x_expected)
63+
expect_equal(from_r_data(x, factors_as_strings = TRUE), x_expected)
6464

6565
x$a[sample(seq_len(nrow(x)), 100)] <- NA
6666
x_expected <- x
6767
x_expected$a <- as.character(x_expected$a)
6868

69-
expect_equal(from_r_data(x), x_expected)
69+
expect_equal(from_r_data(x, factors_as_strings = TRUE), x_expected)
70+
})
71+
72+
test_that("factor as format (numeric)", {
73+
x <- data.frame(
74+
a = as.factor(sample(
75+
c("apple", "pear", "orange", "cherry"),
76+
1000,
77+
replace = TRUE
78+
))
79+
)
80+
x_expected <- x
81+
x_expected$a <- as.numeric(x_expected$a)
82+
83+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
84+
85+
x$a[sample(seq_len(nrow(x)), 100)] <- NA
86+
x_expected <- x
87+
x_expected$a <- as.numeric(x_expected$a)
88+
89+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
7090
})
7191

7292
test_that("POSIXct should be converted to UTC", {
@@ -78,13 +98,13 @@ test_that("POSIXct should be converted to UTC", {
7898
x_expected <- x
7999
x_expected$a <- as.POSIXct(format(x_expected$a), tz = "UTC")
80100

81-
expect_equal(from_r_data(x), x_expected)
101+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
82102

83103
x$a[sample(seq_len(nrow(x)), 100)] <- NA
84104
x_expected <- x
85105
x_expected$a <- as.POSIXct(format(x_expected$a), tz = "UTC")
86106

87-
expect_equal(from_r_data(x), x_expected)
107+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
88108
})
89109

90110
test_that("date should become a POSIXct", {
@@ -94,13 +114,13 @@ test_that("date should become a POSIXct", {
94114
x_expected <- x
95115
x_expected$a <- as.POSIXct(x_expected$a)
96116

97-
expect_equal(from_r_data(x), x_expected)
117+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
98118

99119
x$a[sample(seq_len(nrow(x)), 100)] <- NA
100120
x_expected <- x
101121
x_expected$a <- as.POSIXct(x_expected$a)
102122

103-
expect_equal(from_r_data(x), x_expected)
123+
expect_equal(from_r_data(x, factors_as_strings = FALSE), x_expected)
104124
})
105125

106126
test_that("date should be added to date dict", {
@@ -114,7 +134,7 @@ test_that("date should be added to date dict", {
114134
)
115135

116136
expect_equal(
117-
from_r_datetypes(x),
137+
reticulate::py_to_r(from_r_datedict(x)),
118138
date_dict_expected
119139
)
120140
})

0 commit comments

Comments
 (0)