Skip to content

Commit f69ef5e

Browse files
committed
changed data.frame format to use column-oriented values
1 parent b1f7530 commit f69ef5e

File tree

1 file changed

+69
-58
lines changed

1 file changed

+69
-58
lines changed

src/EnvironmentWatcher.cpp

Lines changed: 69 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -31,17 +31,14 @@ inline bool isOrderedFactor(Rcpp::StringVector& classNames, RObject& robj) {
3131
}
3232

3333
std::string
34-
columnType(RObject& robj, json& jobj, int colNum) {
34+
columnType(RObject& robj) {
3535
if (Rf_isFactor(robj)) {
3636
Rcpp::StringVector classVal(robj.attr(kClass));
3737
if (isOrderedFactor(classVal, robj)) {
3838
return "of";
3939
} else {
4040
return "f";
4141
}
42-
char namebuf[16];
43-
snprintf(namebuf, 16, "f%d", colNum);
44-
jobj[namebuf] = Rcpp::StringVector(robj.attr("levels"));
4542
} else {
4643
switch(robj.sexp_type()) {
4744
case LGLSXP: return "b";
@@ -54,36 +51,69 @@ columnType(RObject& robj, json& jobj, int colNum) {
5451
}
5552

5653
json
57-
basicValue(RObject& val, int idx)
58-
{
59-
switch(val.sexp_type()) {
54+
basicColumnValues(RObject &colValue, int rowCount) {
55+
if(rowCount < 1) { return json::array(); }
56+
switch(colValue.sexp_type()) {
6057
case LGLSXP: {
61-
Rcpp::LogicalVector bvector(val);
62-
bool bval = bvector[idx] == 0;
63-
return json(bval);
58+
// Rcpp returns logical values as ints of value 0 or 1, not bool
59+
Rcpp::LogicalVector bvector(colValue);
60+
json jvalues;
61+
for (int i = 0; i < bvector.length(); i++) {
62+
if (Rcpp::LogicalVector::is_na(bvector[i])) {
63+
jvalues.push_back(json(nullptr));
64+
} else {
65+
jvalues.push_back(json(bvector[i] == 1));
66+
}
67+
}
68+
return jvalues;
6469
}
6570
case INTSXP: {
66-
Rcpp::IntegerVector ivector(val);
67-
return json(ivector[idx]);
68-
}
69-
case STRSXP: {
70-
Rcpp::StringVector svector(val);
71-
if (Rcpp::StringVector::is_na(svector[idx])) return json(nullptr);
72-
return json(svector[idx]);
71+
Rcpp::IntegerVector ivector(colValue);
72+
json jvalues;
73+
for (int i = 0; i < ivector.length(); i++) {
74+
if (Rcpp::IntegerVector::is_na(ivector[i])) {
75+
jvalues.push_back(json(nullptr));
76+
} else {
77+
jvalues.push_back(json(ivector[i]));
78+
}
79+
}
80+
return jvalues;
7381
}
7482
case REALSXP: {
75-
Rcpp::NumericVector dvals(val);
76-
double d = dvals[idx];
77-
if (d == R_NaN || std::isnan(d)) return json("NaN");
78-
else if (d == R_PosInf || std::isinf(d)) return json("Inf");
79-
else if (d == R_NegInf || d == -std::numeric_limits< double >::infinity()) return json("-Inf");
80-
return json(d);
83+
Rcpp::NumericVector dvals(colValue);
84+
json jvalues;
85+
for (int i=0; i < dvals.length(); i++) {
86+
// This is the only test that really works. See https://stackoverflow.com/questions/26241085/rcpp-function-check-if-missing-value/26262984#26262984
87+
if (R_IsNA(dvals[i])) {
88+
jvalues.push_back(json(nullptr));
89+
} else {
90+
double d = dvals[i];
91+
if (d == R_NaN || std::isnan(d)) { jvalues.push_back("NaN"); }
92+
else if (d == R_NegInf || d == -std::numeric_limits< double >::infinity()) { jvalues.push_back("-Inf"); }
93+
else if (d == R_PosInf || std::isinf(d)) { jvalues.push_back("Inf"); }
94+
else { jvalues.push_back(d); }
95+
}
96+
}
97+
return jvalues;
98+
}
99+
case CPLXSXP:
100+
case STRSXP: {
101+
Rcpp::StringVector svals(colValue);
102+
json jvalues;
103+
for (int i = 0; i < svals.length(); i++) {
104+
if (Rcpp::StringVector::is_na(svals[i])) {
105+
jvalues.push_back(json(nullptr));
106+
} else {
107+
jvalues.push_back(json(svals[i]));
108+
}
109+
}
110+
return jvalues;
81111
}
82112
default:
83-
LOG(WARNING) << "dataframe invalid col type:" << val.sexp_type() << std::endl;
113+
LOG(WARNING) << "dataframe invalid col type:" << colValue.sexp_type() << std::endl;
84114
return json(nullptr);
85115
}
86-
116+
87117
}
88118

89119
RC2::EnvironmentWatcher::EnvironmentWatcher ( SEXP environ, ExecuteCallback callback )
@@ -291,48 +321,29 @@ void
291321
RC2::EnvironmentWatcher::setDataFrameData ( RObject& robj, json& jobj )
292322
{
293323
int colCount = LENGTH(robj);
294-
Rcpp::StringVector colNames(robj.attr("names"));
295-
jobj["cols"] = colNames;
296324
jobj["ncol"] = colCount;
297325
RObject rowList(robj.attr("row.names"));
298326
if (LENGTH(rowList) > 0) {
299327
jobj["row.names"] = Rcpp::StringVector(rowList);
300328
}
301-
json colTypes;
302-
std::vector<RObject> colObjs;
329+
json columns;
330+
int rowCount = 0;
331+
Rcpp::StringVector colNames(robj.attr("names"));
303332
for (int i=0; i < colCount; i++) {
304333
RObject element(VECTOR_ELT(robj, i));
305-
if (element.sexp_type() == CPLXSXP) //coerce complex to string
306-
element = Rf_coerceVector(element, STRSXP);
307-
colObjs.push_back(element);
308-
colTypes.push_back(columnType(element, jobj, i));
309-
}
310-
jobj["types"] = colTypes;
311-
int rowCount = LENGTH(colObjs[0]);
312-
jobj["nrow"] = rowCount;
313-
//create robjs for each column list
314-
json rows;
315-
for (int row=0; row < kMaxLen && row < rowCount; row++) {
316-
json aRow;
317-
for (int col=0; col < colNames.length(); col++) {
318-
RObject &val = colObjs[col];
319-
aRow.push_back(basicValue(val, row));
320-
// switch(val.sexp_type()) {
321-
// case LGLSXP: aRow.push_back(LOGICAL(val)[row]); break;
322-
// case INTSXP: aRow.push_back(INTEGER(val)[row]); break;
323-
// case REALSXP: aRow.push_back(REAL(val)[row]); break;
324-
// case STRSXP:
325-
// aRow.push_back(Rcpp::StringVector(val)[row]);
326-
// break;
327-
// default:
328-
// LOG(WARNING) << "dataframe invalid col type:" << val.sexp_type() << std::endl;
329-
// aRow.push_back(nullptr);
330-
// break;
331-
// }
334+
rowCount = LENGTH(element);
335+
json aCol;
336+
std::string aType = columnType(element);
337+
aCol["type"] = aType;
338+
if(aType == "of" || aType == "f") {
339+
aCol["levels"] = Rcpp::StringVector(element.attr("levels"));
332340
}
333-
rows.push_back(aRow);
341+
aCol["values"] = basicColumnValues(element, rowCount);
342+
aCol["name"] = colNames[i];
343+
columns.push_back(aCol);
334344
}
335-
jobj["rows"] = rows;
345+
jobj["columns"] = columns;
346+
jobj["nrow"] = rowCount;
336347
}
337348

338349
void

0 commit comments

Comments
 (0)