-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-11031][SPARKR] Method str() on a DataFrame #9613
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
dab0565
b5129cd
d825d2c
daa3d41
5b4f6b1
6d226e9
05bb4aa
b74288b
db96730
992bf89
6bb5bd4
4135029
a995f6e
7f4adbc
957b3c2
1705432
cfdfc01
5d7deb8
7b8a563
4b416cc
8140e20
6a7ff1b
cfb85e4
0ac7384
a7141cc
74c9651
1428925
40a5202
5bdf3f9
38c21f3
2701898
2a8115d
0ffcb4f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -271,6 +271,7 @@ export("as.DataFrame", | |
| "parquetFile", | ||
| "read.df", | ||
| "sql", | ||
| "str", | ||
| "table", | ||
| "tableNames", | ||
| "tables", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2223,3 +2223,76 @@ setMethod("with", | |
| newEnv <- assignNewEnv(data) | ||
| eval(substitute(expr), envir = newEnv, enclos = newEnv) | ||
| }) | ||
|
|
||
| #' Display the structure of a DataFrame, including column names, column types, as well as a | ||
| #' small sample of rows. | ||
| #' @name str | ||
| #' @title Compactly display the structure of a dataset | ||
| #' @rdname str | ||
| #' @family DataFrame functions | ||
| #' @param object a DataFrame | ||
| #' @examples \dontrun{ | ||
| #' # Create a DataFrame from the Iris dataset | ||
| #' irisDF <- createDataFrame(sqlContext, iris) | ||
| #' | ||
| #' # Show the structure of the DataFrame | ||
| #' str(irisDF) | ||
| #' } | ||
| setMethod("str", | ||
| signature(object = "DataFrame"), | ||
| function(object) { | ||
|
|
||
| # TODO: These could be made global parameters, though in R it's not the case | ||
| MAX_CHAR_PER_ROW <- 120 | ||
| MAX_COLS <- 100 | ||
|
|
||
| # Get the column names and types of the DataFrame | ||
| names <- names(object) | ||
| types <- coltypes(object) | ||
|
|
||
| # Get the first elements of the dataset. Limit number of columns accordingly | ||
| localDF <- if (ncol(object) > MAX_COLS) { | ||
| head(object[, c(1:MAX_COLS)]) | ||
| } else { | ||
| head(object) | ||
| } | ||
|
|
||
| # The number of observations will not be displayed as computing the | ||
| # number of rows is a very expensive operation | ||
| cat(paste0("'", class(object), "': ", length(names), " variables:\n")) | ||
|
|
||
| if (nrow(localDF) > 0) { | ||
| for (i in 1 : ncol(localDF)) { | ||
| # Get the first elements for each column | ||
|
|
||
| firstElements <- if (types[i] == "character") { | ||
| paste(paste0("\"", localDF[,i], "\""), collapse = " ") | ||
| } else { | ||
| paste(localDF[,i], collapse = " ") | ||
| } | ||
|
|
||
| # Add the corresponding number of spaces for alignment | ||
| spaces <- paste(rep(" ", max(nchar(names) - nchar(names[i]))), collapse="") | ||
|
|
||
| # Get the short type. For 'character', it would be 'chr'; | ||
| # for 'numeric', it's 'num', etc. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We have a 100-character line limit. I think the comments here or on line 2230 should fit on one line?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Combining those two lines would result in 106 characters, which exceeds the limit. |
||
| dataType <- SHORT_TYPES[[types[i]]] | ||
| if (is.null(dataType)) { | ||
| dataType <- substring(types[i], 1, 3) | ||
| } | ||
|
|
||
| # Concatenate the colnames, coltypes, and first | ||
| # elements of each column | ||
| line <- paste0(" $ ", names[i], spaces, ": ", | ||
| dataType, " ",firstElements) | ||
|
|
||
| # Chop off extra characters if this is too long | ||
| cat(substr(line, 1, MAX_CHAR_PER_ROW)) | ||
| cat("\n") | ||
| } | ||
|
|
||
| if (ncol(localDF) < ncol(object)) { | ||
| cat(paste0("\nDisplaying first ", ncol(localDF), " columns only.")) | ||
| } | ||
| } | ||
| }) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -378,7 +378,6 @@ setGeneric("subtractByKey", | |
| setGeneric("value", function(bcast) { standardGeneric("value") }) | ||
|
|
||
|
|
||
|
|
||
| #################### DataFrame Methods ######################## | ||
|
|
||
| #' @rdname agg | ||
|
|
@@ -389,6 +388,14 @@ setGeneric("agg", function (x, ...) { standardGeneric("agg") }) | |
| #' @export | ||
| setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") }) | ||
|
|
||
| #' @rdname as.data.frame | ||
| #' @export | ||
| setGeneric("as.data.frame") | ||
|
|
||
| #' @rdname attach | ||
| #' @export | ||
| setGeneric("attach") | ||
|
|
||
| #' @rdname columns | ||
| #' @export | ||
| setGeneric("colnames", function(x, do.NULL = TRUE, prefix = "col") { standardGeneric("colnames") }) | ||
|
|
@@ -529,13 +536,12 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) { | |
| standardGeneric("saveAsTable") | ||
| }) | ||
|
|
||
| #' @rdname withColumn | ||
| #' @export | ||
| setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") }) | ||
| setGeneric("str") | ||
|
|
||
| #' @rdname write.df | ||
| #' @rdname mutate | ||
| #' @export | ||
| setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") }) | ||
| setGeneric("transform", function(`_data`, ...) {standardGeneric("transform") }) | ||
|
|
||
| #' @rdname write.df | ||
| #' @export | ||
|
|
@@ -581,6 +587,10 @@ setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") }) | |
| #' @export | ||
| setGeneric("where", function(x, condition) { standardGeneric("where") }) | ||
|
|
||
| #' @rdname with | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are there two 'with' generics declared in this file?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice catch. I have fixed it. |
||
| #' @export | ||
| setGeneric("with") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it looks like
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed this and also re-ordered the generic declarations for attach and as.data.frame. |
||
|
|
||
| #' @rdname withColumn | ||
| #' @export | ||
| setGeneric("withColumn", function(x, colName, col) { standardGeneric("withColumn") }) | ||
|
|
@@ -590,6 +600,9 @@ setGeneric("withColumn", function(x, colName, col) { standardGeneric("withColumn | |
| setGeneric("withColumnRenamed", | ||
| function(x, existingCol, newCol) { standardGeneric("withColumnRenamed") }) | ||
|
|
||
| #' @rdname write.df | ||
| #' @export | ||
| setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") }) | ||
|
|
||
| ###################### Column Methods ########################## | ||
|
|
||
|
|
@@ -1093,7 +1106,6 @@ setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") }) | |
| #' @export | ||
| setGeneric("year", function(x) { standardGeneric("year") }) | ||
|
|
||
|
|
||
| #' @rdname glm | ||
| #' @export | ||
| setGeneric("glm") | ||
|
|
@@ -1105,15 +1117,3 @@ setGeneric("predict", function(object, ...) { standardGeneric("predict") }) | |
| #' @rdname rbind | ||
| #' @export | ||
| setGeneric("rbind", signature = "...") | ||
|
|
||
| #' @rdname as.data.frame | ||
| #' @export | ||
| setGeneric("as.data.frame") | ||
|
|
||
| #' @rdname attach | ||
| #' @export | ||
| setGeneric("attach") | ||
|
|
||
| #' @rdname with | ||
| #' @export | ||
| setGeneric("with") | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the first line should be a title? cc @felixcheung
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's ok since he has
`@name` and `@title` tags below. Apparently this is the doc style adopted in DataFrame.R - we should make it consistent across source files at some point, though.