@@ -5,6 +5,9 @@ library(magrittr)
 library(dplyr)
 library(hellno)
 
+library(stringb)
+library(rtext)
+
 #### ---------------------------------------------------------------------------
 
 
@@ -13,115 +16,18 @@ text_files <- list.files(text_path, pattern = "txt", full.names = TRUE)
 
 
 dp <- diffrproject$new()
-dp$text_add(rtext = rtext$new(text_file = text_files[1], encoding = "latin1", tokenize_by = "\n"), name = basename(text_files[1]))
-dp$text_add(rtext = rtext$new(text_file = text_files[1], encoding = "latin1", tokenize_by = "\n"), name = basename(text_files[2]))
-
-
-#### ---------------------------------------------------------------------------
-
-
-rtext_tokenizer_data <- function(rt, tokenize_by){
+dp$text_add(
+  rtext =
+    rtext$new(text_file = text_files[1], encoding = "latin1")
+)
 
-}
+dp$texts
 
-tokenize_data = function(...){
-  # datanize tokens
-  update_token_data <- function(...){
-    # tokenize if necessary
-    private$tokenize()
-    if( !is.null(private$char_data$i) ){
-      # datanize tokens
-      token_i <- which_token(private$char_data$i, private$token$from, private$token$to)
-      if( "FUN" %in% names(as.list(match.call())) ){
-        # user supplied functions and options
-        private$token_data <-
-          private$char_data[, -1] %>%
-          stats::aggregate(by = list(token_i = token_i), ...)
-      }else{
-        # standard
-        private$token_data <-
-          private$char_data[, -1] %>%
-          stats::aggregate(
-            by = list(token_i = token_i),
-            FUN = "modus",
-            multimodal = NA,
-            warn = FALSE
-          )
-      }
-      names(private$token_data)[-1] <- names(private$char_data)[-1]
-    }
-    # store hashes
-    private$token_store$tok_hashed_data <- private$hashed_data
-    private$token_store$tok_hashed_call <- dp_hash(as.list(match.call()))
-  }
-  # deciding when to re-datanize tokens
-  if( # no datanization has been done so far
-    length(private$hashed_text) == 0 |
-    length(private$token_store$tok_hashed_text) == 0 |
-    length(private$hashed_data) == 0 |
-    length(private$token_store$tok_hashed_data) == 0 |
-    length(private$token_store$tok_hashed_call) == 0
-  ){
-    self$message("datanizing tokens")
-    update_token_data(...)
-  }else if( # text / data / call has changed
-    private$hashed_text != private$token_store$tok_hashed_text |
-    identical(private$hashed_text, character(0)) |
-    private$hashed_data != private$token_store$tok_hashed_data |
-    identical(private$hashed_data, character(0)) |
-    dp_hash(as.list(match.call())) != private$token_store$tok_hashed_call
-  ){
-    self$message("datanizing tokens")
-    update_token_data(...)
-  }
-}
+dp$text_add(
+  rtext = rtext$new(text_file = text_files[1], encoding = "latin1"),
+  name = basename(text_files[2])
+)
 
 
-# token_get
-token_get = function(){
-  # tokenize text if necessary else take cache
-  private$tokenize()
-  # return tokens
-  data.frame(private$token, token_i = seq_len(dim1(private$token)))
-},
-token_data_get = function(...){
-  # tokenize text / gen token data if necessary else take cache
-  private$tokenize_data(...)
-  # return token data
-  private$token_data
-},
-
-
-
-
-token_store =
-  list(
-    tok_hashed_text = character(0),
-    tok_hashed_data = character(0),
-    tok_hashed_call = character(0)
-  ),
-
+#### ---------------------------------------------------------------------------
 
-# get text line information
-text_lines = function(){
-  lengths <- nchar(self$text_get(split = "\n")) + 1
-  lengths[length(lengths)] <- lengths[length(lengths)] - 1
-  res <-
-    data.frame(
-      line_i = seq_along(lengths),
-      from = c(0, cumsum(lengths)[seq_len(length(lengths) - 1)]) + 1,
-      to = cumsum(lengths),
-      nchar = lengths
-    )
-  return(res)
-},
-text_lines_get = function(lines, nl = FALSE){
-  res <- character(length(lines))
-  lines <- self$text_lines()[lines, ]
-  from <- lines$from
-  to <- lines$to
-  for( i in seq_along(from) ){
-    res[i] <- self$text_get(from = from[i], to = to[i] - ifelse(!nl & from[i] < to[i], 1, 0))
-  }
-  return(res)
-},
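
For reference, the removed text_lines() method derives per-line character offsets from cumulative line lengths, where every line counts its trailing "\n" except the last. A standalone sketch of just that arithmetic, using a made-up three-line string:

# sketch of the offset arithmetic in the removed text_lines(); `txt` is made up
txt     <- "ab\ncdef\ng"
lengths <- nchar(strsplit(txt, "\n")[[1]]) + 1            # line length incl. "\n"
lengths[length(lengths)] <- lengths[length(lengths)] - 1  # last line has no "\n"
data.frame(
  line_i = seq_along(lengths),
  from   = c(0, cumsum(lengths)[seq_len(length(lengths) - 1)]) + 1,
  to     = cumsum(lengths),
  nchar  = lengths
)
#>   line_i from to nchar
#> 1      1    1  3     3
#> 2      2    4  8     5
#> 3      3    9  9     1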
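Similarly, the core of the removed tokenize_data() step is aggregating character-level annotations up to token level with stats::aggregate(). A toy sketch under stand-in assumptions: char_data and token are made up, findInterval() stands in for which_token(), and a simple most-frequent-value helper stands in for the modus() used in the diff:

# toy sketch of the char-to-token aggregation; all inputs below are invented
char_data <- data.frame(i = c(1, 2, 3, 5, 6), tag = c("a", "a", "b", "b", "b"))
token     <- data.frame(from = c(1, 4), to = c(3, 6))  # token 1: chars 1-3, token 2: chars 4-6
token_i   <- findInterval(char_data$i, token$from)     # token index per annotated char
most_frequent <- function(x) names(sort(table(x), decreasing = TRUE))[1]
stats::aggregate(char_data[, -1, drop = FALSE], by = list(token_i = token_i), FUN = most_frequent)
#>   token_i tag
#> 1       1   a
#> 2       2   b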