REBOL[ Author: "Brett Handley" Date: 17-July-2000 File: %delimited-text.r Purpose: "Load and export tables stored as text using a character as a column delimiter." Comment: {Handles quoted strings. Works under version 2.3 of core. } History: [ 8-Aug-2000 "Brett Handley" { o Changed a lot of names to improve readability. o Changed function names from export... to form... in expectation of modules. o Restructured functions. o Now allows values to be loaded (not yet in form-..). o Now you can specify fixed or variable encodings. } ] ] ; ; This is the beastie that decodes a delimited text string. ; delimited-text-parser: make object! [ ; ; Parser variables. ; mode-obj: none ; Feature switches. cell-data: none ; Hold one cell's data during parsing. cell-data-segment: none ; Holds part of cell-data during parsing. current-row: none ; Holds the row being loaded. loaded-rows: none ; Holds the rows that have been loaded so far. row-count: none; Holds the number of loaded rows. column-count: none; Holds the max number of columns found in a row during parsing. ; ; Scanning rules ; I guess the parser mostly exists inside the () of the scanning rules. ; scan-delimited-text:[ any [ scan-row (add-row current-row) ] ] scan-row: [ (current-row: make block! 1) [ row-delimiter-char | [ some [ scan-cell (add-cell-to-row cell-data) ] opt row-delimiter-char ] ] ] scan-cell: [ [ cell-delimiter-char (cell-data: none) ] | [ scan-cell-data opt cell-delimiter-char ] ] scan-cell-data: [ [scan-quoted-data | scan-unquoted-data ] ] scan-quoted-data: [ ( cell-data: copy {} ) {"} copy cell-data-segment to {"} {"} (append cell-data change-none-to-string cell-data-segment) any [ {"} copy cell-data-segment to {"} {"} (append cell-data rejoin [{"} change-none-to-string cell-data-segment]) ] ] scan-unquoted-data: [ copy cell-data some scan-unquoted-data-chars ] scan-unquoted-data-chars: none ; See initialise function. cell-delimiter-char: none ; See load-delimited function. row-delimiter-char: "^/" ; ; Helper functions for the parser. ; initialise: func[][ scan-unquoted-data-chars: complement charset rejoin [cell-delimiter-char {"} row-delimiter-char] loaded-rows: make block! 10 column-count: 0 row-count: 0 ] add-cell-to-row: func [ ; "Add a cell to the current row." cell-data ][ either :mode-obj/load-values [ cell-data: either cell-data [load cell-data][none] ][ cell-data: change-none-to-string cell-data ] append/only current-row cell-data ] add-row: func[ ; "Add a row to output according to mode." a-row ][ ; Add the row. append/only loaded-rows a-row ; Update the counts. column-count: max column-count length? a-row row-count: add row-count 1 ] change-none-to-string: func [ ; "Converts none to an empty string." s [any-string! none!] ][ either s [s][copy {}] ] ; ; The parser invocation method. ; load-delimited: function [ x [string!] /mode mode-object ; mode object has fields: ] [result] [ ; Parameters. mode-obj: mode-object ; Setup. ; By default use tab as delimiter. cell-delimiter-char: to-string either :mode-obj/delimiter [:mode-obj/delimiter][tab] initialise ; Now lets go. if not parse/all x scan-delimited-text [Print "Assumptions failed."] ; Fix the rows if necessary. either mode-obj/fixed [ result: reformat-as-fixed-length-rows loaded-rows column-count ][ result: :loaded-rows ] ; Time to cough up the results. either mode-obj/counts [ reduce [ row-count column-count result] ][ result ] ] ] ; ; Encodes strings when creating delimited text. ; to-quoted-unquoted-string: function[x delimiter][do-special][ do-special: any [ find x delimiter find x {"} find x {'} ] either do-special [ head insert append replace/all copy x {"} {""} {"} {"} ][ copy x ] ] ; ; Create delimited text according to specifications. ; Will call itself. ; to-delimited: function[ x [block!] mode-object [object!] ][result-string row-start secondary-mode process-value][ either mode-object/table [ ; Handle many rows. result-string: make string! 10000 either mode-object/column-count [ ; Fixed table secondary-mode: make mode-object [column-count: none] result-string: to-delimited reformat-as-variable-length-rows x mode-object/column-count secondary-mode ][ ; Row by row - each row is a block of column values secondary-mode: make mode-object [table: false] if 0 < length? x [ append result-string to-delimited x/1 secondary-mode ] for i 2 length? x 1 [ append result-string "^/" append result-string to-delimited (pick x i) secondary-mode ] ] result-string ][ process-value: func[a-value] [ append result-string to-quoted-unquoted-string to-string a-value mode-object/delimiter ] ; Handle only one row. result-string: make string! 10000 if 0 < length? x [ process-value (x/1) ] for i 2 length? x 1 [ append result-string mode-object/delimiter process-value (pick x i) ] result-string ] ] ; --------------------------------- ; Functions appropriate for users. ; --------------------------------- ; ; Load in from delimited text. ; load-delimited: function [ "Loads delimited text into a block." delimited-string [string!] delimiter [char!] /load-values "Loads the individual values." /fixed "Fixed columns per row instead of row-as-block - will fill with 'none if necessary." /counts "Result will be a block [row-count column-count rows]." /refinements refine-list [block!] "Preferences as list of refinement-name value." ][mode-object refine-spec result][ mode-object: make object! compose [ load-values: (:load-values) fixed: (:fixed) counts: (:counts) delimiter: (:delimiter) ] if refinements [ foreach [refmt refmtval] refine-list [ set in mode-object refmt refmtval ] ] result: delimited-text-parser/load-delimited/mode :delimited-string :mode-object ] ; ; Output delimited text. ; form-delimited: function [ "Creates delimited text." x [block!] delimiter [string!] /skip "Treat the block as records of fixed size." size [integer!] "Size of each record." ][mode-object][ mode-object: make object! compose [ delimiter: (:delimiter) table: none column-count: none ] either x [ either skip [ mode-object/table: true mode-object/column-count: size ][ ; Lets see if it is variable format (1 row = block) if block? x/1 [ mode-object/table: true ] ] to-delimited x mode-object ][ copy {} ] ] ; ; Change format of table. ; reformat-as-fixed-length-rows: function [ "Changes variable length encoding to fixed length." loaded-table [block!] "Assumes a block of rows - each row being a block of values." new-length [integer!] /in-place "Changes given block rather than returning a copy of data." ][result current-row table-cursor more-rows has-split][ either in-place [ ; Process in reverse in case we split rows. table-cursor: tail loaded-table more-rows: greater? length? loaded-table 0 while [more-rows] [ ; get a row table-cursor: back table-cursor current-row: table-cursor/1 ; Do we need to split the row? either greater? length? current-row new-length [ insert/only next table-cursor copy/part at current-row add new-length 1 subtract length? current-row new-length clear at current-row add new-length 1 ; Better process the newly created row table-cursor: next next table-cursor has-split: yes ][ has-split: no ] if not has-split [ ; Do we need to extend the row? if lesser? length? current-row new-length [ insert/dup tail current-row 'none subtract new-length length? current-row ] ; We're done with the row shell now let get rid of it. for i length? current-row 1 -1 [ insert next table-cursor pick current-row i ] remove table-cursor ] ; more to do? more-rows: greater? index? table-cursor 1 ] ][ reformat-as-fixed-length-rows/in-place result: copy/deep loaded-table new-length return result ] ] reformat-as-variable-length-rows: function [ "Changes fixed length encoding to variable length." loaded-table [block!] size [integer!] /in-place "Changes given block rather than returning a copy of data." /preserve-none "Preserve 'none instead of trimming it from each row." ][result row-start trim-nones current-row][ either in-place [ row-start: loaded-table while [not tail? row-start][ row-start: insert/only row-start current-row: copy/part row-start size if not preserve-none [ while [ all [greater? length? current-row 0 equal? last current-row 'none] ] [ remove back tail current-row ] ] row-start: remove/part row-start size ] ][ either preserve-none [ reformat-as-variable-length-rows/in-place/preserve-none result: copy/deep loaded-table size ][ reformat-as-variable-length-rows/in-place result: copy/deep loaded-table size ] return result ] ] ; ; Specifically tab delimited. ; load-tab-delimited: function[ "Loads tab delimited data into a block." delimited-text [string!] /load-values "Loads the individual values." /fixed "Fixed columns per row instead of row-as-block - will fill with 'none if necessary." /counts "Result will be a block [row-count column-count rows]." ][refine-list][ refine-list: compose [load-values (load-values) fixed (fixed) counts (counts)] load-delimited/refinements delimited-text #"^-" refine-list ] form-tab-delimited: func [ x [block!] /skip "Treat the block as records of fixed size." size [integer!] "Size of each record." ][ either skip [ form-delimited/skip x "^-" size ][ form-delimited x "^-" ] ] ; ; Specifically comma delimited. ; load-comma-delimited: function[ "Loads comma delimited data into a block." delimited-text [string!] /load-values "Loads the individual values." /fixed "Fixed columns per row instead of row-as-block - will fill with 'none if necessary." /counts "Result will be a block [row-count column-count rows]." ][refine-list][ refine-list: compose [load-values (load-values) fixed (fixed) counts (counts)] load-delimited/refinements delimited-text #"," refine-list ] form-comma-delimited: func [ x [block!] /skip "Treat the block as records of fixed size." size [integer!] "Size of each record." ][ either skip [ form-delimited/skip x "," size ][ form-delimited x "," ] ]