const
// --- size limits for names and nominal/ordinal identifiers ---
DT_NameWidth = 50; { maximum length of col/row names }
// evaluates to 2000000000 div 51 = 39215686
// NOTE(review): presumably chosen so that the name storage of one dimension
// stays below roughly 2e9 bytes - confirm against the implementation
DT_MaxColOrRows = 2000000000 div (DT_NameWidth+1);
DT_NominalID_Width = 10; // maximum length of nominal/ordinal identifiers
DT_MaxNominalIDs = 150; // maximum number of nominal/ordinal identifiers
// --- edition/version information; the PAIDVERS define selects the edition ---
{$IFDEF PAIDVERS}
SDLVersionInfo = 'datatable_r1210_full';
IsLightEd = false;
{$ELSE}
SDLVersionInfo = 'datatable_r1210_lighted';
IsLightEd = true;
{$ENDIF}
Release = 1210; // same number as the "r1210" part of SDLVersionInfo
// --- cell state flags ---
// Each of the eight named states below is a distinct single bit of a byte;
// csAll ($FF) has all eight bits set, csReset/csNone ($00) have none set.
csNAN = $01; { cell states: not a number }
csUndefined = $02; { undefined or empty cell }
csUser1 = $04; { user defined cell type }
csUser2 = $08; { user defined cell type }
csReadOnly = $10; { cell is readonly }
csImputed = $20; { cell contains imputed data }
csMarkedA = $40; { cell is marked, type 1 }
csMarkedB = $80; { cell is marked, type 2 }
csReset = $00; { cell is not marked at all }
csNone = $00; { cell is not marked at all; same value as csReset }
csAll = $FF; { any kind of mark }
type
// measurement scale type of a variable
TMScaleType = (stUnknown, stNominal, stOrdinal, stInterval, stRatio);
// comparison operators; used as the CompareOp parameter of TDataTable.FindCells
TCompareMode = (cmLT, cmGT, cmLE, cmGE, cmEQ, cmNE, cmInRange, cmOutOfRange);
// selection mode; used as the SelMode parameter of TDataTable.SplitDataSet
TDataSelMode = (selmRandSel, selmBlocked, selmInterleave);
// split mode; used by TDataTable.SplitDataSet and TDataTable.PrepareSplitCounts
TDataSetSplitMode = (splitColumns, splitRows, splitTstTrn);
// short string holding one nominal/ordinal identifier (at most DT_NominalID_Width characters)
TNominalIDStr = string[DT_NominalID_Width];
// per-variable type specification: scale type plus the table of identifiers
TVTypeSpec = record
MScaleType : TMScaleType; // measurement scale type of variable
// NOTE(review): the range 0..DT_MaxNominalIDs provides DT_MaxNominalIDs+1 slots,
// although DT_MaxNominalIDs is described as the maximum number of identifiers -
// confirm whether index 0 is used or reserved
IDs : array[0..DT_MaxNominalIDs] of TNominalIDStr;
end;
// tags of the XML representation; used as the xmlTag parameter of TDataTable.ProcessXmlTag
TDTxmlTags = (xmlDataTableId, xmlDataTableEndId, xmlComment, xmlSize, xmlColNames,
xmlRowNames, xmlColAttrib, xmlRowAttrib, xmlColId, xmlRowId,
xmlDataCells, xmlCellID, xmlVarType, xmlVarIdentifier, xmlInvalid);
const
// textual identifier of each TCompareMode value (identical to the enum identifier),
// indexed by the enum value itself
IDS_COMPAREMODE: array[TCompareMode] of string =
('cmLT', 'cmGT', 'cmLE', 'cmGE', 'cmEQ', 'cmNE',
'cmInRange', 'cmOutOfRange');
// readable name of each TMScaleType value (enum identifier without the "st" prefix),
// indexed by the enum value itself
MScaleTypeNames : array[TMScaleType] of string =
('Unknown', 'Nominal', 'Ordinal', 'Interval', 'Ratio');
type
// short string limited to DT_NameWidth characters
TDTNameStr = string[DT_NameWidth]; { col/row header type }
// derived from ESDLError without adding any members
ESDLDataTableError = class(ESDLError); { exception type to indicate errors }
// method-pointer type of the OnResize event of TDataTable; the handler receives
// the sender and the column/row counts named Old... and New...
TDataTableResizeEvent = procedure (Sender: TObject; OldColumns, OldRows,
NewColumns, NewRows: integer) of object;
// TDataTable: component (descendant of TComponent) that combines a numeric data
// matrix (public field NumericData) with column/row names, integer column/row
// attributes, a measurement scale type per column and one state byte per cell.
// The platform attribute below is compiled only when GE_LEV29 is defined.
{$IFDEF GE_LEV29}
[ComponentPlatformsAttribute(pidWin32 or pidWin64 or pidWin64x)]
{$ENDIF}
TDataTable = class(TComponent)
private
// storage of the event handlers exposed by the published On... properties
FOnChange : TNotifyEvent;
FOnChangeSetup : TNotifyEvent;
FOnChangeNames : TNotifyEvent;
FOnChangeCellSt : TNotifyEvent;
FOnChangeAttrib : TNotifyEvent;
FOnPercentDone : TOnPercentDoneEvent;
FOnResize : TDataTableResizeEvent;
FComment : string; { comment on data }
FRowAtt : array of integer; { class information on objects }
FColAtt : array of integer; { class information on features }
FVType : array of TVTypeSpec; { measurement scale type }
FCState : array of array of byte; { array of cell states }
FColName : array of TDTNameStr; { column names }
FRowName : array of TDTNameStr; { row names }
FNrOfCols : longint; { number of columns of data matrix }
FNrOfRows : longint; { number of rows of data matrix }
// the two parser fields below exist only when PAIDVERS is defined
{$IFDEF PAIDVERS}
FVNState : TQuotedStrState; { state of var parser }
FVNName : string; { intermediary name for var. parsing }
{$ENDIF}
FSortIncludeHd : boolean; { flag to indicate sorting mode }
FOnSortExchange : TSortExchgEvent;
FOnExportText : TExportTextEvent;
// NOTE(review): presumably records that NumericData refers to a matrix supplied
// via AssignAnotherDataMatrix - confirm in the implementation
FExtMatAssigned : boolean;
// NOTE(review): presumably a handler of type TSortExchgEvent used while sorting - confirm
procedure ExchangeDuringSort (Sender: TObject; ExchgWhat: byte;
index1, index2, first, last: longint);
// read accessors of the array properties declared in the public section
function GetRowAttrib (RowNr: longint): integer;
function GetCellState (ACol, ARow: longint): byte;
function GetColAttrib (ColNr: longint): integer;
function GetMScaleType (ColNr: longint): TMScaleType;
function GetColName (ColNr: longint): TDTNameStr;
function GetElem (ACol, ARow: longint): double;
function GetElemNominal (ACol, ARow: longint): TNominalIDStr;
function GetIsNumber (ACol, ARow: longint): boolean;
function GetIsEmpty (ACol, ARow: longint): boolean;
function GetIsImputed (ACol, ARow: longint): boolean;
function GetRowName (RowNr: longint): TDTNameStr;
function GetNominalID (ColIx, OrdVal: integer): TNominalIDStr;
// internal helpers
procedure DataHasChanged (Sender: TObject);
function MakeValidColRange (var LowCol, HighCol: integer): integer;
function MakeValidRowRange (var LowRow, HighRow: integer): integer;
procedure PercDone (Sender: TObject; Percent: longint);
function ProcessXmlTag (xmlTag: TDTXmlTags; attr, cont: string;
default: string; ParentTag: integer): integer;
// write accessors of the public array properties and of the published
// properties Comment, NrOfColumns and NrOfRows
procedure SetRowAttrib (RowNr: longint; Attrib: integer);
procedure SetCellState (ACol, ARow: longint; const Value: byte);
procedure SetColAttrib (ColNr: longint; Attrib: integer);
procedure SetMScaleType (ColNr: longint; MScaleType: TMScaleType);
procedure SetColName (ColNr: longint; HLine: TDTNameStr);
procedure SetElem (ACol, ARow: longint; const Value: double);
procedure SetElemNominal (ACol, ARow: longint;
const Value: TNominalIDStr);
procedure SetFComment (cmt: string);
procedure SetIsEmpty (ACol, ARow: longint; const Value: boolean);
procedure SetIsImputed (ACol, ARow: longint; const Value: boolean);
procedure SetNrCols (NrCols: longint);
procedure SetNrRows (NrRows: longint);
procedure SetRowName (RowNr: longint; HLine: TDTNameStr);
procedure SetNominalID (ColIx, OrdVal: integer;
MScaleID: TNominalIDStr);
protected
procedure AssignTo (Dest: TPersistent); override;
public
NumericData : TMatrix; { data matrix }
constructor Create(AOwner: TComponent); override;
destructor Destroy; override;
function AddNominalID (Col: integer;
MScaleID: TNominalIDStr): integer;
procedure Assign (Source: TPersistent); override;
procedure AssignAnotherDataMatrix (NewDMat: TMatrix);
function CalcCovar (CovarMat: TMatrix; LoC, HiC, LoR, HiR: integer;
Mode: integer): boolean;
procedure ChangedData;
procedure ChangedSetup;
procedure ChangedNames;
procedure ChangedAttrib;
procedure ChangedCellStates;
procedure Clear;
procedure ClearNominalIDs (Col: integer);
function CountNominalIDs (Col: integer): integer;
function CRCofNamesAndAttributes: string;
function CRCofCellStates: string;
function CreateContingencyTable (Col1, Col2, LowRow, HighRow: longint;
var ContTab: TInt2DArray): integer;
// ASC export/import; the overloads differ in the target/source
// (file name, text file, stream, clipboard) and in the optional CustData argument
function ExportAsASC (FName: string;
Precision: integer): string; overload;
function ExportAsASC (FName: string; Precision: integer;
CustData: TStr2DArray): string; overload;
function ExportAsASC (const OutFile: TextFile;
Precision: integer): string; overload;
function ExportAsASC (OutStream: TStream;
Precision: integer): string; overload;
function ImportASC (FName: string): integer; overload;
function ImportASC (InStream: TStream): integer; overload;
function ImportASC (ClipBd: TClipBoard): integer; overload;
function ImportASC (FName: string;
var CustData: TStr2DArray): integer; overload;
// the CSV routines are declared only when DOTNET is not defined
{$IFNDEF DOTNET}
function ExportAsCSV (FName: string; Precision: integer;
Delimiter: TCSVDelimiters; IncludeColHeaders,
IncludeRowHeaders: boolean): string; overload;
function ExportAsCSV (FName: string; Precision: integer;
Delimiter: TCSVDelimiters; IncludeColHeaders,
IncludeRowHeaders,
IncludeRowAttributes: boolean): string; overload;
function ExportAsCSV (FName: string; Precision: integer;
Delimiter: TCSVDelimiters; IncludeColHeaders,
IncludeRowHeaders, IncludeRowAttributes,
IncludeComment: boolean): string; overload;
function ImportFromCSV (FName: string; Delimiter: TCSVDelimiters;
IncludeColHeaders,
IncludeRowHeaders: boolean): integer; overload;
function ImportFromCSV (FName: string; Delimiter: TCSVDelimiters;
NLinesSkip, LastImportLine: integer; IncludeColHeaders,
IncludeRowHeaders: boolean): integer; overload;
{$ENDIF}
// Elem is the default array property of the class, so an instance can be
// indexed directly as Table[ACol, ARow]; note the index order (column first)
property Elem[ACol, ARow: longint]: double
read GetElem write SetElem; default;
property ElemNominal[ACol, ARow: longint]: TNominalIDStr
read GetElemNominal write SetElemNominal;
property CellState[ACol, ARow: longint]: byte
read GetCellState write SetCellState;
procedure Resize (NrCols, NrRows: longint);
property RowAttrib [ix: longint]: integer
read GetRowAttrib write SetRowAttrib;
function CheckDichotomousColumn (col: integer;
var v1, v2: double): boolean;
property ColAttrib [ix: longint]: integer
read GetColAttrib write SetColAttrib;
function CountMarkedCellsInColumn (Col: integer;
StateMask: byte): integer;
function CountMarkedCellsInRow (Row: integer;
StateMask: byte): integer;
function FillMarkedCells (Value: double; StateMask: byte): integer;
function IfAnyCellHasCellState (LowCol, LowRow, HighCol,
HighRow: integer; CellState: byte): boolean;
function IfColHasCellState (col: longint; CellState: byte): boolean;
function IfRowHasCellState (row: longint; CellState: byte): boolean;
procedure InsertRow (ix: longint);
procedure InsertColumn (ix: longint);
// IsNumber has no write specifier, i.e. it is read-only
property IsNumber[ACol, ARow: longint]: boolean read GetIsNumber;
property IsEmpty[ACol, ARow: longint]: boolean
read GetIsEmpty write SetIsEmpty;
property IsImputed[ACol, ARow: longint]: boolean
read GetIsImputed write SetIsImputed;
procedure Clone (ExtTab: TDataTable);
property ColName[ix: longint]: TDTNameStr
read GetColName write SetColName;
// copying of cell states and data between this table and other tables,
// arrays, vectors and matrices
function CopyCellStatesFrom (ExtTab: TDataTable): boolean;
function CopyCellsToVector (LowCol,LowRow,HighCol,HighRow: integer;
CellState: byte; DestVect: TVector): integer;
procedure CopyContentsFrom (ExtTab: TDataTable);
function CopyColumnFrom (ExtTab: TDataTable;
SrcCol, DestCol: integer): integer;
function CopyDataColumnToArray (Col: integer;
var Dest: TDoubleArray): integer; overload;
function CopyDataColumnToArray (var Dest: TDoubleArray; Col,
FirstRow, LastRow, DestElem: integer): integer; overload;
function CopyDataColumnToArray (var Dest: TIntArray; Col,
FirstRow, LastRow, DestElem: integer): integer; overload;
function CopyDataColumnToVector (Col: integer;
Dest: TVector): integer; overload;
function CopyDataColumnToVector (Dest: TVector; Col,
FirstRow, LastRow, DestElem: integer): integer; overload;
function CopyDataColumnToVector (Dest: TIntVector; Col,
FirstRow, LastRow, DestElem: integer): integer; overload;
function CopyDataColumnFromArray (Col: integer;
SrcArray: TDoubleArray): integer;
function CopyDataColumnFromVector (Col: integer;
SrcVec: TVector): integer;
function CopyDataRowFromArray (Row: integer;
SrcArray: TDoubleArray): integer;
function CopyDataRowFromVector (Row: integer;
SrcVec: TVector): integer;
function CopyDataRowToArray (Row: integer;
var Dest: TDoubleArray): integer; overload;
function CopyDataRowToArray (var Dest: TDoubleArray; Row,
FirstCol, LastCol, DestElem: integer): integer; overload;
function CopyDataRowToArray (var Dest: TIntArray; Row,
FirstCol, LastCol, DestElem: integer): integer; overload;
function CopyDataRowToVector (Row: integer;
Dest: TVector): integer; overload;
function CopyDataRowToVector (Dest: TVector; Row,
FirstCol, LastCol, DestElem: integer): integer; overload;
function CopyDataRowToVector (Dest: TIntVector; Row,
FirstCol, LastCol, DestElem: integer): integer; overload;
function CopyDataToArray (var Dest: TDouble2DArray;
SrcColLo, SrcRowLo, SrcColHi, SrcRowHi,
DestCol, DestRow: integer): integer; overload;
function CopyDataToArray (var Dest: TInt2DArray;
SrcColLo, SrcRowLo, SrcColHi, SrcRowHi,
DestCol, DestRow: integer): integer; overload;
function CopyDataToMatrix (Dest: TMatrix;
SrcColLo, SrcRowLo, SrcColHi, SrcRowHi,
DestCol, DestRow: integer): integer;
function CopyDataFromArray (Src: TDouble2DArray;
SrcColLo, SrcRowLo, SrcColHi, SrcRowHi,
Col, Row: integer): integer; overload;
function CopyDataFromArray (Src: TInt2DArray;
SrcColLo, SrcRowLo, SrcColHi, SrcRowHi,
Col, Row: integer): integer; overload;
function CopyDataFromArray (Src: TDoubleArray;
FirstElem, LastElem, Col, Row: integer;
AsColumn: boolean): integer; overload;
function CopyDataFromArray (Src: TIntArray;
FirstElem, LastElem, Col, Row: integer;
AsColumn: boolean): integer; overload;
function CopyDataFromMatrix (MatSrc: TMatrix;
SrcColLo, SrcRowLo, SrcColHi, SrcRowHi,
DstCol, DstRow: integer): integer;
function CopyDataFromVector (Src: TVector;
FirstElem, LastElem, Col, Row: integer;
AsColumn: boolean): integer; overload;
function CopyDataFromVector (Src: TIntVector;
FirstElem, LastElem, Col, Row: integer;
AsColumn: boolean): integer; overload;
procedure CopyFrom (ExtTab: TDataTable; SourceColLo,
SourceRowLo, SourceColHi, SourceRowHi,
DestCol, DestRow: integer);
// counting routines operating on the cell range LowCol..HighCol, LowRow..HighRow
function CountEmptyCells (LowCol, LowRow, HighCol,
HighRow: integer): integer;
function CountImputedCells (LowCol, LowRow, HighCol,
HighRow: integer): integer;
function CountNumCells (LowCol, LowRow, HighCol,
HighRow: integer): integer;
function CountValidCells (LowCol, LowRow, HighCol,
HighRow: integer): integer;
function DuplicateDataAsArray (var Dest: TDouble2DArray): integer;
procedure ExchangeColumns (c1, c2: integer);
procedure ExchangeMarkings;
procedure ExchangeRows (r1, r2: integer);
function FindHighestMLevel (FirstCol, LastCol: longint): TMScaleType;
function FindLowestMLevel (FirstCol, LastCol: longint): TMScaleType;
function GeometricMeanOfNumCells (LowCol, LowRow, HighCol,
HighRow: integer; var NumData: integer): double;
function HarmonicMeanOfNumCells (LowCol, LowRow, HighCol,
HighRow: integer; var NumData: integer): double;
procedure InvertCellStates (StateMask: byte);
function IsConstantColumn (ColIx: longint): boolean; overload;
function IsConstantColumn (ColIx: longint;
var ConstVal: double): boolean; overload;
function IsConstantRow (RowIx: longint): boolean; overload;
function IsConstantRow (RowIx: longint;
var ConstVal: double): boolean; overload;
function IsDichotomousColumn (ColIx: integer): boolean;
function ListOfCategorialVars: TIntArray;
function LoadFromXMLFile (FName: string; DataID: string): boolean;
function MaxColNameLength: integer;
function MaxRowNameLength: integer;
function MaxRowAttrib (FirstRow, LastRow: longint;
var Row: integer): integer;
function MinRowAttrib (FirstRow, LastRow: longint;
var Row: integer): integer;
function MaxColAttrib (FirstCol, LastCol: longint;
var Col: integer): integer;
function MinColAttrib (FirstCol, LastCol: longint;
var Col: integer): integer;
procedure MeanCenterNumCellColumns (Means: TVector);
procedure MeanCenterNumCellRows (Means: TVector);
function MeanVarOfNumCells (LowCol, LowRow, HighCol,
HighRow: integer; var Mean, Variance: double): integer;
function MeanVarOfMarkedCells (LowCol, LowRow, HighCol,
HighRow: integer; CellState: byte;
var Mean, Variance: double): integer;
function MinMaxOfNumCells (LowCol, LowRow, HighCol,
HighRow: integer; var Minimum, Maximum: double): integer;
function MinMaxOfMarkedCells (LowCol, LowRow, HighCol,
HighRow: integer; CellState: byte;
var Minimum, Maximum: double): integer;
function MinMaxOfValidCells (LowCol, LowRow, HighCol,
HighRow: integer; var Minimum, Maximum: double): integer;
procedure MirrorColumns;
procedure MirrorRows;
property MScaleType [Col: longint]: TMScaleType
read GetMScaleType write SetMScaleType;
property NominalID [Col, Ord: longint]: TNominalIDStr
read GetNominalID write SetNominalID;
function OrdinalOfNominalID (Col: longint; ID: TNominalIDStr;
IgnoreCase: boolean): integer;
function PercentileOfNumCells (prob: double; LowCol, LowRow, HighCol,
HighRow: integer; var NumData: integer): double;
function PercentileOfMarkedCells (prob: double; LowCol, LowRow,
HighCol, HighRow: integer; CellState: byte;
var NumData: integer): double;
function PrepareSplitCounts (SplitMode: TDatasetSplitMode;
SplitSize: integer; CreateAllPairs: boolean;
var DataCnts: TIntArray): integer;
function QuartilesOfNumCells (LowCol, LowRow, HighCol,
HighRow: integer; var Q1, Q2, Q3: double;
var NumData: integer): boolean;
function QuartilesOfMarkedCells (LowCol, LowRow, HighCol,
HighRow: integer; var Q1, Q2, Q3: double;
CellState: byte; var NumData: integer): boolean;
procedure Serialize (ColHeadsAsVar, IncludeColRowNumbers: boolean);
procedure StandardizeNumCellColumns (Means, StdDevs: TVector);
procedure StandardizeNumCellRows (Means, StdDevs: TVector);
function SumOfNumCells (LowCol,LowRow,HighCol,HighRow: integer;
var NumData: integer): double;
function SumOfMarkedCells (LowCol,LowRow,HighCol,HighRow: integer;
CellState: byte; var NumData: integer): double;
function SumOfSquaredNumCells (LowCol, LowRow,
HighCol, HighRow: integer;
var NumData: integer): double;
function SumOfSquaredMarkedCells (LowCol, LowRow,
HighCol, HighRow: integer;
CellState: byte; var NumData: integer): double;
// XML input from an already opened text file, a stream or a string
function ReadFromOpenXMLFile (var InFile: TextFile;
DataID: string): boolean;
function ReadFromXMLStream (InStream: TStream;
DataID: string): boolean;
function ReadFromXMLString (InString: string; var StartAt: integer;
DataID: string): boolean;
procedure RemoveColumn (ix: longint);
function RemoveMarkedColumns (FirstCol, LastCol: integer;
CSMask: byte): integer;
function RemoveMarkedRows (FirstRow, LastRow: integer;
CSMask: byte): integer;
procedure RemoveRow (ix: longint);
function RemoveRows (FirstRow, LastRow: integer): integer;
function RemoveUnmarkedColumns (FirstCol, LastCol: integer;
CSMask: byte): integer;
function RemoveUnmarkedRows (FirstRow, LastRow: integer;
CSMask: byte): integer;
function RowAttribLevels: integer;
function ColAttribLevels: integer;
property RowName[ix: longint]: TDTNameStr
read GetRowName write SetRowName;
function ScanForCatValues (ACol, LowRow, HighRow: longint;
var OrdList: TIntArray): integer;
// sorting by data, attributes or names within the range LowCol..HighCol, LowRow..HighRow
procedure SortCols (SortRowIx: integer; Ascending: boolean;
LowCol, LowRow, HighCol, HighRow: integer;
IncludeHeaders: boolean);
procedure SortRows (SortColIx: integer; Ascending: boolean;
LowCol, LowRow, HighCol, HighRow: integer;
IncludeHeaders: boolean);
procedure SortColAttributes (Ascending: boolean;
LowCol, LowRow, HighCol, HighRow: integer);
procedure SortColNames (Ascending: boolean;
LowCol, LowRow, HighCol, HighRow: integer); overload;
procedure SortColNames (Ascending, IgnoreCase: boolean;
LowCol, LowRow, HighCol, HighRow: integer); overload;
procedure SortRowAttributes (Ascending: boolean;
LowCol, LowRow, HighCol, HighRow: integer);
procedure SortRowNames (Ascending: boolean;
LowCol, LowRow, HighCol, HighRow: integer); overload;
procedure SortRowNames (Ascending, IgnoreCase: boolean;
LowCol, LowRow, HighCol, HighRow: integer); overload;
function SplitDataSet (SplitMode: TDataSetSplitMode;
SelMode: TDataSelMode; SplitSize: integer;
CreateAllPairs: boolean; FNameTemplate: string;
AllowOverWrite: boolean;
var FileNames: TStringList): integer;
procedure SetAllCellStates (CellState: byte);
procedure SetCellStates (LowCol, LowRow, HighCol, HighRow: integer;
CellState: byte; LogicOp: TLOgicOp);
procedure SetColAttributes (Attrib: integer); overload;
procedure SetColAttributes (Attrib: integer;
Mask: longword); overload;
procedure SetRowAttributes (Attrib: integer); overload;
procedure SetRowAttributes (Attrib: integer;
Mask: longword); overload;
function SliceCol (Col, FirstRow, LastRow, Step: integer;
var DstVec: TDoubleArray): integer;
function SliceRow (Row, FirstCol, LastCol, Step: integer;
var DstVec: TDoubleArray): integer;
function FindCells (Limit, Limit2: double; CompareOp: TCompareMode;
LowCol, LowRow, HighCol, HighRow: integer;
var CellList: TInt2DArray): integer;
function FindColIndex (ColID: TDTNameStr): integer;
function FindRowIndex (RowID: TDTNameStr): integer;
function ReadFromASC (FName: string; PosX, PosY: longint;
ReplaceIDs: boolean): integer; overload;
function ReadFromASC (InStream: TStream; PosX, PosY: longint;
ReplaceIDs: boolean): integer; overload;
function ReadFromASC (ClipBd: TClipBoard; PosX, PosY: longint;
ReplaceIDs: boolean): integer; overload;
procedure UnmarkAllCells;
// XML output to a named file, an already opened text file or a stream
procedure SaveAsXMLFile (FName: string; Precision: integer;
DataID: string);
procedure WriteToOpenXMLFile (const OutFile: TextFile;
CreateHeader: boolean; Precision: integer;
DataID: string);
procedure WriteToXMLStream (OutStream: TStream;
CreateHeader: boolean; Precision: integer;
DataID: string);
published
// Comment, NrOfColumns and NrOfRows read their backing field directly and
// are written through the setters SetFComment, SetNrCols and SetNrRows
property Comment: string
read FComment write SetFComment;
property NrOfColumns: longint
read FNrOfCols write SetNrCols;
property NrOfRows: longint
read FNrOfRows write SetNrRows;
// event properties; each one reads and writes its private F... field directly
property OnChange: TNotifyEvent
read FOnChange write FOnChange;
property OnChangeSetup: TNotifyEvent
read FOnChangeSetup write FOnChangeSetup;
property OnChangeNames: TNotifyEvent
read FOnChangeNames write FOnChangeNames;
property OnChangeAttrib: TNotifyEvent
read FOnChangeAttrib write FOnChangeAttrib;
property OnChangeCellState: TNotifyEvent
read FOnChangeCellSt write FOnChangeCellSt;
property OnPercentDone: TOnPercentDoneEvent
read FOnPercentDone write FOnPercentDone;
property OnResize: TDataTableResizeEvent
read FOnResize write FOnResize;
property OnSortExchange: TSortExchgEvent
read FOnSortExchange write FOnSortExchange;
property OnExportText: TExportTextEvent
read FOnExportText write FOnExportText;
end;
|