Compare commits

..

5 Commits

Author SHA1 Message Date
2274e88690 removed a bug in the "old" slow calculation. (option) UseHistProxy=false 2022-03-23 12:44:42 +01:00
Renken
6c31374b1a updated TestCREClass 2020-02-25 17:30:53 +01:00
Renken
8ac21ada83 Solved bug in usage histcount function used in the method: CCREClass.Calc. Now it calculates probability as it should.
Updated the test script
2020-02-21 16:16:54 +01:00
Renken
026a3f589f updated TestCREClass 2020-02-21 08:42:07 +01:00
Renken
11d91979df added CCREClass 2020-02-21 08:24:58 +01:00
3 changed files with 131 additions and 19 deletions

77
CCREClass.m Normal file
View File

@@ -0,0 +1,77 @@
classdef CCREClass<CREClass
%CCRECLASS calculate the Conditional Cummulative Residual Entropy
% This class uses the CREClass options
%
properties
%the data field inherited from the CREClass will contain the data
%to calculate the CCRE on.
%the nBin field inherited from the CREClass will contain the number
%of bins if UseHistProxy is set.
end
properties
DataRef %This parameter will contain the reference set.
nBinRef
CCRE %parameter containing the conditional Cumulative Residual Entropy
end
methods %constructor
function obj = CCREClass()
%CCRECLASS Construct an instance of this class
% Detailed explanation goes here
obj@CREClass;
end
end
methods %set and get functions
function set.DataRef(obj,val)
if isnumeric(val)
obj.DataRef=val;
else
error('please enter numeric array/matrix as reference');
end
end
function set.nBinRef(obj,val)
nBinFromCRE=obj.nBin; %park the current value of nBin
obj.nBin=val; %use the check system from CREClass
obj.nBinRef=obj.nBin;
obj.nBin=nBinFromCRE; %restore the current value of nBin
end
end
methods %user callable functions
%% CCRE A|B
%
% $$CCRE = \varepsilon (A) - E[\varepsilon(A|B)]$$
%
% $$\varepsilon(A) = -\sum_{\lambda}F_c^A(\lambda)logF_c(A^(\lambda)$$
% $$F_c^A(\lambda)=\int_{\lambda}^{\infty}p^A(l)dl$$
%
% $$E[\varepsilon(A|B)]= \sigma_{\kappa} p^B(\kappa)*\varepsilon(A|B)$$
%
% $$\varepsilon(A|B) = \sigma_{\lambda}F_c^{A|B}(\lambda)log
% F_c^{A|B}(\lambda)$$
%
% $$F_c^{A|B}(\lambda) = \int_{\lambda}^{\infty}p^{A,B}(l,k)/p^B(\kappa)dl$$
%
% see equation 7 in wan and Vemuri
function Calc(obj)
Calc@CREClass(obj); %calculate the CRE for the data.
if isempty(obj.DataRef);return;end %if no refdata given, return;
%calculate histogram for RefData
EVarEpsDataGivenRef=0; %clear previous calculations
[pDataRef,E]=histcounts(obj.DataRef(:),obj.nBinRef,'Normalization','probability');
[JointHist,E1,E2]=histcounts2(obj.Data(:),obj.DataRef(:),[obj.nBin,obj.nBinRef],'Normalization','probability');
for k=1:numel(pDataRef) %loop over the bins in pDataRef NB: must be equal to the bins in JointHist for the Ref
if pDataRef(k)==0;continue;end% if no data at this line go to the next
ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1);
CRE_DataGivenDataRef=CREClass;
CRE_DataGivenDataRef.Data=obj.Data(ind);
CRE_DataGivenDataRef.nBin=E1;
CRE_DataGivenDataRef.Calc;
EVarEpsDataGivenRef=EVarEpsDataGivenRef+pDataRef(k)*CRE_DataGivenDataRef.CRE; % I really have to check this line on merit.
end
obj.CCRE=obj.CRE-EVarEpsDataGivenRef;
end
end
end

View File

@@ -1,7 +1,7 @@
classdef CREClass < handle
properties
Data; % the data set to get the CRE off
nBin; %number of bins for the reference
nBin; %number of bins
Monitor=true; %set to true to get intermediat results/figures
UseHistProxy=true; % set this to get a proxy of the survival function using the hist function
end
@@ -104,7 +104,7 @@ classdef CREClass < handle
LogFC=log(FC);
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
if any(isnan(FC));error('something went wrong');end %catch a posible error.
if transpose(dl)*FC==0;
if transpose(dl)*FC==0
%if only one value in the most left bin in the distribution I may get a 0 divided by 0
%as the CRE of a delta function is 0, enforce this outcome
obj.CRE=0;
@@ -119,8 +119,12 @@ classdef CREClass < handle
obj.P_RB=FC(ind);
obj.RB=bin(ind);
else
SortData=sort(obj.Data);
obj.RB=SortData(ind);
% SortData=sort(obj.Data);
% obj.RB=SortData(ind);
% I think the two lines above are a bug. I should not use
% "SortData but rather q itsself. Some numbers may be in
% the data multiple times
obj.RB=q(ind);
obj.P_RB=FC(ind);
end
end

View File

@@ -1,28 +1,59 @@
%% Test CCRE and CRE class
% show some default behaviour
% and test the error handling.
%% set some constants
A=randn(100);
%% create instance of CRE class
S=CREClass;
S.Data=randn(100,1);
S.nBin=(1000);
%% use histogram
%% check histogram vs "new" technique
% NB I come to the conclusion that the new method is not always working.
% Why?
S.Data=A;% set gaussian data
S.nBin=(numel(S.Data)); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
% use histogram
S.UseHistProxy=true;
S.Calc;
S
%% Use my aproximation
% Use my aproximation for the histogram
% only do this if the number of data points is not too big.
S.UseHistProxy=false;
S.Calc;
S
std(S.Data)
%% multiply S
S.Data=100*S.Data;
%% Show effect of scaling or ofsett the data
% Scale
S.Data=100*A;
S.UseHistProxy=true;
S.Calc;
S
std(S.Data)
%% ones again the "new" way
S.UseHistProxy=false;
% Offset
S.Data=A+10;
S.UseHistProxy=true;
S.Calc;
S
%%
%% Test CCRE class
figure(1);clf;hold on
figure(2);clf;
figure(3);clf;hold on;
A=randn(100);
B=A;
B(:)=A(randperm(numel(A)))
for k=-1:0.1:1
CS=CCREClass;
CS.Data=randn(100,1);
CS.nBin=10;
CS.DataRef=randn(100,1);
CS.nBinRef=10;
CS.Data=1*(1-abs(k))*A+k*B;
CS.nBin=50;
CS.DataRef=B;
CS.nBinRef=500;
CS.Calc;
figure(1);
plot(k,CS.CCRE,'o')
plot(k,CS.CRE,'x');
figure(2);
scatter(CS.DataRef(:),CS.Data(:));
figure(3);
plot(k,CS.CCRE./CS.CRE,'d');
end
CS
snapnow