Compare commits

...

5 Commits

Author SHA1 Message Date
e6e9e593a4 see previous comment 2022-11-24 14:10:27 +01:00
cbcee6ac27 Please use EndPoint25Mar2022 as the stable branch.
Changes made to CREClass.m
Here I've added an alternative calculation for the Relevance Boundary.
CalcRBShannon is now part of the CREClass.m file.
The TestCREClass has been updated to allow testing this new feature.
NB: this is work in progress
2022-11-24 14:09:47 +01:00
e68267182d Major update on CCREClass and some bug fixes in the CRE class.
CCRE class is now "more logical" from a programing point of view.
Created a fucntion to create the bin-edges for the reference function.
"condensed" the calc function (removed redundancies)
2022-03-27 17:12:16 +02:00
ce95974c7b Big modification in the way the histogram is calculated.
Now first the edges are calculated based on the two paramters: EqualSizeBinFlag and UseHistProxy.
Once edges for bins are known (calculated) creating the cdf is the same for all cases.
2022-03-26 16:10:39 +01:00
3998755f40 Allow for constant log(dFC) across bins. i.e. all bins will contain (approximately) and equal number of datum points.
In other words binsize for the refence is adapted such that the number of points per bin is equal.
2022-03-25 16:06:59 +01:00
3 changed files with 218 additions and 82 deletions

View File

@ -11,8 +11,15 @@ classdef CCREClass<CREClass
end end
properties properties
DataRef %This parameter will contain the reference set. DataRef %This parameter will contain the reference set.
nBinRef nBinRef %number of "bins" to use for the reference distribution
CCRE %parameter containing the conditional Cumulative Residual Entropy EqualSizeBinFlagRef=false; %set to true to get equally spaced bins, default : false, put an equal number of points per bin.
EdgesRef; %edges for the bins of the reference function. %todo, turn into dep. variable!
end
properties(SetAccess=private) %make read only
CCRE %parameter containing the conditional Cumulative Residual Entropy
end
properties(Constant)
UseHistProxyFlagRef=true; % this value is there to remind the user that we will have to bin the reference image. The actual value is not used in the code
end end
methods %constructor methods %constructor
@ -38,6 +45,16 @@ classdef CCREClass<CREClass
end end
end end
methods %user callable functions methods %user callable functions
function CalcEdgesRef(obj)
% copy the info for "ref" to a CRE object
Stmp=CREClass('ref',obj.DataRef...
,'nbin',obj.nBinRef...
,'EqualSizeBinFlag',obj.EqualSizeBinFlagRef...
,'UseHistProxyFlag',obj.UseHistProxyFlagRef...
);
Stmp.CalcEdges;
obj.EdgesRef=Stmp.Edges;
end
%% CCRE A|B %% CCRE A|B
% %
% $$CCRE = \varepsilon (A) - E[\varepsilon(A|B)]$$ % $$CCRE = \varepsilon (A) - E[\varepsilon(A|B)]$$
@ -58,14 +75,18 @@ classdef CCREClass<CREClass
if isempty(obj.DataRef);return;end %if no refdata given, return; if isempty(obj.DataRef);return;end %if no refdata given, return;
%calculate histogram for RefData %calculate histogram for RefData
EVarEpsDataGivenRef=0; %clear previous calculations EVarEpsDataGivenRef=0; %clear previous calculations
[pDataRef,E]=histcounts(obj.DataRef(:),obj.nBinRef,'Normalization','probability'); %get the edge definition.
[JointHist,E1,E2]=histcounts2(obj.Data(:),obj.DataRef(:),[obj.nBin,obj.nBinRef],'Normalization','probability'); % a bit of a workaround, I don't plan to reprogram the function
if isempty(obj.EdgesRef)
obj.CalcEdgesRef;
end
[pDataRef,E]=histcounts(obj.DataRef(:),obj.EdgesRef,'Normalization','probability');
for k=1:numel(pDataRef) %loop over the bins in pDataRef NB: must be equal to the bins in JointHist for the Ref for k=1:numel(pDataRef) %loop over the bins in pDataRef NB: must be equal to the bins in JointHist for the Ref
if pDataRef(k)==0;continue;end% if no data at this line go to the next if pDataRef(k)==0;continue;end% if no data at this line go to the next
ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1); ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1); %get an index to all points in Ref data inside the current bin
CRE_DataGivenDataRef=CREClass; CRE_DataGivenDataRef=CREClass('data',obj.Data(ind)...
CRE_DataGivenDataRef.Data=obj.Data(ind); ,'nbin',obj.Edges...
CRE_DataGivenDataRef.nBin=E1; );
CRE_DataGivenDataRef.Calc; CRE_DataGivenDataRef.Calc;
EVarEpsDataGivenRef=EVarEpsDataGivenRef+pDataRef(k)*CRE_DataGivenDataRef.CRE; % I really have to check this line on merit. EVarEpsDataGivenRef=EVarEpsDataGivenRef+pDataRef(k)*CRE_DataGivenDataRef.CRE; % I really have to check this line on merit.
end end

View File

@ -1,17 +1,29 @@
classdef CREClass < handle classdef CREClass < handle
% 23 and 24 Nov 2022 I added a new calculation for the RB based on
% Shanon entropy given a threshold \lambda. I also think there is a
% "mistake" in calculating the RB the classical way.
properties properties
Data; % the data set to get the CRE off Data; % the data set to get the CRE off
nBin; %number of bins nBin; %number of bins
Monitor=true; %set to true to get intermediat results/figures Monitor=true; %set to true to get intermediat results/figures
UseHistProxy=true; % set this to get a proxy of the survival function using the hist function UseHistProxyFlag=true; % set this to get a proxy of the survival function using the hist function
EqualSizeBinFlag=true; % set to true to get equal sized bins, set to false to get equi-content bins,
% if set to false: each bin will contain (aproximately) an equal
% number of pixel values.
end end
properties %to be made read only properties (SetAccess = private) %read only
CRE CRE
RB RB
P_RB P_RB
end end
properties(Dependent)
Edges
end
properties (Hidden) properties (Hidden)
WaitBarHandle; WaitBarHandle;
HiddenEdges;
end end
properties (Constant,Hidden) properties (Constant,Hidden)
alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope) alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope)
@ -33,6 +45,10 @@ classdef CREClass < handle
obj.Data=varargin{k}; obj.Data=varargin{k};
case {'nbin','bin'} case {'nbin','bin'}
obj.nBin=varargin{k}; obj.nBin=varargin{k};
case {'equalsizebinflag'}
obj.EqualSizeBinFlag=varargin{k};
case {'usehistproxyflag'}
obj.UseHistProxyFlag=varargin{k};
otherwise otherwise
error('invalid variable name'); error('invalid variable name');
end end
@ -47,8 +63,17 @@ classdef CREClass < handle
end end
end end
end end
%% set functions %% set and get functions
methods methods
function Edges=get.Edges(obj)
Edges=obj.HiddenEdges;
end
function set.Edges(obj,val)
if ~isnumeric(val);error('provide a array with edge values');end
if numel(val)<3;error('too few edges provided. Need at least 3 values, i.e. 2 bins defined');end
obj.HiddenEdges=transpose(val(:));
obj.nBin=numel(val)-1;
end
function set.Data(obj,val) function set.Data(obj,val)
if isnumeric(val) if isnumeric(val)
obj.Data=val; obj.Data=val;
@ -66,41 +91,94 @@ classdef CREClass < handle
end end
%% core function(s) %% core function(s)
methods methods
function CalcEdges(obj)
function Calc(obj) if isempty(obj.Data)
warning('nothing to do');
return;
end
if obj.UseHistProxyFlag
if isempty(obj.nBin)
obj.nBin=floor(numel(obj.Data)./10);
end
if obj.EqualSizeBinFlag
[~,obj.Edges]=histcounts(obj.Data(:),obj.nBin);
else
prc=linspace(0,100,obj.nBin+1); % define the edges of the bins as a precentile
obj.Edges=prctile(obj.Data(:),prc); %find the edges of the bins
end
else
if obj.UseHistProxyFlag ; error('You cannot use EqualSizeBinFlag=true and UseHistProxyFlag=false as a combination');end
q=transpose(unique(sort(obj.Data(:))));
dq=diff(q);
obj.Edges=cat(2,q(1)-eps,q(1:end-1)+dq/2,q(end)+eps); %the eps is a "shortcut" to avoid value=edge cases. Note that eps will increase the size of the bin by a very small amount. Although this is a systemetic bias, its effect will be neglectable
obj.nBin=numel(obj.Edges)-1;
end
end
function CalcRBShannon(obj)
% 23Nov22 Here I will try to implement a (hopefully better)
% definition of the relevance boundry. The basic idea is
% assume a dataset X= {x_1 ... x_i} X
% set a threshold \lambda NB \lambda \in of the data X
%Shanon entropy for this \lambda will be:
% H(\lamda)=p(x<\lambda)log(p(x<lambda))+p(x>=\lambda)log(p(x>=\lambda))
% Now the expectation value for H will be :
%E(H)=\int p(\lambda)*H(\lambda) d\lambda
if isempty(obj.Data)
warning('nothing to do');
return;
end
if isempty(obj.Edges);obj.CalcEdges;end
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
dl=transpose(diff(bin)./sum(diff(bin))); % will capture the case with non-equidistant bins as well as equidistant bins
pdf_lambda=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','pdf');
CP=transpose(CP);
LogCP=log(CP);
LogCP(~isfinite(LogCP))=0; % see comment below
FC=1-CP;
FC(FC<0)=0; % due to rounding errors, small negative numbers can arrise. These are theoretically not possible.
LogFC=log(FC);
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
H=-1*(CP.*LogCP+FC.*LogFC); %Shannon Entropy when a theshold (lambda) is set to the lower bin edge
w=(dl.*pdf_lambda);
w=w./sum(w); % for one reason or the other, the sum is not always equal to 1.
EH=w*H; %calculate expectation value for H
RB_ind=[find(H>=EH,1,'first') find(H>=EH,1,'last')];
obj.RB=obj.Edges(RB_ind)+dl(RB_ind)/2;
obj.P_RB=FC(RB_ind);
if obj.Monitor
lambda=obj.Edges(1:end-1);
scatter(w,H);xlabel('weight');ylabel('Shannon Entropy');snapnow
plot(lambda,H);xlabel('lambda');ylabel('Shannon Entropy');
line(lambda(RB_ind(:)),[EH;EH],'linestyle',':');snapnow
histogram(obj.Data(:));snapnow
disp('sum of weights')
disp(sum(dl.*pdf_lambda))
disp('EH')
disp(EH)
disp('boundry index');
disp(RB_ind);
disp('boundry lambda value');
disp(obj.RB);
disp('boundry p-values');
disp(FC(RB_ind));
disp('fraction inside interval');
disp(abs(diff(FC(RB_ind))));
end
end
function Calc(obj) %calculate the CRE as well as a on-tailed definition of the RB.
% RR note: 23Nov22: I am not sure if I still like this RB
% definition. I keep having a hard time defending it
% theoretically. The code is retained, for now, however for backwards
% compatibility.
if isempty(obj.Data) if isempty(obj.Data)
warning('nothing to do'); warning('nothing to do');
return; return;
end end
if obj.UseHistProxy if isempty(obj.Edges);obj.CalcEdges;end
if isempty(obj.nBin) [CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
obj.nBin=floor(numel(obj.Data)./10); FC=1-transpose(CP);
end FC(FC<0)=0;
%update part below to prefered matlab commands dl=diff(bin)./sum(diff(bin)); % will capture the case with non-equidistant bins as well as equidistant bins
% [N,bin]=hist(obj.Data(:),obj.nBin);
% N=transpose(N);
% P=N./sum(N);
% CP=cumsum(P);
[CP,bin]=histcounts(obj.Data(:),obj.nBin,'Normalization','cdf');
FC=1-transpose(CP);
FC(FC<0)=0;
dl=ones(size(FC))./numel(FC);
else
q=sort(unique(obj.Data(:)));
FC=zeros(size(q));
dl=zeros(size(q));
for k=1:numel(q)
FC(k)=sum(obj.Data(:)>=q(k));
if k==1
dl(k)=(q(k+1)-q(k))/2;
elseif k==numel(q)
dl(k)=(q(k)-q(k-1))/2;
else
dl(k)=(q(k+1)-q(k))/2+(q(k)-q(k-1))/2;
end
end
FC=FC./numel(obj.Data);
end
LogFC=log(FC); LogFC=log(FC);
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
if any(isnan(FC));error('something went wrong');end %catch a posible error. if any(isnan(FC));error('something went wrong');end %catch a posible error.
@ -115,16 +193,16 @@ classdef CREClass < handle
dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. <p50) to 0 dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. <p50) to 0
CRE_Med_Inf=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos CRE_Med_Inf=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos
ind=find(exp(-CRE_Med_Inf)>=FC,1,'first'); ind=find(exp(-CRE_Med_Inf)>=FC,1,'first');
if obj.UseHistProxy if obj.UseHistProxyFlag
obj.P_RB=FC(ind); obj.P_RB=FC(ind);
obj.RB=bin(ind); if ind==numel(bin)%if RB is (beyond) the last bin,
warning(sprintf('RB is hitting the upper bin value\n Maybe increase number of bins')) %#ok<*SPWRN>
obj.RB=bin(ind);
end
obj.RB=(bin(ind)+bin(ind+1))/2; % set the RB to the center of the bin
else else
% SortData=sort(obj.Data); SortData=unique(sort(obj.Data(:)));
% obj.RB=SortData(ind); obj.RB=SortData(ind);
% I think the two lines above are a bug. I should not use
% "SortData but rather q itsself. Some numbers may be in
% the data multiple times
obj.RB=q(ind);
obj.P_RB=FC(ind); obj.P_RB=FC(ind);
end end
end end

View File

@ -2,35 +2,63 @@
% show some default behaviour % show some default behaviour
% and test the error handling. % and test the error handling.
%% set some constants %% set some constants
A=randn(100); % A=randn(1000,1);
% A=rand(1000,1);
A=cat(1,randn(1000,1),randn(200,1)+10);
%% create instance of CRE class %% create instance of CRE class
S=CREClass; % % % S1=CREClass;
%% check histogram vs "new" technique % % % %% Calculate by setting labda equal to each of the (unique) values in the data.
% NB I come to the conclusion that the new method is not always working. % % % S1.UseHistProxyFlag=false;
% Why? % % % S1.EqualSizeBinFlag=false;
S.Data=A;% set gaussian data % % % S1.Data=A;% set gaussian data
S.nBin=(numel(S.Data)); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos % % % S1.Calc; %get RB based on the CRE
% use histogram % % % S1.CalcRBShannon; %% get RB based on shannon entropy
S.UseHistProxy=true; % % % disp(S1)
S.Calc; % % % %% use histogram aproximation with unequal bin sizes in the histogram
S % % % S2=CREClass;
% Use my aproximation for the histogram % % % S2.UseHistProxyFlag=true;
% only do this if the number of data points is not too big. % % % S2.EqualSizeBinFlag=false;
S.UseHistProxy=false; % % % S2.Data=A;% set gaussian data
S.Calc; % % % S2.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
S % % % S2.Calc
% % % S2.CalcRBShannon;
% % % disp(S2)
% % % %% use histogram aproximation with equal bin sizes in the histogram
% % % S3=CREClass;
% % % S3.UseHistProxyFlag=true;
% % % S3.EqualSizeBinFlag=false;
% % % S3.Data=A;% set gaussian data
% % % S3.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
% % % S3.Calc
% % % S3.CalcRBShannon;
% % % disp(S3)
%% Check recalculation of edges
S4=CREClass;
S4.UseHistProxyFlag=true;
S4.EqualSizeBinFlag=true;
S4.Data=A;% set gaussian data
S4.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
S4.Calc
S4.CalcRBShannon;
disp(S4)
S4.nBin=round(numel(A)/10);
S4.CalcEdges ; % force a recalculation of the edges
S4.Calc;
S4.CalcRBShannon;
disp(S4)
%%
return;
%% Show effect of scaling or ofsett the data %% Show effect of scaling or ofsett the data
% Scale % Scale
S.Data=100*A; S.Data=100*A;
S.UseHistProxy=true; S.UseHistProxyFlag=true;
S.Calc; S.Calc;
S disp(S);
% Offset % Offset
S.Data=A+10; S.Data=A+10;
S.UseHistProxy=true; S.UseHistProxyFlag=true;
S.Calc; S.Calc;
S disp(S);
%% %%
%% Test CCRE class %% Test CCRE class
@ -39,21 +67,30 @@ figure(2);clf;
figure(3);clf;hold on; figure(3);clf;hold on;
A=randn(100); A=randn(100);
B=A; B=A;
B(:)=A(randperm(numel(A))) B(:)=A(randperm(numel(A)));
for k=-1:0.1:1 w=-1:0.1:1;
out(numel(w),1)=struct('w',[],'CRE',[],'CCRE',[],'R',[]);
for k=1:numel(w)
out(k).w=w(k);
CS=CCREClass; CS=CCREClass;
CS.Data=1*(1-abs(k))*A+k*B; CS.EqualSizeBinFlag=false;
CS.Data=1*(1-abs(w(k)))*A+w(k)*B;
CS.nBin=50; CS.nBin=50;
CS.DataRef=B; CS.DataRef=B;
CS.nBinRef=500; CS.nBinRef=500;
CS.Calc; CS.Calc;
figure(1); out(k).CRE=CS.CRE;
plot(k,CS.CCRE,'o') out(k).CCRE=CS.CCRE;
plot(k,CS.CRE,'x'); out(k).R=CS.CCRE./CS.CRE;
figure(2); figure(2);
title(sprintf('w: %d',w(k)));
scatter(CS.DataRef(:),CS.Data(:)); scatter(CS.DataRef(:),CS.Data(:));
figure(3); snapnow
plot(k,CS.CCRE./CS.CRE,'d');
end end
CS disp(CS);
snapnow figure(1);
clf;hold on;
plot([out(:).w],[out(:).CCRE],'o');
plot([out(:).w],[out(:).CRE],'x');
figure(3);clf
plot([out(:).w],[out(:).R],'d');