Compare commits

..

No commits in common. "master" and "EndPoint25Mar2022" have entirely different histories.

3 changed files with 83 additions and 219 deletions

View File

@ -11,16 +11,9 @@ classdef CCREClass<CREClass
end end
properties properties
DataRef %This parameter will contain the reference set. DataRef %This parameter will contain the reference set.
nBinRef %number of "bins" to use for the reference distribution nBinRef
EqualSizeBinFlagRef=false; %set to true to get equally spaced bins, default : false, put an equal number of points per bin.
EdgesRef; %edges for the bins of the reference function. %todo, turn into dep. variable!
end
properties(SetAccess=private) %make read only
CCRE %parameter containing the conditional Cumulative Residual Entropy CCRE %parameter containing the conditional Cumulative Residual Entropy
end end
properties(Constant)
UseHistProxyFlagRef=true; % this value is there to remind the user that we will have to bin the reference image. The actual value is not used in the code
end
methods %constructor methods %constructor
function obj = CCREClass() function obj = CCREClass()
@ -45,16 +38,6 @@ classdef CCREClass<CREClass
end end
end end
methods %user callable functions methods %user callable functions
function CalcEdgesRef(obj)
% copy the info for "ref" to a CRE object
Stmp=CREClass('ref',obj.DataRef...
,'nbin',obj.nBinRef...
,'EqualSizeBinFlag',obj.EqualSizeBinFlagRef...
,'UseHistProxyFlag',obj.UseHistProxyFlagRef...
);
Stmp.CalcEdges;
obj.EdgesRef=Stmp.Edges;
end
%% CCRE A|B %% CCRE A|B
% %
% $$CCRE = \varepsilon (A) - E[\varepsilon(A|B)]$$ % $$CCRE = \varepsilon (A) - E[\varepsilon(A|B)]$$
@ -75,18 +58,14 @@ classdef CCREClass<CREClass
if isempty(obj.DataRef);return;end %if no refdata given, return; if isempty(obj.DataRef);return;end %if no refdata given, return;
%calculate histogram for RefData %calculate histogram for RefData
EVarEpsDataGivenRef=0; %clear previous calculations EVarEpsDataGivenRef=0; %clear previous calculations
%get the edge definition. [pDataRef,E]=histcounts(obj.DataRef(:),obj.nBinRef,'Normalization','probability');
% a bit of a workaround, I don't plan to reprogram the function [JointHist,E1,E2]=histcounts2(obj.Data(:),obj.DataRef(:),[obj.nBin,obj.nBinRef],'Normalization','probability');
if isempty(obj.EdgesRef)
obj.CalcEdgesRef;
end
[pDataRef,E]=histcounts(obj.DataRef(:),obj.EdgesRef,'Normalization','probability');
for k=1:numel(pDataRef) %loop over the bins in pDataRef NB: must be equal to the bins in JointHist for the Ref for k=1:numel(pDataRef) %loop over the bins in pDataRef NB: must be equal to the bins in JointHist for the Ref
if pDataRef(k)==0;continue;end% if no data at this line go to the next if pDataRef(k)==0;continue;end% if no data at this line go to the next
ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1); %get an index to all points in Ref data inside the current bin ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1);
CRE_DataGivenDataRef=CREClass('data',obj.Data(ind)... CRE_DataGivenDataRef=CREClass;
,'nbin',obj.Edges... CRE_DataGivenDataRef.Data=obj.Data(ind);
); CRE_DataGivenDataRef.nBin=E1;
CRE_DataGivenDataRef.Calc; CRE_DataGivenDataRef.Calc;
EVarEpsDataGivenRef=EVarEpsDataGivenRef+pDataRef(k)*CRE_DataGivenDataRef.CRE; % I really have to check this line on merit. EVarEpsDataGivenRef=EVarEpsDataGivenRef+pDataRef(k)*CRE_DataGivenDataRef.CRE; % I really have to check this line on merit.
end end

View File

@ -1,29 +1,17 @@
classdef CREClass < handle classdef CREClass < handle
% 23 and 24 Nov 2022 I added a new calculation for the RB based on
% Shanon entropy given a threshold \lambda. I also think there is a
% "mistake" in calculating the RB the classical way.
properties properties
Data; % the data set to get the CRE off Data; % the data set to get the CRE off
nBin; %number of bins nBin; %number of bins
Monitor=true; %set to true to get intermediat results/figures Monitor=true; %set to true to get intermediat results/figures
UseHistProxyFlag=true; % set this to get a proxy of the survival function using the hist function UseHistProxy=true; % set this to get a proxy of the survival function using the hist function
EqualSizeBinFlag=true; % set to true to get equal sized bins, set to false to get equi-content bins,
% if set to false: each bin will contain (aproximately) an equal
% number of pixel values.
end end
properties (SetAccess = private) %read only properties %to be made read only
CRE CRE
RB RB
P_RB P_RB
end end
properties(Dependent)
Edges
end
properties (Hidden) properties (Hidden)
WaitBarHandle; WaitBarHandle;
HiddenEdges;
end end
properties (Constant,Hidden) properties (Constant,Hidden)
alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope) alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope)
@ -45,10 +33,6 @@ classdef CREClass < handle
obj.Data=varargin{k}; obj.Data=varargin{k};
case {'nbin','bin'} case {'nbin','bin'}
obj.nBin=varargin{k}; obj.nBin=varargin{k};
case {'equalsizebinflag'}
obj.EqualSizeBinFlag=varargin{k};
case {'usehistproxyflag'}
obj.UseHistProxyFlag=varargin{k};
otherwise otherwise
error('invalid variable name'); error('invalid variable name');
end end
@ -63,17 +47,8 @@ classdef CREClass < handle
end end
end end
end end
%% set and get functions %% set functions
methods methods
function Edges=get.Edges(obj)
Edges=obj.HiddenEdges;
end
function set.Edges(obj,val)
if ~isnumeric(val);error('provide a array with edge values');end
if numel(val)<3;error('too few edges provided. Need at least 3 values, i.e. 2 bins defined');end
obj.HiddenEdges=transpose(val(:));
obj.nBin=numel(val)-1;
end
function set.Data(obj,val) function set.Data(obj,val)
if isnumeric(val) if isnumeric(val)
obj.Data=val; obj.Data=val;
@ -91,94 +66,41 @@ classdef CREClass < handle
end end
%% core function(s) %% core function(s)
methods methods
function CalcEdges(obj)
function Calc(obj)
if isempty(obj.Data) if isempty(obj.Data)
warning('nothing to do'); warning('nothing to do');
return; return;
end end
if obj.UseHistProxyFlag if obj.UseHistProxy
if isempty(obj.nBin) if isempty(obj.nBin)
obj.nBin=floor(numel(obj.Data)./10); obj.nBin=floor(numel(obj.Data)./10);
end end
if obj.EqualSizeBinFlag %update part below to prefered matlab commands
[~,obj.Edges]=histcounts(obj.Data(:),obj.nBin); % [N,bin]=hist(obj.Data(:),obj.nBin);
else % N=transpose(N);
prc=linspace(0,100,obj.nBin+1); % define the edges of the bins as a precentile % P=N./sum(N);
obj.Edges=prctile(obj.Data(:),prc); %find the edges of the bins % CP=cumsum(P);
end [CP,bin]=histcounts(obj.Data(:),obj.nBin,'Normalization','cdf');
else
if obj.UseHistProxyFlag ; error('You cannot use EqualSizeBinFlag=true and UseHistProxyFlag=false as a combination');end
q=transpose(unique(sort(obj.Data(:))));
dq=diff(q);
obj.Edges=cat(2,q(1)-eps,q(1:end-1)+dq/2,q(end)+eps); %the eps is a "shortcut" to avoid value=edge cases. Note that eps will increase the size of the bin by a very small amount. Although this is a systemetic bias, its effect will be neglectable
obj.nBin=numel(obj.Edges)-1;
end
end
function CalcRBShannon(obj)
% 23Nov22 Here I will try to implement a (hopefully better)
% definition of the relevance boundry. The basic idea is
% assume a dataset X= {x_1 ... x_i} X
% set a threshold \lambda NB \lambda \in of the data X
%Shanon entropy for this \lambda will be:
% H(\lamda)=p(x<\lambda)log(p(x<lambda))+p(x>=\lambda)log(p(x>=\lambda))
% Now the expectation value for H will be :
%E(H)=\int p(\lambda)*H(\lambda) d\lambda
if isempty(obj.Data)
warning('nothing to do');
return;
end
if isempty(obj.Edges);obj.CalcEdges;end
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
dl=transpose(diff(bin)./sum(diff(bin))); % will capture the case with non-equidistant bins as well as equidistant bins
pdf_lambda=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','pdf');
CP=transpose(CP);
LogCP=log(CP);
LogCP(~isfinite(LogCP))=0; % see comment below
FC=1-CP;
FC(FC<0)=0; % due to rounding errors, small negative numbers can arrise. These are theoretically not possible.
LogFC=log(FC);
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
H=-1*(CP.*LogCP+FC.*LogFC); %Shannon Entropy when a theshold (lambda) is set to the lower bin edge
w=(dl.*pdf_lambda);
w=w./sum(w); % for one reason or the other, the sum is not always equal to 1.
EH=w*H; %calculate expectation value for H
RB_ind=[find(H>=EH,1,'first') find(H>=EH,1,'last')];
obj.RB=obj.Edges(RB_ind)+dl(RB_ind)/2;
obj.P_RB=FC(RB_ind);
if obj.Monitor
lambda=obj.Edges(1:end-1);
scatter(w,H);xlabel('weight');ylabel('Shannon Entropy');snapnow
plot(lambda,H);xlabel('lambda');ylabel('Shannon Entropy');
line(lambda(RB_ind(:)),[EH;EH],'linestyle',':');snapnow
histogram(obj.Data(:));snapnow
disp('sum of weights')
disp(sum(dl.*pdf_lambda))
disp('EH')
disp(EH)
disp('boundry index');
disp(RB_ind);
disp('boundry lambda value');
disp(obj.RB);
disp('boundry p-values');
disp(FC(RB_ind));
disp('fraction inside interval');
disp(abs(diff(FC(RB_ind))));
end
end
function Calc(obj) %calculate the CRE as well as a on-tailed definition of the RB.
% RR note: 23Nov22: I am not sure if I still like this RB
% definition. I keep having a hard time defending it
% theoretically. The code is retained, for now, however for backwards
% compatibility.
if isempty(obj.Data)
warning('nothing to do');
return;
end
if isempty(obj.Edges);obj.CalcEdges;end
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
FC=1-transpose(CP); FC=1-transpose(CP);
FC(FC<0)=0; FC(FC<0)=0;
dl=diff(bin)./sum(diff(bin)); % will capture the case with non-equidistant bins as well as equidistant bins dl=ones(size(FC))./numel(FC);
else
q=sort(unique(obj.Data(:)));
FC=zeros(size(q));
dl=zeros(size(q));
for k=1:numel(q)
FC(k)=sum(obj.Data(:)>=q(k));
if k==1
dl(k)=(q(k+1)-q(k))/2;
elseif k==numel(q)
dl(k)=(q(k)-q(k-1))/2;
else
dl(k)=(q(k+1)-q(k))/2+(q(k)-q(k-1))/2;
end
end
FC=FC./numel(obj.Data);
end
LogFC=log(FC); LogFC=log(FC);
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
if any(isnan(FC));error('something went wrong');end %catch a posible error. if any(isnan(FC));error('something went wrong');end %catch a posible error.
@ -193,16 +115,16 @@ classdef CREClass < handle
dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. <p50) to 0 dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. <p50) to 0
CRE_Med_Inf=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos CRE_Med_Inf=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos
ind=find(exp(-CRE_Med_Inf)>=FC,1,'first'); ind=find(exp(-CRE_Med_Inf)>=FC,1,'first');
if obj.UseHistProxyFlag if obj.UseHistProxy
obj.P_RB=FC(ind); obj.P_RB=FC(ind);
if ind==numel(bin)%if RB is (beyond) the last bin,
warning(sprintf('RB is hitting the upper bin value\n Maybe increase number of bins')) %#ok<*SPWRN>
obj.RB=bin(ind); obj.RB=bin(ind);
end
obj.RB=(bin(ind)+bin(ind+1))/2; % set the RB to the center of the bin
else else
SortData=unique(sort(obj.Data(:))); % SortData=sort(obj.Data);
obj.RB=SortData(ind); % obj.RB=SortData(ind);
% I think the two lines above are a bug. I should not use
% "SortData but rather q itsself. Some numbers may be in
% the data multiple times
obj.RB=q(ind);
obj.P_RB=FC(ind); obj.P_RB=FC(ind);
end end
end end

View File

@ -2,63 +2,35 @@
% show some default behaviour % show some default behaviour
% and test the error handling. % and test the error handling.
%% set some constants %% set some constants
% A=randn(1000,1); A=randn(100);
% A=rand(1000,1);
A=cat(1,randn(1000,1),randn(200,1)+10);
%% create instance of CRE class %% create instance of CRE class
% % % S1=CREClass; S=CREClass;
% % % %% Calculate by setting labda equal to each of the (unique) values in the data. %% check histogram vs "new" technique
% % % S1.UseHistProxyFlag=false; % NB I come to the conclusion that the new method is not always working.
% % % S1.EqualSizeBinFlag=false; % Why?
% % % S1.Data=A;% set gaussian data S.Data=A;% set gaussian data
% % % S1.Calc; %get RB based on the CRE S.nBin=(numel(S.Data)); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
% % % S1.CalcRBShannon; %% get RB based on shannon entropy % use histogram
% % % disp(S1) S.UseHistProxy=true;
% % % %% use histogram aproximation with unequal bin sizes in the histogram S.Calc;
% % % S2=CREClass; S
% % % S2.UseHistProxyFlag=true; % Use my aproximation for the histogram
% % % S2.EqualSizeBinFlag=false; % only do this if the number of data points is not too big.
% % % S2.Data=A;% set gaussian data S.UseHistProxy=false;
% % % S2.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos S.Calc;
% % % S2.Calc S
% % % S2.CalcRBShannon;
% % % disp(S2)
% % % %% use histogram aproximation with equal bin sizes in the histogram
% % % S3=CREClass;
% % % S3.UseHistProxyFlag=true;
% % % S3.EqualSizeBinFlag=false;
% % % S3.Data=A;% set gaussian data
% % % S3.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
% % % S3.Calc
% % % S3.CalcRBShannon;
% % % disp(S3)
%% Check recalculation of edges
S4=CREClass;
S4.UseHistProxyFlag=true;
S4.EqualSizeBinFlag=true;
S4.Data=A;% set gaussian data
S4.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
S4.Calc
S4.CalcRBShannon;
disp(S4)
S4.nBin=round(numel(A)/10);
S4.CalcEdges ; % force a recalculation of the edges
S4.Calc;
S4.CalcRBShannon;
disp(S4)
%%
return;
%% Show effect of scaling or ofsett the data %% Show effect of scaling or ofsett the data
% Scale % Scale
S.Data=100*A; S.Data=100*A;
S.UseHistProxyFlag=true; S.UseHistProxy=true;
S.Calc; S.Calc;
disp(S); S
% Offset % Offset
S.Data=A+10; S.Data=A+10;
S.UseHistProxyFlag=true; S.UseHistProxy=true;
S.Calc; S.Calc;
disp(S); S
%% %%
%% Test CCRE class %% Test CCRE class
@ -67,30 +39,21 @@ figure(2);clf;
figure(3);clf;hold on; figure(3);clf;hold on;
A=randn(100); A=randn(100);
B=A; B=A;
B(:)=A(randperm(numel(A))); B(:)=A(randperm(numel(A)))
w=-1:0.1:1; for k=-1:0.1:1
out(numel(w),1)=struct('w',[],'CRE',[],'CCRE',[],'R',[]);
for k=1:numel(w)
out(k).w=w(k);
CS=CCREClass; CS=CCREClass;
CS.EqualSizeBinFlag=false; CS.Data=1*(1-abs(k))*A+k*B;
CS.Data=1*(1-abs(w(k)))*A+w(k)*B;
CS.nBin=50; CS.nBin=50;
CS.DataRef=B; CS.DataRef=B;
CS.nBinRef=500; CS.nBinRef=500;
CS.Calc; CS.Calc;
out(k).CRE=CS.CRE;
out(k).CCRE=CS.CCRE;
out(k).R=CS.CCRE./CS.CRE;
figure(2);
title(sprintf('w: %d',w(k)));
scatter(CS.DataRef(:),CS.Data(:));
snapnow
end
disp(CS);
figure(1); figure(1);
clf;hold on; plot(k,CS.CCRE,'o')
plot([out(:).w],[out(:).CCRE],'o'); plot(k,CS.CRE,'x');
plot([out(:).w],[out(:).CRE],'x'); figure(2);
figure(3);clf scatter(CS.DataRef(:),CS.Data(:));
plot([out(:).w],[out(:).R],'d'); figure(3);
plot(k,CS.CCRE./CS.CRE,'d');
end
CS
snapnow