Compare commits
12 Commits
fb282e53e1
...
master
Author | SHA1 | Date | |
---|---|---|---|
e6e9e593a4 | |||
cbcee6ac27 | |||
e68267182d | |||
ce95974c7b | |||
3998755f40 | |||
2274e88690 | |||
|
6c31374b1a | ||
|
8ac21ada83 | ||
|
026a3f589f | ||
|
11d91979df | ||
|
baac9e2032 | ||
|
2c7a9f90f0 |
98
CCREClass.m
Normal file
98
CCREClass.m
Normal file
@@ -0,0 +1,98 @@
|
||||
classdef CCREClass<CREClass
|
||||
%CCRECLASS calculate the Conditional Cummulative Residual Entropy
|
||||
% This class uses the CREClass options
|
||||
%
|
||||
|
||||
properties
|
||||
%the data field inherited from the CREClass will contain the data
|
||||
%to calculate the CCRE on.
|
||||
%the nBin field inherited from the CREClass will contain the number
|
||||
%of bins if UseHistProxy is set.
|
||||
end
|
||||
properties
|
||||
DataRef %This parameter will contain the reference set.
|
||||
nBinRef %number of "bins" to use for the reference distribution
|
||||
EqualSizeBinFlagRef=false; %set to true to get equally spaced bins, default : false, put an equal number of points per bin.
|
||||
EdgesRef; %edges for the bins of the reference function. %todo, turn into dep. variable!
|
||||
end
|
||||
properties(SetAccess=private) %make read only
|
||||
CCRE %parameter containing the conditional Cumulative Residual Entropy
|
||||
end
|
||||
properties(Constant)
|
||||
UseHistProxyFlagRef=true; % this value is there to remind the user that we will have to bin the reference image. The actual value is not used in the code
|
||||
end
|
||||
|
||||
methods %constructor
|
||||
function obj = CCREClass()
|
||||
%CCRECLASS Construct an instance of this class
|
||||
% Detailed explanation goes here
|
||||
obj@CREClass;
|
||||
end
|
||||
end
|
||||
methods %set and get functions
|
||||
function set.DataRef(obj,val)
|
||||
if isnumeric(val)
|
||||
obj.DataRef=val;
|
||||
else
|
||||
error('please enter numeric array/matrix as reference');
|
||||
end
|
||||
end
|
||||
function set.nBinRef(obj,val)
|
||||
nBinFromCRE=obj.nBin; %park the current value of nBin
|
||||
obj.nBin=val; %use the check system from CREClass
|
||||
obj.nBinRef=obj.nBin;
|
||||
obj.nBin=nBinFromCRE; %restore the current value of nBin
|
||||
end
|
||||
end
|
||||
methods %user callable functions
|
||||
function CalcEdgesRef(obj)
|
||||
% copy the info for "ref" to a CRE object
|
||||
Stmp=CREClass('ref',obj.DataRef...
|
||||
,'nbin',obj.nBinRef...
|
||||
,'EqualSizeBinFlag',obj.EqualSizeBinFlagRef...
|
||||
,'UseHistProxyFlag',obj.UseHistProxyFlagRef...
|
||||
);
|
||||
Stmp.CalcEdges;
|
||||
obj.EdgesRef=Stmp.Edges;
|
||||
end
|
||||
%% CCRE A|B
|
||||
%
|
||||
% $$CCRE = \varepsilon (A) - E[\varepsilon(A|B)]$$
|
||||
%
|
||||
% $$\varepsilon(A) = -\sum_{\lambda}F_c^A(\lambda)logF_c(A^(\lambda)$$
|
||||
% $$F_c^A(\lambda)=\int_{\lambda}^{\infty}p^A(l)dl$$
|
||||
%
|
||||
% $$E[\varepsilon(A|B)]= \sigma_{\kappa} p^B(\kappa)*\varepsilon(A|B)$$
|
||||
%
|
||||
% $$\varepsilon(A|B) = \sigma_{\lambda}F_c^{A|B}(\lambda)log
|
||||
% F_c^{A|B}(\lambda)$$
|
||||
%
|
||||
% $$F_c^{A|B}(\lambda) = \int_{\lambda}^{\infty}p^{A,B}(l,k)/p^B(\kappa)dl$$
|
||||
%
|
||||
% see equation 7 in wan and Vemuri
|
||||
function Calc(obj)
|
||||
Calc@CREClass(obj); %calculate the CRE for the data.
|
||||
if isempty(obj.DataRef);return;end %if no refdata given, return;
|
||||
%calculate histogram for RefData
|
||||
EVarEpsDataGivenRef=0; %clear previous calculations
|
||||
%get the edge definition.
|
||||
% a bit of a workaround, I don't plan to reprogram the function
|
||||
if isempty(obj.EdgesRef)
|
||||
obj.CalcEdgesRef;
|
||||
end
|
||||
[pDataRef,E]=histcounts(obj.DataRef(:),obj.EdgesRef,'Normalization','probability');
|
||||
for k=1:numel(pDataRef) %loop over the bins in pDataRef NB: must be equal to the bins in JointHist for the Ref
|
||||
if pDataRef(k)==0;continue;end% if no data at this line go to the next
|
||||
ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1); %get an index to all points in Ref data inside the current bin
|
||||
CRE_DataGivenDataRef=CREClass('data',obj.Data(ind)...
|
||||
,'nbin',obj.Edges...
|
||||
);
|
||||
CRE_DataGivenDataRef.Calc;
|
||||
EVarEpsDataGivenRef=EVarEpsDataGivenRef+pDataRef(k)*CRE_DataGivenDataRef.CRE; % I really have to check this line on merit.
|
||||
end
|
||||
obj.CCRE=obj.CRE-EVarEpsDataGivenRef;
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
154
CREClass.m
154
CREClass.m
@@ -1,17 +1,29 @@
|
||||
classdef CREClass < handle
|
||||
% 23 and 24 Nov 2022 I added a new calculation for the RB based on
|
||||
% Shanon entropy given a threshold \lambda. I also think there is a
|
||||
% "mistake" in calculating the RB the classical way.
|
||||
properties
|
||||
Data; % the data set to get the CRE off
|
||||
nBin; %number of bins for the reference
|
||||
nBin; %number of bins
|
||||
Monitor=true; %set to true to get intermediat results/figures
|
||||
UseHistProxy=true; % set this to get a proxy of the survival function using the hist function
|
||||
UseHistProxyFlag=true; % set this to get a proxy of the survival function using the hist function
|
||||
EqualSizeBinFlag=true; % set to true to get equal sized bins, set to false to get equi-content bins,
|
||||
% if set to false: each bin will contain (aproximately) an equal
|
||||
% number of pixel values.
|
||||
end
|
||||
properties %to be made read only
|
||||
properties (SetAccess = private) %read only
|
||||
CRE
|
||||
RB
|
||||
P_RB
|
||||
end
|
||||
|
||||
properties(Dependent)
|
||||
Edges
|
||||
end
|
||||
|
||||
properties (Hidden)
|
||||
WaitBarHandle;
|
||||
HiddenEdges;
|
||||
end
|
||||
properties (Constant,Hidden)
|
||||
alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope)
|
||||
@@ -33,6 +45,10 @@ classdef CREClass < handle
|
||||
obj.Data=varargin{k};
|
||||
case {'nbin','bin'}
|
||||
obj.nBin=varargin{k};
|
||||
case {'equalsizebinflag'}
|
||||
obj.EqualSizeBinFlag=varargin{k};
|
||||
case {'usehistproxyflag'}
|
||||
obj.UseHistProxyFlag=varargin{k};
|
||||
otherwise
|
||||
error('invalid variable name');
|
||||
end
|
||||
@@ -47,8 +63,17 @@ classdef CREClass < handle
|
||||
end
|
||||
end
|
||||
end
|
||||
%% set functions
|
||||
%% set and get functions
|
||||
methods
|
||||
function Edges=get.Edges(obj)
|
||||
Edges=obj.HiddenEdges;
|
||||
end
|
||||
function set.Edges(obj,val)
|
||||
if ~isnumeric(val);error('provide a array with edge values');end
|
||||
if numel(val)<3;error('too few edges provided. Need at least 3 values, i.e. 2 bins defined');end
|
||||
obj.HiddenEdges=transpose(val(:));
|
||||
obj.nBin=numel(val)-1;
|
||||
end
|
||||
function set.Data(obj,val)
|
||||
if isnumeric(val)
|
||||
obj.Data=val;
|
||||
@@ -66,52 +91,117 @@ classdef CREClass < handle
|
||||
end
|
||||
%% core function(s)
|
||||
methods
|
||||
|
||||
function Calc(obj)
|
||||
function CalcEdges(obj)
|
||||
if isempty(obj.Data)
|
||||
warning('nothing to do');
|
||||
return;
|
||||
end
|
||||
if obj.UseHistProxy
|
||||
if obj.UseHistProxyFlag
|
||||
if isempty(obj.nBin)
|
||||
obj.nBin=floor(numel(obj.Data)./10);
|
||||
end
|
||||
[N,bin]=hist(obj.Data(:),obj.nBin);
|
||||
N=transpose(N);
|
||||
P=N./sum(N);
|
||||
CP=cumsum(P);
|
||||
if obj.EqualSizeBinFlag
|
||||
[~,obj.Edges]=histcounts(obj.Data(:),obj.nBin);
|
||||
else
|
||||
prc=linspace(0,100,obj.nBin+1); % define the edges of the bins as a precentile
|
||||
obj.Edges=prctile(obj.Data(:),prc); %find the edges of the bins
|
||||
end
|
||||
else
|
||||
if obj.UseHistProxyFlag ; error('You cannot use EqualSizeBinFlag=true and UseHistProxyFlag=false as a combination');end
|
||||
q=transpose(unique(sort(obj.Data(:))));
|
||||
dq=diff(q);
|
||||
obj.Edges=cat(2,q(1)-eps,q(1:end-1)+dq/2,q(end)+eps); %the eps is a "shortcut" to avoid value=edge cases. Note that eps will increase the size of the bin by a very small amount. Although this is a systemetic bias, its effect will be neglectable
|
||||
obj.nBin=numel(obj.Edges)-1;
|
||||
end
|
||||
end
|
||||
function CalcRBShannon(obj)
|
||||
% 23Nov22 Here I will try to implement a (hopefully better)
|
||||
% definition of the relevance boundry. The basic idea is
|
||||
% assume a dataset X= {x_1 ... x_i} X
|
||||
% set a threshold \lambda NB \lambda \in of the data X
|
||||
%Shanon entropy for this \lambda will be:
|
||||
% H(\lamda)=p(x<\lambda)log(p(x<lambda))+p(x>=\lambda)log(p(x>=\lambda))
|
||||
% Now the expectation value for H will be :
|
||||
%E(H)=\int p(\lambda)*H(\lambda) d\lambda
|
||||
if isempty(obj.Data)
|
||||
warning('nothing to do');
|
||||
return;
|
||||
end
|
||||
if isempty(obj.Edges);obj.CalcEdges;end
|
||||
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
|
||||
dl=transpose(diff(bin)./sum(diff(bin))); % will capture the case with non-equidistant bins as well as equidistant bins
|
||||
pdf_lambda=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','pdf');
|
||||
CP=transpose(CP);
|
||||
LogCP=log(CP);
|
||||
LogCP(~isfinite(LogCP))=0; % see comment below
|
||||
FC=1-CP;
|
||||
FC(FC<0)=0;
|
||||
dl=ones(size(FC))./numel(FC);
|
||||
else
|
||||
q=sort(unique(obj.Data(:)));
|
||||
FC=zeros(size(q));
|
||||
dl=zeros(size(q));
|
||||
for k=1:numel(q)
|
||||
FC(k)=sum(obj.Data(:)>=q(k));
|
||||
if k==1
|
||||
dl(k)=(q(k+1)-q(k))/2; %should I divide by 2?
|
||||
elseif k==numel(q)
|
||||
dl(k)=(q(k)-q(k-1))/2;
|
||||
else
|
||||
dl(k)=(q(k+1)-q(k))/2+(q(k)-q(k-1))/2;
|
||||
end
|
||||
end
|
||||
FC=FC./numel(obj.Data);
|
||||
end
|
||||
FC(FC<0)=0; % due to rounding errors, small negative numbers can arrise. These are theoretically not possible.
|
||||
LogFC=log(FC);
|
||||
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
|
||||
if any(isnan(FC));keyboard;end
|
||||
H=-1*(CP.*LogCP+FC.*LogFC); %Shannon Entropy when a theshold (lambda) is set to the lower bin edge
|
||||
w=(dl.*pdf_lambda);
|
||||
w=w./sum(w); % for one reason or the other, the sum is not always equal to 1.
|
||||
EH=w*H; %calculate expectation value for H
|
||||
RB_ind=[find(H>=EH,1,'first') find(H>=EH,1,'last')];
|
||||
obj.RB=obj.Edges(RB_ind)+dl(RB_ind)/2;
|
||||
obj.P_RB=FC(RB_ind);
|
||||
if obj.Monitor
|
||||
lambda=obj.Edges(1:end-1);
|
||||
scatter(w,H);xlabel('weight');ylabel('Shannon Entropy');snapnow
|
||||
plot(lambda,H);xlabel('lambda');ylabel('Shannon Entropy');
|
||||
line(lambda(RB_ind(:)),[EH;EH],'linestyle',':');snapnow
|
||||
histogram(obj.Data(:));snapnow
|
||||
disp('sum of weights')
|
||||
disp(sum(dl.*pdf_lambda))
|
||||
disp('EH')
|
||||
disp(EH)
|
||||
disp('boundry index');
|
||||
disp(RB_ind);
|
||||
disp('boundry lambda value');
|
||||
disp(obj.RB);
|
||||
disp('boundry p-values');
|
||||
disp(FC(RB_ind));
|
||||
disp('fraction inside interval');
|
||||
disp(abs(diff(FC(RB_ind))));
|
||||
end
|
||||
end
|
||||
function Calc(obj) %calculate the CRE as well as a on-tailed definition of the RB.
|
||||
% RR note: 23Nov22: I am not sure if I still like this RB
|
||||
% definition. I keep having a hard time defending it
|
||||
% theoretically. The code is retained, for now, however for backwards
|
||||
% compatibility.
|
||||
if isempty(obj.Data)
|
||||
warning('nothing to do');
|
||||
return;
|
||||
end
|
||||
if isempty(obj.Edges);obj.CalcEdges;end
|
||||
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
|
||||
FC=1-transpose(CP);
|
||||
FC(FC<0)=0;
|
||||
dl=diff(bin)./sum(diff(bin)); % will capture the case with non-equidistant bins as well as equidistant bins
|
||||
LogFC=log(FC);
|
||||
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
|
||||
if any(isnan(FC));error('something went wrong');end %catch a posible error.
|
||||
if transpose(dl)*FC==0
|
||||
%if only one value in the most left bin in the distribution I may get a 0 divided by 0
|
||||
%as the CRE of a delta function is 0, enforce this outcome
|
||||
obj.CRE=0;
|
||||
else
|
||||
obj.CRE=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos
|
||||
end
|
||||
%% get the RB
|
||||
dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. <p50) to 0
|
||||
CRE_Med_Inf=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos
|
||||
ind=find(exp(-CRE_Med_Inf)>=FC,1,'first');
|
||||
if obj.UseHistProxy
|
||||
if obj.UseHistProxyFlag
|
||||
obj.P_RB=FC(ind);
|
||||
if ind==numel(bin)%if RB is (beyond) the last bin,
|
||||
warning(sprintf('RB is hitting the upper bin value\n Maybe increase number of bins')) %#ok<*SPWRN>
|
||||
obj.RB=bin(ind);
|
||||
end
|
||||
obj.RB=(bin(ind)+bin(ind+1))/2; % set the RB to the center of the bin
|
||||
else
|
||||
SortData=sort(obj.Data);
|
||||
SortData=unique(sort(obj.Data(:)));
|
||||
obj.RB=SortData(ind);
|
||||
obj.P_RB=FC(ind);
|
||||
end
|
||||
|
114
TestCREClass.m
114
TestCREClass.m
@@ -1,22 +1,96 @@
|
||||
S=CREClass;
|
||||
S.Data=randn(100000,1);
|
||||
S.nBin=(1000);
|
||||
%% use histogram
|
||||
S.UseHistProxy=true;
|
||||
%% Test CCRE and CRE class
|
||||
% show some default behaviour
|
||||
% and test the error handling.
|
||||
%% set some constants
|
||||
% A=randn(1000,1);
|
||||
% A=rand(1000,1);
|
||||
A=cat(1,randn(1000,1),randn(200,1)+10);
|
||||
%% create instance of CRE class
|
||||
% % % S1=CREClass;
|
||||
% % % %% Calculate by setting labda equal to each of the (unique) values in the data.
|
||||
% % % S1.UseHistProxyFlag=false;
|
||||
% % % S1.EqualSizeBinFlag=false;
|
||||
% % % S1.Data=A;% set gaussian data
|
||||
% % % S1.Calc; %get RB based on the CRE
|
||||
% % % S1.CalcRBShannon; %% get RB based on shannon entropy
|
||||
% % % disp(S1)
|
||||
% % % %% use histogram aproximation with unequal bin sizes in the histogram
|
||||
% % % S2=CREClass;
|
||||
% % % S2.UseHistProxyFlag=true;
|
||||
% % % S2.EqualSizeBinFlag=false;
|
||||
% % % S2.Data=A;% set gaussian data
|
||||
% % % S2.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
||||
% % % S2.Calc
|
||||
% % % S2.CalcRBShannon;
|
||||
% % % disp(S2)
|
||||
% % % %% use histogram aproximation with equal bin sizes in the histogram
|
||||
% % % S3=CREClass;
|
||||
% % % S3.UseHistProxyFlag=true;
|
||||
% % % S3.EqualSizeBinFlag=false;
|
||||
% % % S3.Data=A;% set gaussian data
|
||||
% % % S3.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
||||
% % % S3.Calc
|
||||
% % % S3.CalcRBShannon;
|
||||
% % % disp(S3)
|
||||
%% Check recalculation of edges
|
||||
S4=CREClass;
|
||||
S4.UseHistProxyFlag=true;
|
||||
S4.EqualSizeBinFlag=true;
|
||||
S4.Data=A;% set gaussian data
|
||||
S4.nBin=numel(A); % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
|
||||
S4.Calc
|
||||
S4.CalcRBShannon;
|
||||
disp(S4)
|
||||
S4.nBin=round(numel(A)/10);
|
||||
S4.CalcEdges ; % force a recalculation of the edges
|
||||
S4.Calc;
|
||||
S4.CalcRBShannon;
|
||||
disp(S4)
|
||||
%%
|
||||
return;
|
||||
%% Show effect of scaling or ofsett the data
|
||||
% Scale
|
||||
S.Data=100*A;
|
||||
S.UseHistProxyFlag=true;
|
||||
S.Calc;
|
||||
S
|
||||
%% Use my aproximation
|
||||
S.UseHistProxy=false;
|
||||
disp(S);
|
||||
% Offset
|
||||
S.Data=A+10;
|
||||
S.UseHistProxyFlag=true;
|
||||
S.Calc;
|
||||
S
|
||||
std(S.Data)
|
||||
%% multiply S
|
||||
S.Data=100*S.Data;
|
||||
S.UseHistProxy=true;
|
||||
S.Calc
|
||||
S
|
||||
std(S.Data)
|
||||
%% ones again the "new" way
|
||||
S.UseHistProxy=false;
|
||||
S.Calc
|
||||
S
|
||||
disp(S);
|
||||
%%
|
||||
|
||||
%% Test CCRE class
|
||||
figure(1);clf;hold on
|
||||
figure(2);clf;
|
||||
figure(3);clf;hold on;
|
||||
A=randn(100);
|
||||
B=A;
|
||||
B(:)=A(randperm(numel(A)));
|
||||
w=-1:0.1:1;
|
||||
out(numel(w),1)=struct('w',[],'CRE',[],'CCRE',[],'R',[]);
|
||||
for k=1:numel(w)
|
||||
out(k).w=w(k);
|
||||
CS=CCREClass;
|
||||
CS.EqualSizeBinFlag=false;
|
||||
CS.Data=1*(1-abs(w(k)))*A+w(k)*B;
|
||||
CS.nBin=50;
|
||||
CS.DataRef=B;
|
||||
CS.nBinRef=500;
|
||||
CS.Calc;
|
||||
out(k).CRE=CS.CRE;
|
||||
out(k).CCRE=CS.CCRE;
|
||||
out(k).R=CS.CCRE./CS.CRE;
|
||||
figure(2);
|
||||
title(sprintf('w: %d',w(k)));
|
||||
scatter(CS.DataRef(:),CS.Data(:));
|
||||
snapnow
|
||||
end
|
||||
disp(CS);
|
||||
figure(1);
|
||||
clf;hold on;
|
||||
plot([out(:).w],[out(:).CCRE],'o');
|
||||
plot([out(:).w],[out(:).CRE],'x');
|
||||
figure(3);clf
|
||||
plot([out(:).w],[out(:).R],'d');
|
Reference in New Issue
Block a user