210 lines
9.4 KiB
Matlab
210 lines
9.4 KiB
Matlab
classdef CREClass < handle
|
|
% 23 and 24 Nov 2022 I added a new calculation for the RB based on
|
|
% Shanon entropy given a threshold \lambda. I also think there is a
|
|
% "mistake" in calculating the RB the classical way.
|
|
properties
|
|
Data; % the data set to get the CRE off
|
|
nBin; %number of bins
|
|
Monitor=true; %set to true to get intermediat results/figures
|
|
UseHistProxyFlag=true; % set this to get a proxy of the survival function using the hist function
|
|
EqualSizeBinFlag=true; % set to true to get equal sized bins, set to false to get equi-content bins,
|
|
% if set to false: each bin will contain (aproximately) an equal
|
|
% number of pixel values.
|
|
end
|
|
properties (SetAccess = private) %read only
|
|
CRE
|
|
RB
|
|
P_RB
|
|
end
|
|
|
|
properties(Dependent)
|
|
Edges
|
|
end
|
|
|
|
properties (Hidden)
|
|
WaitBarHandle;
|
|
HiddenEdges;
|
|
end
|
|
properties (Constant,Hidden)
|
|
alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope)
|
|
beta=1; %they are currently obsolete.
|
|
% FeedBackOptions={'CRE','RB','All'}
|
|
end
|
|
%% constructor
|
|
methods %constructor
|
|
function obj = CREClass(varargin)
|
|
obj.Data=[];
|
|
obj.nBin=[];
|
|
%obj.FeedBack={'CRE'};
|
|
if nargin==0;return;end % if no input arguments are given, just create an empty instance
|
|
if ischar(varargin{1}) %assume number value pairs.
|
|
if nargin<=2; error('please use number value pairs' ); end
|
|
for k=2:2:nargin
|
|
switch lower(varargin{k-1})
|
|
case {'ref','data'}
|
|
obj.Data=varargin{k};
|
|
case {'nbin','bin'}
|
|
obj.nBin=varargin{k};
|
|
case {'equalsizebinflag'}
|
|
obj.EqualSizeBinFlag=varargin{k};
|
|
case {'usehistproxyflag'}
|
|
obj.UseHistProxyFlag=varargin{k};
|
|
otherwise
|
|
error('invalid variable name');
|
|
end
|
|
end
|
|
else
|
|
if nargin>=1
|
|
obj.Data=varargin{1};
|
|
end
|
|
if nargin==2
|
|
obj.nBin=varargin{2};
|
|
end
|
|
end
|
|
end
|
|
end
|
|
%% set and get functions
|
|
methods
|
|
function Edges=get.Edges(obj)
|
|
Edges=obj.HiddenEdges;
|
|
end
|
|
function set.Edges(obj,val)
|
|
if ~isnumeric(val);error('provide a array with edge values');end
|
|
if numel(val)<3;error('too few edges provided. Need at least 3 values, i.e. 2 bins defined');end
|
|
obj.HiddenEdges=transpose(val(:));
|
|
obj.nBin=numel(val)-1;
|
|
end
|
|
function set.Data(obj,val)
|
|
if isnumeric(val)
|
|
obj.Data=val;
|
|
else
|
|
error('please enter numeric array/matrix as reference');
|
|
end
|
|
end
|
|
function set.nBin(obj,val)
|
|
if isnumeric(val)
|
|
obj.nBin=val;
|
|
else
|
|
error('please enter numeric array or value as bin definition');
|
|
end
|
|
end
|
|
end
|
|
%% core function(s)
|
|
methods
|
|
function CalcEdges(obj)
|
|
if isempty(obj.Data)
|
|
warning('nothing to do');
|
|
return;
|
|
end
|
|
if obj.UseHistProxyFlag
|
|
if isempty(obj.nBin)
|
|
obj.nBin=floor(numel(obj.Data)./10);
|
|
end
|
|
if obj.EqualSizeBinFlag
|
|
[~,obj.Edges]=histcounts(obj.Data(:),obj.nBin);
|
|
else
|
|
prc=linspace(0,100,obj.nBin+1); % define the edges of the bins as a precentile
|
|
obj.Edges=prctile(obj.Data(:),prc); %find the edges of the bins
|
|
end
|
|
else
|
|
if obj.UseHistProxyFlag ; error('You cannot use EqualSizeBinFlag=true and UseHistProxyFlag=false as a combination');end
|
|
q=transpose(unique(sort(obj.Data(:))));
|
|
dq=diff(q);
|
|
obj.Edges=cat(2,q(1)-eps,q(1:end-1)+dq/2,q(end)+eps); %the eps is a "shortcut" to avoid value=edge cases. Note that eps will increase the size of the bin by a very small amount. Although this is a systemetic bias, its effect will be neglectable
|
|
obj.nBin=numel(obj.Edges)-1;
|
|
end
|
|
end
|
|
function CalcRBShannon(obj)
|
|
% 23Nov22 Here I will try to implement a (hopefully better)
|
|
% definition of the relevance boundry. The basic idea is
|
|
% assume a dataset X= {x_1 ... x_i} X
|
|
% set a threshold \lambda NB \lambda \in of the data X
|
|
%Shanon entropy for this \lambda will be:
|
|
% H(\lamda)=p(x<\lambda)log(p(x<lambda))+p(x>=\lambda)log(p(x>=\lambda))
|
|
% Now the expectation value for H will be :
|
|
%E(H)=\int p(\lambda)*H(\lambda) d\lambda
|
|
if isempty(obj.Data)
|
|
warning('nothing to do');
|
|
return;
|
|
end
|
|
if isempty(obj.Edges);obj.CalcEdges;end
|
|
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
|
|
dl=transpose(diff(bin)./sum(diff(bin))); % will capture the case with non-equidistant bins as well as equidistant bins
|
|
pdf_lambda=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','pdf');
|
|
CP=transpose(CP);
|
|
LogCP=log(CP);
|
|
LogCP(~isfinite(LogCP))=0; % see comment below
|
|
FC=1-CP;
|
|
FC(FC<0)=0; % due to rounding errors, small negative numbers can arrise. These are theoretically not possible.
|
|
LogFC=log(FC);
|
|
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
|
|
H=-1*(CP.*LogCP+FC.*LogFC); %Shannon Entropy when a theshold (lambda) is set to the lower bin edge
|
|
w=(dl.*pdf_lambda);
|
|
w=w./sum(w); % for one reason or the other, the sum is not always equal to 1.
|
|
EH=w*H; %calculate expectation value for H
|
|
RB_ind=[find(H>=EH,1,'first') find(H>=EH,1,'last')];
|
|
obj.RB=obj.Edges(RB_ind)+dl(RB_ind)/2;
|
|
obj.P_RB=FC(RB_ind);
|
|
if obj.Monitor
|
|
lambda=obj.Edges(1:end-1);
|
|
scatter(w,H);xlabel('weight');ylabel('Shannon Entropy');snapnow
|
|
plot(lambda,H);xlabel('lambda');ylabel('Shannon Entropy');
|
|
line(lambda(RB_ind(:)),[EH;EH],'linestyle',':');snapnow
|
|
histogram(obj.Data(:));snapnow
|
|
disp('sum of weights')
|
|
disp(sum(dl.*pdf_lambda))
|
|
disp('EH')
|
|
disp(EH)
|
|
disp('boundry index');
|
|
disp(RB_ind);
|
|
disp('boundry lambda value');
|
|
disp(obj.RB);
|
|
disp('boundry p-values');
|
|
disp(FC(RB_ind));
|
|
disp('fraction inside interval');
|
|
disp(abs(diff(FC(RB_ind))));
|
|
end
|
|
end
|
|
function Calc(obj) %calculate the CRE as well as a on-tailed definition of the RB.
|
|
% RR note: 23Nov22: I am not sure if I still like this RB
|
|
% definition. I keep having a hard time defending it
|
|
% theoretically. The code is retained, for now, however for backwards
|
|
% compatibility.
|
|
if isempty(obj.Data)
|
|
warning('nothing to do');
|
|
return;
|
|
end
|
|
if isempty(obj.Edges);obj.CalcEdges;end
|
|
[CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
|
|
FC=1-transpose(CP);
|
|
FC(FC<0)=0;
|
|
dl=diff(bin)./sum(diff(bin)); % will capture the case with non-equidistant bins as well as equidistant bins
|
|
LogFC=log(FC);
|
|
LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
|
|
if any(isnan(FC));error('something went wrong');end %catch a posible error.
|
|
if transpose(dl)*FC==0
|
|
%if only one value in the most left bin in the distribution I may get a 0 divided by 0
|
|
%as the CRE of a delta function is 0, enforce this outcome
|
|
obj.CRE=0;
|
|
else
|
|
obj.CRE=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos
|
|
end
|
|
%% get the RB
|
|
dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. <p50) to 0
|
|
CRE_Med_Inf=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos
|
|
ind=find(exp(-CRE_Med_Inf)>=FC,1,'first');
|
|
if obj.UseHistProxyFlag
|
|
obj.P_RB=FC(ind);
|
|
if ind==numel(bin)%if RB is (beyond) the last bin,
|
|
warning(sprintf('RB is hitting the upper bin value\n Maybe increase number of bins')) %#ok<*SPWRN>
|
|
obj.RB=bin(ind);
|
|
end
|
|
obj.RB=(bin(ind)+bin(ind+1))/2; % set the RB to the center of the bin
|
|
else
|
|
SortData=unique(sort(obj.Data(:)));
|
|
obj.RB=SortData(ind);
|
|
obj.P_RB=FC(ind);
|
|
end
|
|
end
|
|
end
|
|
end |