see previous comment

Please use EndPoint25Mar2022 as the stable branch.
Changes made to CREClass.m Here I've added an alternative calculation for the Relevance Boundary. CalcRBShannon is now part of the CREClass.m file. The TestCREClass has been updated to allow testing this new feature. NB: this is work in progress
2022-11-24 14:10:27 +01:00 · 2022-11-24 14:09:47 +01:00 · 2022-03-27 17:12:16 +02:00 · 2022-03-26 16:10:39 +01:00 · 2022-03-25 16:06:59 +01:00
3 changed files with 218 additions and 82 deletions
--- a/CCREClass.m
+++ b/CCREClass.m
@ -11,8 +11,15 @@ classdef CCREClass<CREClass
    end
    properties
        DataRef %This parameter will contain the reference set.
-        nBinRef
+        nBinRef %number of "bins" to use for the reference distribution
-        CCRE %parameter containing the conditional Cumulative Residual Entropy
+        EqualSizeBinFlagRef=false;  %set to true to get equally spaced bins, default : false, put an equal number of points per bin. 
        EdgesRef; %edges for the bins of the reference function.  %todo, turn into dep. variable!
    end
    properties(SetAccess=private) %make read only
                CCRE %parameter containing the conditional Cumulative Residual Entropy
    end
    properties(Constant)
        UseHistProxyFlagRef=true; % this value is there to remind the user that we will have to bin the reference image. The actual value is not used in the code
    end
    methods %constructor
@ -38,6 +45,16 @@ classdef CCREClass<CREClass
        end
    end
    methods %user callable functions
        function CalcEdgesRef(obj)
            % copy the info for "ref" to a CRE object
            Stmp=CREClass('ref',obj.DataRef...
                ,'nbin',obj.nBinRef...
                ,'EqualSizeBinFlag',obj.EqualSizeBinFlagRef...
                ,'UseHistProxyFlag',obj.UseHistProxyFlagRef...
                );
            Stmp.CalcEdges;
            obj.EdgesRef=Stmp.Edges;
        end
        %% CCRE A|B
        %
        % $$CCRE = \varepsilon (A) - E[\varepsilon(A|B)]$$
@ -58,14 +75,18 @@ classdef CCREClass<CREClass
            if isempty(obj.DataRef);return;end %if no refdata given, return;
            %calculate histogram for RefData
            EVarEpsDataGivenRef=0; %clear previous calculations
-            [pDataRef,E]=histcounts(obj.DataRef(:),obj.nBinRef,'Normalization','probability');
+            %get the edge definition. 
-            [JointHist,E1,E2]=histcounts2(obj.Data(:),obj.DataRef(:),[obj.nBin,obj.nBinRef],'Normalization','probability');
+            % a bit of a workaround, I don't plan to reprogram the function
            if isempty(obj.EdgesRef)
                obj.CalcEdgesRef;
            end
            [pDataRef,E]=histcounts(obj.DataRef(:),obj.EdgesRef,'Normalization','probability');
            for k=1:numel(pDataRef) %loop over the bins in pDataRef NB: must be equal to the bins in JointHist for the Ref
                if pDataRef(k)==0;continue;end% if no data at this line go to the next
-                ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1);
+                ind=E(k)<=obj.DataRef(:)&obj.DataRef(:)<E(k+1); %get an index to all points in Ref data inside the current bin
-                CRE_DataGivenDataRef=CREClass;
+                CRE_DataGivenDataRef=CREClass('data',obj.Data(ind)...
-                CRE_DataGivenDataRef.Data=obj.Data(ind);
+                    ,'nbin',obj.Edges...
-                CRE_DataGivenDataRef.nBin=E1;
+                    );
                CRE_DataGivenDataRef.Calc;
                EVarEpsDataGivenRef=EVarEpsDataGivenRef+pDataRef(k)*CRE_DataGivenDataRef.CRE; % I really have to check this line on merit. 
            end
--- a/CREClass.m
+++ b/CREClass.m
@ -1,17 +1,29 @@
 classdef CREClass < handle
    % 23 and 24 Nov 2022 I added a new calculation for the RB based on
    % Shanon entropy given a threshold \lambda. I also think there is a
    % "mistake" in calculating the RB the classical way. 
    properties
        Data; % the  data set to get the CRE off
        nBin; %number of bins 
        Monitor=true; %set to true to get intermediat results/figures
-        UseHistProxy=true; % set this to get a proxy of the survival function using the hist function
+        UseHistProxyFlag=true; % set this to get a proxy of the survival function using the hist function
        EqualSizeBinFlag=true; % set to true to get equal sized bins, set to false to get equi-content bins, 
        % if set to false: each bin will contain (aproximately) an equal
        % number of pixel values. 
    end
-    properties %to be made read only
+    properties (SetAccess = private) %read only
        CRE
        RB
        P_RB
    end
    properties(Dependent)
                Edges
    end
    properties (Hidden)
        WaitBarHandle;
        HiddenEdges;
    end
    properties (Constant,Hidden)
        alpha=1; %needed for the scaled cummulative redual entropy as defined in zografoss. Will be implemented in the future. (I hope)
@ -33,6 +45,10 @@ classdef CREClass < handle
                            obj.Data=varargin{k};
                        case {'nbin','bin'}
                            obj.nBin=varargin{k};
                        case {'equalsizebinflag'}
                            obj.EqualSizeBinFlag=varargin{k};
                        case {'usehistproxyflag'}
                            obj.UseHistProxyFlag=varargin{k};
                        otherwise
                            error('invalid variable name');
                    end
@ -47,8 +63,17 @@ classdef CREClass < handle
            end
        end
    end
-    %% set functions
+    %% set and get functions
    methods
        function Edges=get.Edges(obj)
            Edges=obj.HiddenEdges;
        end
        function set.Edges(obj,val)
            if ~isnumeric(val);error('provide a array with edge values');end
            if numel(val)<3;error('too few edges provided. Need at least 3 values, i.e. 2 bins defined');end
            obj.HiddenEdges=transpose(val(:));
            obj.nBin=numel(val)-1;
        end
        function set.Data(obj,val)
            if isnumeric(val)
                obj.Data=val;
@ -66,41 +91,94 @@ classdef CREClass < handle
    end
    %% core function(s)
    methods
-        
+        function CalcEdges(obj)
-        function Calc(obj)
+             if isempty(obj.Data)
                warning('nothing to do');
                return;
             end
             if obj.UseHistProxyFlag
                if isempty(obj.nBin)
                    obj.nBin=floor(numel(obj.Data)./10);
                end
                if obj.EqualSizeBinFlag 
                    [~,obj.Edges]=histcounts(obj.Data(:),obj.nBin);
                else
                         prc=linspace(0,100,obj.nBin+1); % define the edges of the bins as a precentile
                        obj.Edges=prctile(obj.Data(:),prc); %find the edges of the bins
                end
             else
                 if obj.UseHistProxyFlag ; error('You cannot use EqualSizeBinFlag=true and UseHistProxyFlag=false as a combination');end
                 q=transpose(unique(sort(obj.Data(:))));
                 dq=diff(q);
                 obj.Edges=cat(2,q(1)-eps,q(1:end-1)+dq/2,q(end)+eps); %the eps is a "shortcut" to avoid value=edge cases. Note that eps will increase the size of the bin by a very small amount. Although this is a systemetic bias, its effect will be neglectable
                 obj.nBin=numel(obj.Edges)-1;
             end
        end
        function CalcRBShannon(obj)
            % 23Nov22 Here I will try to implement a (hopefully better)
            % definition of the relevance boundry. The basic idea is 
            % assume a dataset X= {x_1 ... x_i} X
            % set a threshold \lambda NB \lambda \in of the data X
            %Shanon entropy for this \lambda will be: 
            % H(\lamda)=p(x<\lambda)log(p(x<lambda))+p(x>=\lambda)log(p(x>=\lambda))
            % Now the expectation value for H will be :
            %E(H)=\int p(\lambda)*H(\lambda) d\lambda
             if isempty(obj.Data)
                warning('nothing to do');
                return;
            end
            if isempty(obj.Edges);obj.CalcEdges;end
            [CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
            dl=transpose(diff(bin)./sum(diff(bin))); %  will capture the case with non-equidistant bins as well as equidistant bins
            pdf_lambda=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','pdf');
            CP=transpose(CP);
            LogCP=log(CP);
            LogCP(~isfinite(LogCP))=0; % see comment below
            FC=1-CP;
            FC(FC<0)=0; % due to rounding errors, small negative numbers can arrise. These are theoretically not possible. 
            LogFC=log(FC);
            LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
            H=-1*(CP.*LogCP+FC.*LogFC); %Shannon Entropy when a theshold (lambda) is set to the lower bin edge
            w=(dl.*pdf_lambda);
            w=w./sum(w); % for one reason or the other, the sum is not always equal to 1.  
            EH=w*H; %calculate expectation value for H
            RB_ind=[find(H>=EH,1,'first') find(H>=EH,1,'last')];
            obj.RB=obj.Edges(RB_ind)+dl(RB_ind)/2;
            obj.P_RB=FC(RB_ind);
            if obj.Monitor
                lambda=obj.Edges(1:end-1);
                scatter(w,H);xlabel('weight');ylabel('Shannon Entropy');snapnow
                plot(lambda,H);xlabel('lambda');ylabel('Shannon Entropy');
                line(lambda(RB_ind(:)),[EH;EH],'linestyle',':');snapnow
                histogram(obj.Data(:));snapnow
                disp('sum of weights')
                disp(sum(dl.*pdf_lambda))
                disp('EH')
                disp(EH)
                disp('boundry index');
                disp(RB_ind);
                disp('boundry lambda value');
                disp(obj.RB);
                disp('boundry p-values');
                disp(FC(RB_ind));
                disp('fraction inside interval');
                disp(abs(diff(FC(RB_ind))));
            end
        end
        function Calc(obj) %calculate the CRE as well as a on-tailed definition of the RB. 
            % RR note: 23Nov22: I am not sure if I still like this RB
            % definition. I keep having a hard time defending it
            % theoretically. The code is retained, for now, however for backwards
            % compatibility. 
            if isempty(obj.Data)
                warning('nothing to do');
                return;
            end
-            if obj.UseHistProxy
+            if isempty(obj.Edges);obj.CalcEdges;end
-                if isempty(obj.nBin)
+            [CP,bin]=histcounts(obj.Data(:),transpose(obj.Edges),'Normalization','cdf');
-                    obj.nBin=floor(numel(obj.Data)./10);
+            FC=1-transpose(CP);
-                end
+            FC(FC<0)=0;
-%update part below to prefered matlab commands
+            dl=diff(bin)./sum(diff(bin)); %  will capture the case with non-equidistant bins as well as equidistant bins
 %                 [N,bin]=hist(obj.Data(:),obj.nBin);
 %                 N=transpose(N);
 %                 P=N./sum(N);
 %                 CP=cumsum(P);
                [CP,bin]=histcounts(obj.Data(:),obj.nBin,'Normalization','cdf');
                FC=1-transpose(CP);
                FC(FC<0)=0;
                dl=ones(size(FC))./numel(FC);
            else
                q=sort(unique(obj.Data(:)));
                FC=zeros(size(q));
                dl=zeros(size(q));
                for k=1:numel(q)
                    FC(k)=sum(obj.Data(:)>=q(k));
                    if k==1
                        dl(k)=(q(k+1)-q(k))/2; 
                    elseif k==numel(q)
                        dl(k)=(q(k)-q(k-1))/2;
                    else
                        dl(k)=(q(k+1)-q(k))/2+(q(k)-q(k-1))/2;
                    end
                end
                FC=FC./numel(obj.Data);
            end
            LogFC=log(FC);
            LogFC(~isfinite(LogFC))=0;%the log of 0 is -inf. however in Fc.*logFc it should end up as 0. to avoid conflicts removing the -inf
            if any(isnan(FC));error('something went wrong');end %catch a posible error. 
@ -115,16 +193,16 @@ classdef CREClass < handle
            dl(FC>0.5)=0; %set the weight for all in the histogram to the left side (i.e. <p50) to 0
            CRE_Med_Inf=-transpose(dl)*(FC.*LogFC)./(transpose(dl)*FC); %CRE zografos 
            ind=find(exp(-CRE_Med_Inf)>=FC,1,'first');
-            if obj.UseHistProxy
+            if obj.UseHistProxyFlag
                obj.P_RB=FC(ind);
-                obj.RB=bin(ind);
+                if ind==numel(bin)%if RB is (beyond) the last bin,
                    warning(sprintf('RB is hitting the upper bin value\n Maybe increase number of bins')) %#ok<*SPWRN>
                    obj.RB=bin(ind);
                end
                obj.RB=(bin(ind)+bin(ind+1))/2; % set the RB to the center of the bin
            else
-%                 SortData=sort(obj.Data);
+                SortData=unique(sort(obj.Data(:)));
-%                 obj.RB=SortData(ind);
+                obj.RB=SortData(ind);
                % I think the two lines above are a bug. I should not use
                % "SortData but rather q itsself. Some numbers may be in
                % the data multiple times
                obj.RB=q(ind);
                obj.P_RB=FC(ind);
            end
        end
--- a/TestCREClass.m
+++ b/TestCREClass.m
@ -2,35 +2,63 @@
 % show some default behaviour
 % and test the error handling. 
 %% set some constants
-A=randn(100);
+% A=randn(1000,1);
 % A=rand(1000,1);
 A=cat(1,randn(1000,1),randn(200,1)+10);
 %% create instance of CRE class
-S=CREClass;
+% % % S1=CREClass;
-%% check histogram vs "new" technique 
+% % % %% Calculate by setting labda equal to each of the (unique) values in the data. 
-% NB I come to the conclusion that the new method is not always working.
+% % % S1.UseHistProxyFlag=false;
-% Why? 
+% % % S1.EqualSizeBinFlag=false;
-S.Data=A;% set gaussian data
+% % % S1.Data=A;% set gaussian data
-S.nBin=(numel(S.Data));      % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
+% % % S1.Calc; %get RB based on the CRE
-% use histogram
+% % % S1.CalcRBShannon; %% get RB based on shannon entropy
-S.UseHistProxy=true;
+% % % disp(S1)
-S.Calc;
+% % % %% use histogram aproximation with unequal bin sizes in the histogram
-S
+% % % S2=CREClass;
-% Use my aproximation for the histogram
+% % % S2.UseHistProxyFlag=true;
-% only do this if the number of data points is not too big. 
+% % % S2.EqualSizeBinFlag=false;
-S.UseHistProxy=false;
+% % % S2.Data=A;% set gaussian data
-S.Calc;
+% % % S2.nBin=numel(A);      % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
-S
+% % % S2.Calc
-
+% % % S2.CalcRBShannon;
 % % % disp(S2)
 % % % %% use histogram aproximation with equal bin sizes in the histogram
 % % % S3=CREClass;
 % % % S3.UseHistProxyFlag=true;
 % % % S3.EqualSizeBinFlag=false;
 % % % S3.Data=A;% set gaussian data
 % % % S3.nBin=numel(A);      % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
 % % % S3.Calc
 % % % S3.CalcRBShannon;
 % % % disp(S3)
 %% Check recalculation of edges
 S4=CREClass;
 S4.UseHistProxyFlag=true;
 S4.EqualSizeBinFlag=true;
 S4.Data=A;% set gaussian data
 S4.nBin=numel(A);      % set nbin to npoints; This is fine as we use the cummulative distribution and the formulas as defined in Zografos
 S4.Calc
 S4.CalcRBShannon;
 disp(S4)
 S4.nBin=round(numel(A)/10);
 S4.CalcEdges ; % force a recalculation of the edges
 S4.Calc;
 S4.CalcRBShannon;
 disp(S4)
 %%
 return;
 %% Show effect of scaling or ofsett the data
 % Scale
 S.Data=100*A;
-S.UseHistProxy=true;
+S.UseHistProxyFlag=true;
 S.Calc;
-S
+disp(S);
 % Offset
 S.Data=A+10;
-S.UseHistProxy=true;
+S.UseHistProxyFlag=true;
 S.Calc;
-S
+disp(S);
 %% 
 %% Test CCRE class
@ -39,21 +67,30 @@ figure(2);clf;
 figure(3);clf;hold on;
 A=randn(100);
 B=A;
-B(:)=A(randperm(numel(A)))
+B(:)=A(randperm(numel(A)));
-for k=-1:0.1:1
+w=-1:0.1:1;
 out(numel(w),1)=struct('w',[],'CRE',[],'CCRE',[],'R',[]);
 for k=1:numel(w)
    out(k).w=w(k);
    CS=CCREClass;
-    CS.Data=1*(1-abs(k))*A+k*B;
+    CS.EqualSizeBinFlag=false;
    CS.Data=1*(1-abs(w(k)))*A+w(k)*B;
    CS.nBin=50;
    CS.DataRef=B;
    CS.nBinRef=500;
    CS.Calc;
-    figure(1);
+    out(k).CRE=CS.CRE;
-    plot(k,CS.CCRE,'o')
+    out(k).CCRE=CS.CCRE;
-    plot(k,CS.CRE,'x');
+    out(k).R=CS.CCRE./CS.CRE;
    figure(2);
    title(sprintf('w: %d',w(k)));
    scatter(CS.DataRef(:),CS.Data(:));  
-    figure(3);
+    snapnow
    plot(k,CS.CCRE./CS.CRE,'d');
 end
-CS
+disp(CS);
-snapnow
+    figure(1);
    clf;hold on;
    plot([out(:).w],[out(:).CCRE],'o');
    plot([out(:).w],[out(:).CRE],'x');
    figure(3);clf
    plot([out(:).w],[out(:).R],'d');
Author	SHA1	Message	Date
R.J. Renken	e6e9e593a4	see previous comment	2022-11-24 14:10:27 +01:00
R.J. Renken	cbcee6ac27	Please use EndPoint25Mar2022 as the stable branch. Changes made to CREClass.m Here I've added an alternative calculation for the Relevance Boundary. CalcRBShannon is now part of the CREClass.m file. The TestCREClass has been updated to allow testing this new feature. NB: this is work in progress	2022-11-24 14:09:47 +01:00
R.J. Renken	e68267182d	Major update on CCREClass and some bug fixes in the CRE class. CCRE class is now "more logical" from a programing point of view. Created a fucntion to create the bin-edges for the reference function. "condensed" the calc function (removed redundancies)	2022-03-27 17:12:16 +02:00
R.J. Renken	ce95974c7b	Big modification in the way the histogram is calculated. Now first the edges are calculated based on the two paramters: EqualSizeBinFlag and UseHistProxy. Once edges for bins are known (calculated) creating the cdf is the same for all cases.	2022-03-26 16:10:39 +01:00
R.J. Renken	3998755f40	Allow for constant log(dFC) across bins. i.e. all bins will contain (approximately) and equal number of datum points. In other words binsize for the refence is adapted such that the number of points per bin is equal.	2022-03-25 16:06:59 +01:00