v_gaussmixp

PURPOSE ^

V_GAUSSMIXP calculate probability densities from or plot a Gaussian mixture model

SYNOPSIS ^

function [lp,rp,kh,kp]=v_gaussmixp(y,m,v,w,a,b)

DESCRIPTION ^

V_GAUSSMIXP calculate probability densities from or plot a Gaussian mixture model

 Usage: (1) v_gaussmixp([],m,v,w) % plot a 1D or 2D gaussian mixture pdf

 Inputs: n data values, k mixtures, p parameters, q data vector size

   Y(n,q) = (possibly transformed) input data (or optional plot range if no output arguments)
            Row Y(i,:) represents a single observation of the transformed GMM data
            point X: Y(i,1:q)=X(i,1:p)*A'+B'. If A and B are omitted and q=p, then Y(i,:)=X(i,:).
   M(k,p) = mixture means for x(p)
   V(k,p) or V(p,p,k) variances (diagonal or full) for x(p)
   W(k,1) = weights (must sum to 1)
   A(q,p) = transformation: y=x*A'+ B' (where y and x are row vectors).
   B(q,1)   If A is omitted or null, y=x*I(B,:)' where I is the identity matrix.
            If B is also omitted or null, y=x*I(1:q,:)'.
   Note that most commonly, q=p and A and B are omitted entirely.

 Outputs

  LP(n,1) = log probability of each data point
  RP(n,k) = relative probability of each mixture
  KH(n,1) = highest probability mixture
  KP(n,1) = relative probability of highest probability mixture

 See also: v_gaussmix, v_gaussmixd, v_gaussmixg, v_randvec

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [lp,rp,kh,kp]=v_gaussmixp(y,m,v,w,a,b)
0002 %V_GAUSSMIXP calculate probability densities from or plot a Gaussian mixture model
0003 %
0004 % Usage: (1) v_gaussmixp([],m,v,w) % plot a 1D or 2D gaussian mixture pdf
0005 %
0006 % Inputs: n data values, k mixtures, p parameters, q data vector size
0007 %
0008 %   Y(n,q) = (possibly transformed) input data (or optional plot range if no output arguments)
0009 %            Row Y(i,:) represents a single observation of the transformed GMM data
0010 %            point X: Y(i,1:q)=X(i,1:p)*A'+B'. If A and B are omitted and q=p, then Y(i,:)=X(i,:).
0011 %   M(k,p) = mixture means for x(p)
0012 %   V(k,p) or V(p,p,k) variances (diagonal or full) for x(p)
0013 %   W(k,1) = weights (must sum to 1)
0014 %   A(q,p) = transformation: y=x*A'+ B' (where y and x are row vectors).
0015 %   B(q,1)   If A is omitted or null, y=x*I(B,:)' where I is the identity matrix.
0016 %            If B is also omitted or null, y=x*I(1:q,:)'.
0017 %   Note that most commonly, q=p and A and B are omitted entirely.
0018 %
0019 % Outputs
0020 %
0021 %  LP(n,1) = log probability of each data point
0022 %  RP(n,k) = relative probability of each mixture
0023 %  KH(n,1) = highest probability mixture
0024 %  KP(n,1) = relative probability of highest probability mixture
0025 %
0026 % See also: v_gaussmix, v_gaussmixd, v_gaussmixg, v_randvec
0027 
0028 %      Copyright (C) Mike Brookes 2000-2009
0029 %      Version: $Id: v_gaussmixp.m 10865 2018-09-21 17:22:45Z dmb $
0030 %
0031 %   VOICEBOX is a MATLAB toolbox for speech processing.
0032 %   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
0033 %
0034 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0035 %   This program is free software; you can redistribute it and/or modify
0036 %   it under the terms of the GNU General Public License as published by
0037 %   the Free Software Foundation; either version 2 of the License, or
0038 %   (at your option) any later version.
0039 %
0040 %   This program is distributed in the hope that it will be useful,
0041 %   but WITHOUT ANY WARRANTY; without even the implied warranty of
0042 %   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0043 %   GNU General Public License for more details.
0044 %
0045 %   You can obtain a copy of the GNU General Public License from
0046 %   http://www.gnu.org/copyleft/gpl.html or by writing to
0047 %   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
0048 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0049 [k,p]=size(m);
0050 [n,q]=size(y);
0051 if isempty(y)                       % no y data supplied; we must calculate q
0052     if nargin<=4 || (nargin==5 && isempty(a)) || (nargin>=6 && isempty(a) && isempty(b))
0053         q=p;                        % no transformation
0054     elseif ~isempty(a)              % y=x*a' or y=x*a'+b'
0055         q=size(a,1);
0056     else
0057         q=length(b);                % b contains a list of dimensions to extract
0058     end
0059 end
0060 if nargin<4                         % w not supplied; assume uniform weights
0061     w=repmat(1/k,k,1);
0062     if nargin<3
0063         v=ones(k,p);                % v not supplied assume identity covariance matrix
0064     end
0065 end
0066 fv=ndims(v)>2 || size(v,1)>k;       % fv=1 if full covariance matrix is supplied
0067 if nargin>4 && ~isempty(a)          % a is specified so need to transform the data
0068     if nargin<6 || isempty(b)
0069         m=m*a';                     % no offset b specified
0070     else
0071         m=m*a'+repmat(b',k,1);      % offset b is specified
0072     end
0073     v1=v;                           % save the original covariance matrix array
0074     v=zeros(q,q,k);                 % create new full covariance matrix array
0075     if fv
0076         for ik=1:k
0077             v(:,:,ik)=a*v1(:,:,ik)*a';
0078         end
0079     elseif all(sum(a~=0,2)==1)      % a does not disturb diagonality
0080         for ik=1:k
0081             v(ik,:)=v1(ik,:)*(a.^2)';
0082         end
0083     else
0084         for ik=1:k
0085             v(:,:,ik)=(a.*repmat(v1(ik,:),q,1))*a';
0086         end
0087         fv=true;                    % now we definitely have a full covariance matrix
0088     end
0089 elseif q<p || nargin>4              % a is unspecified but need to select coefficient subset
0090     if nargin<6 || isempty(b)       % b not given; just use first q dimensions
0091         b=1:q;
0092     end
0093     m=m(:,b);
0094     if fv
0095         v=v(b,b,:);
0096     else
0097         v=v(:,b);
0098     end
0099 end
0100 
0101 memsize=v_voicebox('memsize');    % set memory size to use
0102 
0103 lp=zeros(n,1);
0104 rp=zeros(n,k);
0105 wk=ones(k,1);                           % index to replicate k times
0106 if n>0                                  % y data is supplied
0107     if ~fv                              % diagonal covariance
0108         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0109         % Diagonal Covariance matrices  %
0110         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0111 
0112         % If data size is large then do calculations in chunks
0113 
0114         nb=min(n,max(1,floor(memsize/(8*q*k))));    % chunk size for testing data points
0115         nl=ceil(n/nb);                  % number of chunks
0116         jx0=n-(nl-1)*nb;                % size of first chunk
0117         im=repmat((1:k)',nb,1);
0118         wnb=ones(1,nb);
0119         wnj=ones(1,jx0);
0120         vi=-0.5*v.^(-1);                % data-independent scale factor in exponent
0121         lvm=log(w)-0.5*sum(log(v),2);   % log of external scale factor (excluding -0.5*q*log(2pi) term)
0122 
0123         % first do partial chunk
0124 
0125         jx=jx0;
0126         ii=1:jx;
0127         kk=repmat(ii,k,1);
0128         km=repmat(1:k,1,jx);
0129         py=reshape(sum((y(kk(:),:)-m(km(:),:)).^2.*vi(km(:),:),2),k,jx)+lvm(:,wnj);
0130         mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0131         px=exp(py-mx(wk,:));            % find normalized probability of each mixture for each datapoint
0132         ps=sum(px,1);                   % total normalized likelihood of each data point
0133         rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0134         lp(ii)=log(ps)+mx;
0135 
0136         for il=2:nl
0137             ix=jx+1;
0138             jx=jx+nb;                    % increment upper limit
0139             ii=ix:jx;
0140             kk=repmat(ii,k,1);
0141             py=reshape(sum((y(kk(:),:)-m(im,:)).^2.*vi(im,:),2),k,nb)+lvm(:,wnb);
0142             mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0143             px=exp(py-mx(wk,:));            % find normalized probability of each mixture for each datapoint
0144             ps=sum(px,1);                   % total normalized likelihood of each data point
0145             rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0146             lp(ii)=log(ps)+mx;
0147         end
0148     else
0149         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0150         % Full Covariance matrices  %
0151         %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0152         pl=q*(q+1)/2;
0153         lix=1:q^2;
0154         cix=repmat(1:q,q,1);
0155         rix=cix';
0156         lix(cix>rix)=[];                                        % index of lower triangular elements
0157         lixi=zeros(q,q);
0158         lixi(lix)=1:pl;
0159         lixi=lixi';
0160         lixi(lix)=1:pl;                                        % reverse index to build full matrices
0161         vt=reshape(v,q^2,k);
0162         vt=vt(lix,:)';                                            % lower triangular in rows
0163 
0164         % If data size is large then do calculations in chunks
0165 
0166         nb=min(n,max(1,floor(memsize/(24*q*k))));    % chunk size for testing data points
0167         nl=ceil(n/nb);                  % number of chunks
0168         jx0=n-(nl-1)*nb;                % size of first chunk
0169         wnb=ones(1,nb);
0170         wnj=ones(1,jx0);
0171 
0172         vi=zeros(q*k,q);                    % stack of k inverse cov matrices each size q*q
0173         vim=zeros(q*k,1);                   % stack of k vectors of the form inv(vt)*m
0174         mtk=vim;                             % stack of k vectors of the form m
0175         lvm=zeros(k,1);
0176         wpk=repmat((1:q)',k,1);
0177 
0178         for ik=1:k
0179 
0180             % these lines added for debugging only
0181             %             vk=reshape(vt(k,lixi),q,q);
0182             %             condk(ik)=cond(vk);
0183             %%%%%%%%%%%%%%%%%%%%
0184             [uvk,dvk]=eig(reshape(vt(ik,lixi),q,q));      % convert lower triangular to full and find eigenvalues
0185             dvk=diag(dvk);
0186             if(any(dvk<=0))
0187                 error('Covariance matrix for mixture %d is not positive definite',ik);
0188             end
0189             vik=-0.5*uvk*diag(dvk.^(-1))*uvk';   % calculate inverse
0190             vi((ik-1)*q+(1:q),:)=vik;           % vi contains all mixture inverses stacked on top of each other
0191             vim((ik-1)*q+(1:q))=vik*m(ik,:)';   % vim contains vi*m for all mixtures stacked on top of each other
0192             mtk((ik-1)*q+(1:q))=m(ik,:)';       % mtk contains all mixture means stacked on top of each other
0193             lvm(ik)=log(w(ik))-0.5*sum(log(dvk));       % vm contains the weighted sqrt of det(vi) for each mixture
0194         end
0195         %
0196         %         % first do partial chunk
0197         %
0198         jx=jx0;
0199         ii=1:jx;
0200         xii=y(ii,:).';
0201         py=reshape(sum(reshape((vi*xii-vim(:,wnj)).*(xii(wpk,:)-mtk(:,wnj)),q,jx*k),1),k,jx)+lvm(:,wnj);
0202         mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0203         px=exp(py-mx(wk,:));  % find normalized probability of each mixture for each datapoint
0204         ps=sum(px,1);                   % total normalized likelihood of each data point
0205         rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0206         lp(ii)=log(ps)+mx;
0207 
0208         for il=2:nl
0209             ix=jx+1;
0210             jx=jx+nb;        % increment upper limit
0211             ii=ix:jx;
0212             xii=y(ii,:).';
0213             py=reshape(sum(reshape((vi*xii-vim(:,wnb)).*(xii(wpk,:)-mtk(:,wnb)),q,nb*k),1),k,nb)+lvm(:,wnb);
0214             mx=max(py,[],1);                % find normalizing factor for each data point to prevent underflow when using exp()
0215             px=exp(py-mx(wk,:));  % find normalized probability of each mixture for each datapoint
0216             ps=sum(px,1);                   % total normalized likelihood of each data point
0217             rp(ii,:)=(px./ps(wk,:))';                % relative mixture probabilities for each data point (columns sum to 1)
0218             lp(ii)=log(ps)+mx;
0219         end
0220     end
0221     lp=lp-0.5*q*log(2*pi);
0222 end
0223 if nargout >2
0224     [kp,kh]=max(rp,[],2);
0225 end
0226 if ~nargout
0227     if q==1
0228         nxx=256;                        % grid size for 1D pdf plot
0229         if size(y,1)<2
0230             nsd=2;                      % number of std deviations
0231             sd=sqrt(v(:));
0232             xax=linspace(min(m-nsd*sd),max(m+nsd*sd),nxx);
0233         else
0234             xax=linspace(min(y),max(y),nxx);
0235         end
0236         plot(xax,v_gaussmixp(xax(:),m,v,w),'-b');
0237         xlabel('Parameter 1');
0238         ylabel('Log probability density');
0239         if n>0
0240             hold on
0241             plot(y,lp,'xr');
0242             hold off
0243         end
0244     else
0245         if q>2 % if dimension >2, take the first two dimensions only
0246             m=m(:,1:2);
0247             if fv
0248                 v=v(1:2,1:2,:);
0249             else
0250                 v=v(:,1:2);
0251             end
0252         end
0253         nxx=256;                        % grid size for 2D pdf plot
0254         if n<2                          % number of data points is 1 or 0
0255             nsd=2;                      % number of std deviations to plot
0256             if fv
0257                 sd=sqrt([v(1:4:end)' v(4:4:end)']); % extract diagonal elements from each mixture covariance
0258             else
0259                 sd=sqrt(v);
0260             end
0261             xax=linspace(min(m(:,1)-nsd*sd(:,1)),max(m(:,1)+nsd*sd(:,1)),nxx);
0262             yax=linspace(min(m(:,2)-nsd*sd(:,2)),max(m(:,2)+nsd*sd(:,2)),nxx);
0263         else
0264             yminmax=(eye(2)+0.1*[1 -1; -1 1])*[min(y,[],1);max(y,[],1)];
0265             xax=linspace(yminmax(1,1),yminmax(2,1),nxx);
0266             yax=linspace(yminmax(1,2),yminmax(2,2),nxx);
0267         end
0268         xx(:,:,1)=repmat(xax',1,nxx);
0269         xx(:,:,2)=repmat(yax,nxx,1);
0270         imagesc(xax,yax,reshape(gaussmixp(reshape(xx,nxx^2,2),m,v,w),nxx,nxx)');
0271         axis 'xy';
0272         colorbar;
0273         xlabel('Parameter 1');
0274         ylabel('Parameter 2');
0275         v_cblabel('Log probability density');
0276         if n>0                              % if y data is supplied
0277             hold on
0278             cmap=colormap;
0279             clim=get(gca,'CLim');           % get colourmap limits
0280             msk=lp>clim*[0.5; 0.5];
0281             if any(msk)
0282                 plot(y(msk,1),y(msk,2),'x','markeredgecolor',cmap(1,:));
0283             end
0284             if any(~msk)
0285                 plot(y(~msk,1),y(~msk,2),'x','markeredgecolor',cmap(64,:));
0286             end
0287             hold off
0288         end
0289     end
0290 end

Generated by m2html © 2003