next up previous index
Next: Nonuniform number generation Up: Minimal Spanning Tree Based Previous: Two-sample test   Index

Many-sample test

Example: Wine data. As you may remember, this data set has 14 physico-chemical composition variables (all continuous) that we are trying to relate to several categorical variables.

# Reduce the 14 composition columns of vin to two components
# (q.choix = 2) and keep the fitted object in acp.vin.
# NOTE(review): acp.us is defined outside this excerpt; presumably a
# principal components routine -- confirm.
acp.vin <- acp.us(vin[, 1:14], q.choix = 2)
# Open an empty plot over the individual coordinates, then write each
# observation's class code (column 15 of vin) at its position.
plot(acp.vin$ind.cords, type = "n")
invisible(lapply(c("1", "2", "3"), function(code) {
  members <- as.numeric(vin[, 15]) == as.numeric(code)
  text(acp.vin$ind.cords[members, ], code)
}))

# Minimal spanning tree on the two-component coordinates. The printed vector
# below has one entry per point except the last; entry i is used further down
# as the other endpoint of the tree edge drawn from point i.
# NOTE(review): mstree is not defined in this excerpt; presumably the S-PLUS
# minimal spanning tree routine -- confirm.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
mst.wine <- mstree(acp.vin$ind.cords, plane = FALSE)
[1] 10 66  6 28  8  4  6 27  7  9  1 14 14 11 14 44 15 25 45 41 39 50 16 76 78
[26] 37 58  5 27 31 62  2 63 31 55 69 40 45 38 18 21 52 51 47 37 26 42 49 76 46
[51] 75 22 54 55 34 53 34 57 33 61 35 33 65 60 78 59 64 77 40 73 72 74 71 43 48
[76] 46 75

# Draw the tree on the current plot: one segment per entry of mst.wine,
# running from point i to point mst.wine[i].
# seq_along() is used rather than seq() because seq(x) on a length-one
# vector expands to 1:x instead of indexing the single element.
segments(
  x0 = acp.vin$ind.cords[seq_along(mst.wine), 1],
  y0 = acp.vin$ind.cords[seq_along(mst.wine), 2],
  x1 = acp.vin$ind.cords[mst.wine, 1],
  y1 = acp.vin$ind.cords[mst.wine, 2]
)

title("Wines Minimal Spanning Tree")
# Observed statistic: number of positions at which the class (column 15) of
# the wine indexed by mst.wine equals the class of the corresponding wine
# among all rows but the 78th.
> sum(vin[mst.wine,15]==vin[-78,15])
[1] 44

# randclass: simulate the agreement count under random relabeling.
#
# Arguments:
#   S       number of random permutations to draw (default 1000).
#   data    vector of class codes; by default the classes (column 15 of the
#           global `vin`) of every row except the 78th.
#   compar  vector of class codes compared position by position with `data`;
#           by default the classes of the rows indexed by the global
#           `mst.wine`.
# Value:
#   A numeric vector of length S. Element i is the number of positions at
#   which `data` equals a freshly permuted copy of `compar`.
#
# The defaults are evaluated lazily, so `vin` and `mst.wine` are only needed
# when `data` / `compar` are not supplied.
randclass <- function(S = 1000, data = as.numeric(vin[-78, 15]),
                      compar = as.numeric(vin[mst.wine, 15])) {
  same <- rep(0, S)
  n <- length(compar)
  # seq_len() keeps the loop empty when S is 0 (1:0 would run twice).
  for (i in seq_len(S)) {
    # sample.int(n) is a random permutation of 1..n, i.e. a reshuffling of
    # the entries of compar.
    same[i] <- sum(as.numeric(data == compar[sample.int(n)]))
  }
  same
}
# Run the simulation with its default arguments and keep the counts in r1.
# Written with <- because the underscore assignment operator is not valid
# in current R.
r1 <- randclass()
# Largest count obtained among the values stored in r1.
> max(r1)
[1] 39
# NOTE(review): r2 is not created anywhere in this excerpt; presumably the
# result of a second randclass() run -- confirm.
> max(r2)
[1] 40
# Histogram of the values in r1, requesting 50 classes.
 hist(r1,nclass=50)

Matlab MSTREE algorithm:
This does not work yet, but it should be made to work as part of the mstree project.

function out=mst(distm)
%MST  Minimum spanning tree from a matrix of distances between n objects.
%
%   out = mst(distm) grows the tree one vertex at a time, starting from
%   object n, always adjoining the outside vertex that is closest to the
%   tree (Prim's method).
%
%   Input:
%     distm  n-by-n matrix of pairwise distances. Only entries
%            distm(i,j) with i outside the tree and j inside it are read,
%            so the matrix is expected to be symmetric.
%   Output:
%     out    1-by-n row vector. For i < n, out(i) is the vertex that i is
%            joined to, so the tree edges are (i, out(i)); out(n) = 0
%            marks the starting vertex.
%
%   Bookkeeping: while vertex i is still outside the tree, out(i) holds
%   MINUS the index of its nearest tree vertex; the sign is flipped when i
%   is adjoined.
%
%   Note: an earlier draft selected the next vertex by indexing helper
%   vectors with masks of the wrong shape and did not run; the selection
%   below reads the candidate distances directly from distm.
n=length(distm);
out=[-n*ones(1,n-1) 0];
for k = 1:(n-1)
    % Vertices not yet in the tree, and the distance from each of them to
    % its currently recorded nearest tree vertex.
    nottree=find(out<0);
    neardist=distm(sub2ind([n n], nottree, -out(nottree)));
    % Nearest outside vertex (first one wins on ties).
    [distmin, pos]=min(neardist);
    imin=nottree(pos);
    % Adjoin the new edge (imin, -out(imin)) by flipping the sign.
    out(imin)=-out(imin);
    % Update the nearest tree vertex of every vertex still outside.
    for i = find(out<0)
        if distm(i,imin)<distm(i,-out(i))
            out(i)=-imin;
        end
    end
end



Susan Holmes 2002-01-12