(* Load libraries and prepare session *)
<< LinearAlgebra`MatrixManipulation`;
<< NumericalMath`TrigFit`;
<< Graphics`Graphics`;
<< Graphics`Arrow`;
Off[General::"spell"];
Off[General::"spell1"];
(* Read Yeast Data *)
stream = "/home/lom/tmp/doublecheck_gsvd/Yeast.txt";
matrix = ReadList[stream, Word, RecordLists -> True, NullWords -> True];
{genes, arrays} = Dimensions[matrix] - {2, 6}
Clear[stream];
genenames = TakeRows[
TakeColumns[matrix, {1, 6}],
{3, genes + 2}];
arraynames = TakeColumns[
TakeRows[matrix, {1, 2}],
{7, arrays + 6}];
matrix = TakeColumns[
TakeRows[matrix, {3, genes + 2}],
{7, arrays + 6}];
matrix = ToExpression[matrix];
sizes = Flatten[
Table[
Dimensions[
Characters[
ToString[arraynames[[2, a]]
]]],
{a, 1, arrays}]];
size = Sort[sizes, OrderedQ[{#2, #1}] &][[1]];
Do[
Do[arraynames[[2, a]] = StringJoin[ToString[arraynames[[2, a]]], " "],
{b, 1, size - sizes[[a]]}],
{a, 1, arrays}];
(* Count and locate Null Data *)
counter = Table[Dimensions[Position[matrix[[a]], Null]][[1]], {a, 1, genes}];
Clear[positions];
positions = Table[0, {a, 1, arrays + 1}]
Do[
positions[[a]] = Flatten[
Position[Flatten[counter], a - 1]],
{a, 1, arrays + 1}];
numbers = Flatten[
Table[
Dimensions[positions[[a]]],
{a, 1, Round[arrays*0.2]}]]
(* Create Display Of Gene Position Of Null Data *)
framex = Table[{a, a - 1}, {a, 1, Round[arrays*0.2]}];
framey = {500, 1000, 1500, 2000, 2500, 3000};
labelx = ColumnForm[{"Number of Arrays"}, Center];
labely = ColumnForm[{"Number of Genes"}, Center];
g = BarChart[numbers,
Frame -> True,
Axes -> False,
FrameLabel -> {labelx, labely, None, None},
FrameTicks -> {framex, framey, None, None},
GridLines -> {None, None},
PlotRange -> {{0.5, Round[arrays*0.2] + 0.5}, {0, 3000}},
DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
Text[labely, {b_, c_}, {1., 0.}] ->
Text[labely, {b - 0.75, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
Text[labelx, {b_, c_}, {0., 1.}] ->
Text[labelx, {b, c - 400}, {0, 1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
Text[a_, {b_, c_}, {0., 1.}] ->
Text[a, {b, c - 200}, {0, 0}, {0, 1}];
Show[g, PlotRange -> All]
(* Select Genes with no Missing Data Points *)
matrix = AppendRows[Table[{counter[[a]]}, {a, 1, genes}], genenames, matrix];
matrix = Sort[matrix, OrderedQ[{#1, #2}] &];
fullgenenames = TakeColumns[
TakeRows[matrix, {1, numbers[[1]]}],
{2, 7}];
fullmatrix = TakeColumns[
TakeRows[matrix, {1, numbers[[1]]}],
{8, arrays + 7}];
(* Calculate SVD *)
{eigenarrays, eigenexpressions, eigengenes} = SingularValues[fullmatrix];
eigenarrays = Transpose[eigenarrays];
fractions = eigenexpressions^2/Sum[eigenexpressions[[a]]^2, {a, 1, arrays}]
entropy = -N[
Sum[fractions[[a]]*Log[fractions[[a]]], {a, 1, arrays}]/Log[arrays]]
entropy = N[Round[100*entropy]/100]
(* Create Fractions Bar Charts Displays *)
limit = 0.01;
Clear[gridx, framex, framey, sizes];
gridx = Table[a, {a, 0, limit, N[limit/5]}];
framex = gridx;
sizes = Flatten[
Table[
Dimensions[
Characters[
ToString[framex[[a]]
]]], {a, 1, 6}]];
Do[
Do[framex[[a]] = StringJoin[ToString[framex[[a]]], " "],
{b, 1, 5 - sizes[[a]]}],
{a, 1, 6}];
framex = Table[{gridx[[a]], framex[[a]]}, {a, 1, 6}];
gridx = Table[{gridx[[a]], RGBColor[0, 0, 0]}, {a, 1, 6}];
framey = Table[{a + 1, arrays - a - 6}, {a, 0, 12 - 3}];
table = Table[fractions[[arrays - a]], {a, 6, arrays - 3}];
g = BarChart[table,
BarOrientation -> Horizontal,
PlotRange -> {{0, limit*1.0001}, {0.5, 12 - 2 + 0.5}},
AspectRatio -> 1,
Axes -> False,
Frame -> True,
FrameTicks -> {None, framey, framex, None},
FrameLabel -> {None, None, None, None},
GridLines -> {gridx, None},
DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
Text[a_, {b_, c_}, {0., -1.}] ->
Text[a, {b, c + 1.75}, {0, 0}, {0, 1}];
g1 = Show[g,
AspectRatio -> 1.25,
PlotRange -> All,
DisplayFunction -> Identity];
g = BarChart[table,
BarOrientation -> Horizontal,
PlotRange -> {{0, limit*1.0001}, {0.5, 12 - 2 + 0.5}},
AspectRatio -> 1,
Axes -> False,
Frame -> True,
FrameTicks -> {None, framey, framex, None},
FrameLabel -> {None, None, None, None},
GridLines -> {gridx, None},
DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
Text[a_, {b_, c_}, {0., -1.}] ->
Text[a, {b, c + 1.75}, {0, 0}, {0, 1}];
g1 = Show[g,
AspectRatio -> 1.25,
PlotRange -> All,
DisplayFunction -> Identity];
(* Red & Green Raster Display *)
contrast = 3.5;
displaying = Table[
If[contrast*eigengenes[[i, j]] > 0,
If[
contrast*eigengenes[[i, j]] < 1, {contrast*eigengenes[[i, j]],
0}, {1, 0}],
If[
contrast*eigengenes[[i, j]] > -1, {0, -contrast*
eigengenes[[i, j]]}, {0, 1}]],
{i, 1, arrays}, {j, 1, arrays}];
framex = Table[{a - 0.5, arraynames[[2, a]]}, {a, 1, arrays}];
framey = Table[{a + 1 - 0.5, arrays - a}, {a, 0, arrays - 1}];
labely = "Eigengenes";
labelx = ColumnForm[{"(a) Arrays", " ", " "}, Center];
g = Show[
Graphics[
RasterArray[
Table[
RGBColor[displaying[[i, j, 1]], displaying[[i, j, 2]], 0],
{i, arrays, 1, -1}, {j, 1, arrays}]]],
AspectRatio -> 1,
Frame -> True,
FrameTicks -> {None, framey, framex, None},
FrameLabel -> {None, labely, labelx, None},
DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
Text[labely, {b_, c_}, {1., 0.}] ->
Text[labely, {b - 3, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
Text[labelx, {b_, c_}, {0., -1.}] ->
Text[labelx, {b, c + 3}, {0, -1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
Text[a_, {b_, c_}, {0., -1.}] ->
Text[a, {b, c + 2.7}, {0, 0}, {0, 1}];
g1 = Show[g,
AspectRatio -> 1.05,
PlotRange -> All,
DisplayFunction -> Identity];
(* Create Selected Eigengenes Graph Display *)
labelx = ColumnForm[{"(c) Arrays"}, Center];
labely = ColumnForm[{" ", "Expression Level"}, Center];
framex = Table[{a - 1, arraynames[[2, a]]}, {a, 1, arrays}];
framey = {-1, -0.5, 0, 0.5, 1};
coordinates = Table[{a - 1, eigengenes[[1, a]]}, {a, 1, arrays}];
points = Table[Point[coordinates[[a]]], {a, 1, arrays}];
line = Line[coordinates];
g = Show[
{Graphics[{RGBColor[1, 0, 0], PointSize[0.022], points}],
Graphics[{RGBColor[1, 0, 0], line}]},
Frame -> True,
FrameLabel -> {None, labely, labelx, None},
GridLines -> {None, {{0, RGBColor[0, 0, 0]}}},
FrameTicks -> {None, framey, framex, None},
PlotRange -> {-1.05, 1.05},
DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
Text[labely, {b_, c_}, {1., 0.}] ->
Text[labely, {b - 5.4, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
Text[labelx, {b_, c_}, {0., -1.}] ->
Text[labelx, {b, c + 0.625}, {0, -1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
Text[a_, {b_, c_}, {0., -1.}] ->
Text[a, {b, c + 0.25}, {0, 0}, {0, 1}];
p1 = Show[g,
AspectRatio -> 1.05,
PlotRange -> All,
DisplayFunction -> Identity];
labelx = ColumnForm[{"(c) Arrays"}, Center];
labely = ColumnForm[{" ", "Expression Level"}, Center];
framex = Table[{a - 1, arraynames[[2, a]]}, {a, 1, arrays}];
framey = {-1, -0.5, 0, 0.5, 1};
coordinates = Table[{a - 1, eigengenes[[2, a]]}, {a, 1, arrays}];
points = Table[Point[coordinates[[a]]], {a, 1, arrays}];
line = Line[coordinates];
g = Show[
{Graphics[{RGBColor[0, 0, 1], PointSize[0.022], points}],
Graphics[{RGBColor[0, 0, 1], line}]},
Frame -> True,
FrameLabel -> {None, labely, labelx, None},
GridLines -> {None, {{0, RGBColor[0, 0, 0]}}},
FrameTicks -> {None, framey, framex, None},
PlotRange -> {-1.05, 1.05},
DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
Text[labely, {b_, c_}, {1., 0.}] ->
Text[labely, {b - 5.4, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
Text[labelx, {b_, c_}, {0., -1.}] ->
Text[labelx, {b, c + 0.625}, {0, -1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
Text[a_, {b_, c_}, {0., -1.}] ->
Text[a, {b, c + 0.25}, {0, 0}, {0, 1}];
p2 = Show[g,
AspectRatio -> 1.05,
PlotRange -> All,
DisplayFunction -> Identity];
(* Display Selected Eigengenes *)
g3 = Show[{p2, p1},
DisplayFunction -> Identity]
(* Display Eigengenes, Fractions and Selected Eigengenes *)
Show[GraphicsArray[{g1, g2, g3}],
GraphicsSpacing -> -0.15]
|
%clear all previous variables
clear; format compact
%load the yeast data
load /home/lom/tmp/doublecheck_gsvd/yeast.mat
[nGenes, nExps] = size(data)
%Count and locate NaN Data
nNanPerExp=zeros(nExps,1);
for i=0:nExps-1
nNanPerExp(i+1) = (sum(sum(isnan(data),2)==i));
end
%Create and Display nummer of missing values
bar(0:nExps-1 ,nNanPerExp); axis([-.5 3.5 0 3000])
xlabel('Number of Arrays')
ylabel('Number of Genes')
%Take the only rows without missing values
idx = sum(isnan(data),2)==0;
fullmatrix = data(idx,:);
fullgeneNames = geneNames(idx,:);
%Calculate SVD
[U S V] = svd(fullmatrix, 0);
S=diag(S)'
fractions = S.^2/sum(S.^2);
entropy = -sum(fractions.*log(fractions))/log(nExps)
%plot barchart
subplot(1,3,2) ; barh([1:nExps],fractions, 'r')
title('(b) Expresion Fraction')
axes('position', [.45 .3 .16 .62])
barh([3:12], fractions(3:12), 'r')
set(gca, 'Color', [1 1 .5])
%plot rasterplot
subplot(1,3,1);
expNames(expNames=='_')=' ';
rasterplot(V', expNames)
title('(a) Arrays')
colormap(redgreen)
xticklabel_rotate
%plot two first eigengenes
subplot(1,3,3);
plot(V(:,1:2), '-o');
title('(c) Arrays')
ylabel('Expression Level')
set(gca, 'XTick', 1:nExps, 'XTickLabel', expNames)
xticklabel_rotate
|
#import required tools
from Numeric import *
from pylab import *
from scipy.linalg import *
NONUMB = -999999.999;
nGeneAnots=6
#Read file
f = open('/home/lom/tmp/doublecheck_gsvd/Yeast.txt')
firstLine=f.readline()
lines=f.readlines()
firstLine = firstLine.strip().split('\t')
expNames = array(firstLine[nGeneAnots:], PyObject)
nGenes = len(lines)
nExps = len(expNames)
data=ones((nGenes, nExps), 'f')*NONUMB
geneNames=zeros(nGenes, PyObject)
i=0;
for line in lines:
line = line.strip()
cols = line.split('\t')
geneNames[i] = cols[0]#cols[0:nGeneAnots]
for j in range(nGeneAnots, len(cols)):
if cols[j]!='':
data[i][j-nGeneAnots]=float(cols[j])
i=i+1
f.close()
#Count NaNs
nNanPerExp=zeros([nExps,1]);
for i in range(nExps):
nNanPerExp[i] = sum(asarray(sum(asarray(data==NONUMB, 'i'),1)==i, 'i'))
#Create and Display number of missing values
bar(arange(nExps)-.5, nNanPerExp); axis([-.5, 3.5, 0, 3000])
xlabel('Number of Arrays')
ylabel('Number of Genes')
show()
#Take the only rows without missing values
idx = nonzero(sum(data!=NONUMB, 1)==nExps)
fullmatrix = take(data, idx)
fullgeneNames=take(geneNames, idx)
#Calculate SVD
[U, S, Vt] = svd(fullmatrix);
fractions = S**2/sum(S**2);
entropy = -sum(fractions*log(fractions))/log(nExps)
#Plot barchart
subplot(1,3,2); bar(range(nExps),fractions)
title('(b) Expresion Fraction')
axes([.45, .25, .16, .62])
bar(range(3,12), fractions[2:11])
#Plot rasterplot
subplot(1,3,1);
imshow(Vt, cmap=jet(), interpolation='nearest', vmin=-.3, vmax=.3);
title('(a) Arrays')
#plot two first eigengenes
subplot(1,3,3);
plot(range(nExps), Vt[0,:], '-ob',range(nExps), Vt[1,:], '-or' );
axis([0, nExps, -1, 1])
title('(c) Arrays')
ylabel('Expression Level')
show()
|