Mathematica

adapted from Yeast_and_Human_Cell_Cycles.nb.txt

Matlab

source

Python

source
(* Load libraries and prepare session *)

<< LinearAlgebra`MatrixManipulation`;
<< NumericalMath`TrigFit`;
<< Graphics`Graphics`;
<< Graphics`Arrow`;
Off[General::"spell"]; 
Off[General::"spell1"];

(* Read Yeast Data *)
stream = "/home/lom/tmp/doublecheck_gsvd/Yeast.txt";
matrix = ReadList[stream, Word, RecordLists -> True, NullWords -> True];
{genes, arrays} = Dimensions[matrix] - {2, 6}
Clear[stream];

genenames = TakeRows[
      TakeColumns[matrix, {1, 6}],
      {3, genes + 2}];
arraynames = TakeColumns[
      TakeRows[matrix, {1, 2}],
      {7, arrays + 6}];
matrix = TakeColumns[
      TakeRows[matrix, {3, genes + 2}],
      {7, arrays + 6}];
matrix = ToExpression[matrix];

sizes = Flatten[
      Table[
        Dimensions[
          Characters[
            ToString[arraynames[[2, a]]
              ]]],
        {a, 1, arrays}]];
size = Sort[sizes, OrderedQ[{#2, #1}] &][[1]];
Do[
    Do[arraynames[[2, a]] = StringJoin[ToString[arraynames[[2, a]]], " "],
      {b, 1, size - sizes[[a]]}],
    {a, 1, arrays}];



(* Count and locate Null Data *)
counter = Table[Dimensions[Position[matrix[[a]], Null]][[1]], {a, 1, genes}];

Clear[positions];
positions = Table[0, {a, 1, arrays + 1}]
Do[
    positions[[a]] = Flatten[
        Position[Flatten[counter], a - 1]],
    {a, 1, arrays + 1}];
numbers = Flatten[
    Table[
      Dimensions[positions[[a]]],
      {a, 1, Round[arrays*0.2]}]]


(* Create Display Of Gene Position Of Null Data *)
framex = Table[{a, a - 1}, {a, 1, Round[arrays*0.2]}];
framey = {500, 1000, 1500, 2000, 2500, 3000};
labelx = ColumnForm[{"Number of Arrays"}, Center];
labely = ColumnForm[{"Number of Genes"}, Center];
g = BarChart[numbers,
      Frame -> True,
      Axes -> False,
      FrameLabel -> {labelx, labely, None, None},
      FrameTicks -> {framex, framey, None, None},
      GridLines -> {None, None},
      PlotRange -> {{0.5, Round[arrays*0.2] + 0.5}, {0, 3000}},
      DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
      Text[labely, {b_, c_}, {1., 0.}] ->
        Text[labely, {b - 0.75, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
      Text[labelx, {b_, c_}, {0., 1.}] ->
        Text[labelx, {b, c - 400}, {0, 1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
      Text[a_, {b_, c_}, {0., 1.}] ->
        Text[a, {b, c - 200}, {0, 0}, {0, 1}];

Show[g, PlotRange -> All]

(* Select Genes with no Missing Data Points *)
matrix = AppendRows[Table[{counter[[a]]}, {a, 1, genes}], genenames, matrix];
matrix = Sort[matrix, OrderedQ[{#1, #2}] &];
fullgenenames = TakeColumns[
      TakeRows[matrix, {1, numbers[[1]]}],
      {2, 7}];
fullmatrix = TakeColumns[
      TakeRows[matrix, {1, numbers[[1]]}],
      {8, arrays + 7}];

(* Calculate SVD *)
{eigenarrays, eigenexpressions, eigengenes} = SingularValues[fullmatrix];
eigenarrays = Transpose[eigenarrays];
fractions = eigenexpressions^2/Sum[eigenexpressions[[a]]^2, {a, 1, arrays}]
entropy = -N[
      Sum[fractions[[a]]*Log[fractions[[a]]], {a, 1, arrays}]/Log[arrays]]
entropy = N[Round[100*entropy]/100]

(* Create Fractions Bar Charts Displays *)
limit = 0.01;

Clear[gridx, framex, framey, sizes];
gridx = Table[a, {a, 0, limit, N[limit/5]}];
framex = gridx;
sizes = Flatten[
      Table[
        Dimensions[
          Characters[
            ToString[framex[[a]]
              ]]], {a, 1, 6}]];
Do[
    Do[framex[[a]] = StringJoin[ToString[framex[[a]]], " "],
      {b, 1, 5 - sizes[[a]]}],
    {a, 1, 6}];
framex = Table[{gridx[[a]], framex[[a]]}, {a, 1, 6}];
gridx = Table[{gridx[[a]], RGBColor[0, 0, 0]}, {a, 1, 6}];
framey = Table[{a + 1, arrays - a - 6}, {a, 0, 12 - 3}];
table = Table[fractions[[arrays - a]], {a, 6, arrays - 3}];

g = BarChart[table,
      BarOrientation -> Horizontal,
      PlotRange -> {{0, limit*1.0001}, {0.5, 12 - 2 + 0.5}},
      AspectRatio -> 1,
      Axes -> False,
      Frame -> True,
      FrameTicks -> {None, framey, framex, None},
      FrameLabel -> {None, None, None, None},
      GridLines -> {gridx, None},
      DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
      Text[a_, {b_, c_}, {0., -1.}] ->
        Text[a, {b, c + 1.75}, {0, 0}, {0, 1}];
g1 = Show[g,
      AspectRatio -> 1.25,
      PlotRange -> All,
      DisplayFunction -> Identity];

g = BarChart[table,
      BarOrientation -> Horizontal,
      PlotRange -> {{0, limit*1.0001}, {0.5, 12 - 2 + 0.5}},
      AspectRatio -> 1,
      Axes -> False,
      Frame -> True,
      FrameTicks -> {None, framey, framex, None},
      FrameLabel -> {None, None, None, None},
      GridLines -> {gridx, None},
      DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
      Text[a_, {b_, c_}, {0., -1.}] ->
        Text[a, {b, c + 1.75}, {0, 0}, {0, 1}];
g1 = Show[g,
      AspectRatio -> 1.25,
      PlotRange -> All,
      DisplayFunction -> Identity];

(* Red & Green Raster Display *)
contrast = 3.5;
displaying = Table[
      If[contrast*eigengenes[[i, j]] > 0,
        If[
          contrast*eigengenes[[i, j]] < 1, {contrast*eigengenes[[i, j]], 
            0}, {1, 0}],
        If[
          contrast*eigengenes[[i, j]] > -1, {0, -contrast*
              eigengenes[[i, j]]}, {0, 1}]],
      {i, 1, arrays}, {j, 1, arrays}];
framex = Table[{a - 0.5, arraynames[[2, a]]}, {a, 1, arrays}];
framey = Table[{a + 1 - 0.5, arrays - a}, {a, 0, arrays - 1}];
labely = "Eigengenes";
labelx = ColumnForm[{"(a) Arrays", " ", " "}, Center];

g = Show[
      Graphics[
        RasterArray[
          Table[
            RGBColor[displaying[[i, j, 1]], displaying[[i, j, 2]], 0],
            {i, arrays, 1, -1}, {j, 1, arrays}]]],
      AspectRatio -> 1,
      Frame -> True,
      FrameTicks -> {None, framey, framex, None},
      FrameLabel -> {None, labely, labelx, None},
      DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
      Text[labely, {b_, c_}, {1., 0.}] ->
        Text[labely, {b - 3, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
      Text[labelx, {b_, c_}, {0., -1.}] ->
        Text[labelx, {b, c + 3}, {0, -1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
      Text[a_, {b_, c_}, {0., -1.}] ->
        Text[a, {b, c + 2.7}, {0, 0}, {0, 1}];
g1 = Show[g,
      AspectRatio -> 1.05,
      PlotRange -> All,
      DisplayFunction -> Identity];


(* Create Selected Eigengenes Graph Display *)

labelx = ColumnForm[{"(c) Arrays"}, Center];
labely = ColumnForm[{" ", "Expression Level"}, Center];
framex = Table[{a - 1, arraynames[[2, a]]}, {a, 1, arrays}];
framey = {-1, -0.5, 0, 0.5, 1};
coordinates = Table[{a - 1, eigengenes[[1, a]]}, {a, 1, arrays}];
points = Table[Point[coordinates[[a]]], {a, 1, arrays}];
line = Line[coordinates];
g = Show[
      {Graphics[{RGBColor[1, 0, 0], PointSize[0.022], points}],
        Graphics[{RGBColor[1, 0, 0], line}]},
      Frame -> True,
      FrameLabel -> {None, labely, labelx, None},
      GridLines -> {None, {{0, RGBColor[0, 0, 0]}}},
      FrameTicks -> {None, framey, framex, None},
      PlotRange -> {-1.05, 1.05},
      DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
      Text[labely, {b_, c_}, {1., 0.}] ->
        Text[labely, {b - 5.4, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
      Text[labelx, {b_, c_}, {0., -1.}] ->
        Text[labelx, {b, c + 0.625}, {0, -1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
      Text[a_, {b_, c_}, {0., -1.}] ->
        Text[a, {b, c + 0.25}, {0, 0}, {0, 1}];
p1 = Show[g,
      AspectRatio -> 1.05,
      PlotRange -> All,
      DisplayFunction -> Identity];



labelx = ColumnForm[{"(c) Arrays"}, Center];
labely = ColumnForm[{" ", "Expression Level"}, Center];
framex = Table[{a - 1, arraynames[[2, a]]}, {a, 1, arrays}];
framey = {-1, -0.5, 0, 0.5, 1};
coordinates = Table[{a - 1, eigengenes[[2, a]]}, {a, 1, arrays}];
points = Table[Point[coordinates[[a]]], {a, 1, arrays}];
line = Line[coordinates];
g = Show[
      {Graphics[{RGBColor[0, 0, 1], PointSize[0.022], points}],
        Graphics[{RGBColor[0, 0, 1], line}]},
      Frame -> True,
      FrameLabel -> {None, labely, labelx, None},
      GridLines -> {None, {{0, RGBColor[0, 0, 0]}}},
      FrameTicks -> {None, framey, framex, None},
      PlotRange -> {-1.05, 1.05},
      DisplayFunction -> Identity];
g = FullGraphics[g];
g[[1, 2]] = g[[1, 2]] /.
      Text[labely, {b_, c_}, {1., 0.}] ->
        Text[labely, {b - 5.4, c}, {0, 0}, {0, 1}];
g[[1, 2]] = g[[1, 2]] /.
      Text[labelx, {b_, c_}, {0., -1.}] ->
        Text[labelx, {b, c + 0.625}, {0, -1}, {1, 0}];
g[[1, 2]] = g[[1, 2]] /.
      Text[a_, {b_, c_}, {0., -1.}] ->
        Text[a, {b, c + 0.25}, {0, 0}, {0, 1}];
p2 = Show[g,
      AspectRatio -> 1.05,
      PlotRange -> All,
      DisplayFunction -> Identity];

(* Display Selected Eigengenes *)
g3 = Show[{p2, p1},
    DisplayFunction -> Identity]

(* Display Eigengenes, Fractions and Selected Eigengenes *)
Show[GraphicsArray[{g1, g2, g3}],
  GraphicsSpacing -> -0.15]
%clear all previous variables
clear; format compact







%load the yeast data
load /home/lom/tmp/doublecheck_gsvd/yeast.mat
[nGenes, nExps] = size(data)





























%Count and locate NaN Data
nNanPerExp=zeros(nExps,1);
for i=0:nExps-1
  nNanPerExp(i+1) = (sum(sum(isnan(data),2)==i));
end










%Create and Display nummer of missing values
bar(0:nExps-1 ,nNanPerExp); axis([-.5 3.5 0 3000])
xlabel('Number of Arrays')
ylabel('Number of Genes')






















%Take the only rows without missing values
idx = sum(isnan(data),2)==0;
fullmatrix = data(idx,:);
fullgeneNames = geneNames(idx,:);






%Calculate SVD
[U S V] = svd(fullmatrix, 0);
S=diag(S)'
fractions = S.^2/sum(S.^2);
entropy = -sum(fractions.*log(fractions))/log(nExps)



%plot barchart
subplot(1,3,2) ; barh([1:nExps],fractions, 'r')
title('(b) Expresion Fraction')
axes('position', [.45 .3 .16 .62])
barh([3:12], fractions(3:12), 'r')
set(gca, 'Color', [1 1 .5])





















































%plot rasterplot
subplot(1,3,1); 
expNames(expNames=='_')=' ';
rasterplot(V', expNames)
title('(a) Arrays')
colormap(redgreen)
xticklabel_rotate




































%plot two first eigengenes
subplot(1,3,3);
plot(V(:,1:2), '-o');
title('(c) Arrays')
ylabel('Expression Level')
set(gca, 'XTick', 1:nExps, 'XTickLabel', expNames)
xticklabel_rotate

#import required tools
from Numeric import *
from pylab import *
from scipy.linalg import *

NONUMB = -999999.999;
nGeneAnots=6


#Read file
f = open('/home/lom/tmp/doublecheck_gsvd/Yeast.txt')
firstLine=f.readline()
lines=f.readlines()
firstLine = firstLine.strip().split('\t')

expNames = array(firstLine[nGeneAnots:], PyObject)
nGenes = len(lines)
nExps = len(expNames)
data=ones((nGenes, nExps), 'f')*NONUMB
geneNames=zeros(nGenes, PyObject)

i=0;
for line in lines:
    line = line.strip()
    cols = line.split('\t')
    geneNames[i] = cols[0]#cols[0:nGeneAnots]
    for j in range(nGeneAnots, len(cols)):
        if cols[j]!='':
            data[i][j-nGeneAnots]=float(cols[j])
    i=i+1
f.close()










#Count NaNs
nNanPerExp=zeros([nExps,1]);
for i in range(nExps):
    nNanPerExp[i] = sum(asarray(sum(asarray(data==NONUMB, 'i'),1)==i, 'i'))











#Create and Display number of missing values
bar(arange(nExps)-.5, nNanPerExp); axis([-.5, 3.5, 0, 3000])
xlabel('Number of Arrays')
ylabel('Number of Genes')
show()





















#Take the only rows without missing values
idx = nonzero(sum(data!=NONUMB, 1)==nExps)
fullmatrix = take(data, idx)
fullgeneNames=take(geneNames, idx)






#Calculate SVD
[U, S, Vt] = svd(fullmatrix);
fractions = S**2/sum(S**2);
entropy = -sum(fractions*log(fractions))/log(nExps)




#Plot barchart
subplot(1,3,2); bar(range(nExps),fractions)
title('(b) Expresion Fraction')
axes([.45, .25, .16, .62])
bar(range(3,12), fractions[2:11])






















































#Plot rasterplot
subplot(1,3,1);
imshow(Vt, cmap=jet(), interpolation='nearest', vmin=-.3, vmax=.3);
title('(a) Arrays')







































#plot two first eigengenes
subplot(1,3,3);
plot(range(nExps), Vt[0,:], '-ob',range(nExps), Vt[1,:], '-or' );
axis([0, nExps, -1, 1])
title('(c) Arrays')
ylabel('Expression Level')
show()