From b600a6b80ac9136d87ed2d84c22d879dd57f5546 Mon Sep 17 00:00:00 2001 From: rosieluo2021 Date: Mon, 14 Oct 2024 17:30:33 +0100 Subject: [PATCH] update overlap maps to symmetric maps --- .../analysis/compareXomicsModels.m | 150 +++++++------ .../analysis/plotOverlapResults.m | 206 +++++++----------- 2 files changed, 158 insertions(+), 198 deletions(-) diff --git a/src/dataIntegration/XomicsToModel/analysis/compareXomicsModels.m b/src/dataIntegration/XomicsToModel/analysis/compareXomicsModels.m index 6edf2abe73..d4e5a54952 100644 --- a/src/dataIntegration/XomicsToModel/analysis/compareXomicsModels.m +++ b/src/dataIntegration/XomicsToModel/analysis/compareXomicsModels.m @@ -25,7 +25,7 @@ % This function is used to compare generated models from xomicsToModel % pipeline % -%Author(s): - Xi Luo +%Author(s): - Xi Luo, update at 2024/10 % - Hanneke Leegwater (2022) %% Check input params @@ -36,89 +36,105 @@ error('printFlag should be a number or a bool') end -%% -models=multiModels; +%% Initialize +models = multiModels; + if isstruct(models) && length(fieldnames(models))>1 - fields=fieldnames(models); - statMets=cell(length(fields)); - statRxns=cell(length(fields)); - statGenes=cell(length(fields)); - for i=1:length(fields) - for j=1:length(fields) - %mets - overlap_mets.(fields{i}).(fields{j})=models.(fields{i}).mets(ismember(models.(fields{i}).mets,models.(fields{j}).mets)); - statMets{i,j}=sum(ismember(models.(fields{i}).mets,models.(fields{j}).mets)); - %rxns - overlap_rxns.(fields{i}).(fields{j})=models.(fields{i}).rxns(ismember(models.(fields{i}).rxns,models.(fields{j}).rxns)); - statRxns{i,j}=sum(ismember(models.(fields{i}).rxns,models.(fields{j}).rxns)); - %genes - overlap_genes.(fields{i}).(fields{j})=models.(fields{i}).genes(ismember(models.(fields{i}).genes,models.(fields{j}).genes)); - statGenes{i,j}=sum(ismember(models.(fields{i}).genes,models.(fields{j}).genes)); + fields = fieldnames(models); + numModels = length(fields); + + % Initialize matrices for statistics and results + statMets = cell(numModels); + statRxns = cell(numModels); + statGenes = cell(numModels); + overlap_mets = struct(); + overlap_rxns = struct(); + overlap_genes = struct(); + + % Loop over all model pairs to compute overlaps + for i = 1:numModels + for j = 1:numModels + % Mets overlap + overlap_mets.(fields{i}).(fields{j}) = models.(fields{i}).mets(ismember(models.(fields{i}).mets, models.(fields{j}).mets)); + statMets{i,j} = length(overlap_mets.(fields{i}).(fields{j})); + + % Rxns overlap + overlap_rxns.(fields{i}).(fields{j}) = models.(fields{i}).rxns(ismember(models.(fields{i}).rxns, models.(fields{j}).rxns)); + statRxns{i,j} = length(overlap_rxns.(fields{i}).(fields{j})); + + % Genes overlap + overlap_genes.(fields{i}).(fields{j}) = models.(fields{i}).genes(ismember(models.(fields{i}).genes, models.(fields{j}).genes)); + statGenes{i,j} = length(overlap_genes.(fields{i}).(fields{j})); end end - colname=cell2table(fields); - statMets=cell2table(statMets,"VariableNames",fields); - statMets=[colname statMets]; - statRxns=cell2table(statRxns,"VariableNames",fields); - statRxns=[colname statRxns]; - statGenes=cell2table(statGenes,"VariableNames",fields); - statGenes=[colname statGenes]; - %all overlapped - overlap_mets.alloverlap=models.(fields{1}).mets(ismember(models.(fields{1}).mets,models.(fields{2}).mets)); - overlap_rxns.alloverlap=models.(fields{1}).rxns(ismember(models.(fields{1}).rxns,models.(fields{2}).rxns)); - overlap_genes.alloverlap=models.(fields{1}).genes(ismember(models.(fields{1}).genes,models.(fields{2}).genes)); - for i=1:length(fields)-2 - overlap_mets.alloverlap=overlap_mets.alloverlap(ismember(overlap_mets.alloverlap,models.(fields{i+2}).mets)); - overlap_rxns.alloverlap=overlap_rxns.alloverlap(ismember(overlap_rxns.alloverlap,models.(fields{i+2}).rxns)); - overlap_genes.alloverlap=overlap_genes.alloverlap(ismember(overlap_genes.alloverlap,models.(fields{i+2}).genes)); - end - overlapResults.mets=overlap_mets; - overlapResults.rxns=overlap_rxns; - overlapResults.genes=overlap_genes; + % Convert statistics to tables + statMets = cell2table(statMets, 'VariableNames', fields, 'RowNames', fields); + statRxns = cell2table(statRxns, 'VariableNames', fields, 'RowNames', fields); + statGenes = cell2table(statGenes, 'VariableNames', fields, 'RowNames', fields); + + % Store results in output structure + overlapResults.mets = overlap_mets; + overlapResults.rxns = overlap_rxns; + overlapResults.genes = overlap_genes; + % Calculate all overlap across all models + overlapResults.mets.alloverlap = intersectMultipleModels(models, fields, 'mets'); + overlapResults.rxns.alloverlap = intersectMultipleModels(models, fields, 'rxns'); + overlapResults.genes.alloverlap = intersectMultipleModels(models, fields, 'genes'); + statistic.overlapnumber_mets=statMets; statistic.overlapnumber_rxns=statRxns; statistic.overlapnumber_genes=statGenes; - %met proportion - metsdata=statistic.overlapnumber_mets{:,2:end}; - [max_a,index]=max(metsdata); - xa=repmat(max_a',[1 length(statistic.overlapnumber_mets{:,1})]); - pro=round(metsdata./xa*100,2); - pro=num2cell(pro); - pro=cell2table(pro,"VariableNames",fields); - pro=[colname pro]; - statistic.overlaporportion_mets=pro; - %rxns propotion - metsdata=statistic.overlapnumber_rxns{:,2:end}; - [max_a,index]=max(metsdata); - xa=repmat(max_a',[1 length(statistic.overlapnumber_rxns{:,1})]); - pro=round(metsdata./xa*100,2); - pro=num2cell(pro); - pro=cell2table(pro,"VariableNames",fields); - pro=[colname pro]; - statistic.overlaporportion_rxns=pro; - %gene propotion - metsdata=statistic.overlapnumber_genes{:,2:end}; - [max_a,index]=max(metsdata); - xa=repmat(max_a',[1 length(statistic.overlapnumber_genes{:,1})]); - pro=round(metsdata./xa*100,2); - pro=num2cell(pro); - pro=cell2table(pro,"VariableNames",fields); - pro=[colname pro]; - statistic.overlaporportion_genes=pro; + + % Calculate symmetric proportion matrix (A ∩ B) / (A ∪ B) + statistic.overlapproportion_mets = calculateProportionMatrix(models, fields, 'mets', statMets); + statistic.overlapproportion_rxns = calculateProportionMatrix(models, fields, 'rxns', statRxns); + statistic.overlapproportion_genes = calculateProportionMatrix(models, fields, 'genes', statGenes); + else disp('please check the input variable') end -%% Print tables with output if printFlag = 1 +% Print tables with output if printFlag = 1 if printFlag ==1 disp('Number of overlapping mets between models is:') statistic.overlapnumber_mets - + disp('Number of overlapping rxns between models is:') statistic.overlapnumber_rxns - + disp('Number of overlapping genes between models is:') statistic.overlapnumber_genes end +end + +% Function to calculate the intersection across multiple models +function allOverlap = intersectMultipleModels(models, fields, featureType) + allOverlap = models.(fields{1}).(featureType); + for i = 2:length(fields) + allOverlap = intersect(allOverlap, models.(fields{i}).(featureType)); + end +end + +% Function to calculate proportion matrix (A ∩ B) / (A ∪ B) +function proportionTable = calculateProportionMatrix(models, fields, featureType, statTable) + numModels = length(fields); + proportionMatrix = cell(numModels); + + for i = 1:numModels + for j = 1:numModels + % Calculate the union (A ∪ B) for proportion + unionCount = length(union(models.(fields{i}).(featureType), models.(fields{j}).(featureType))); + if unionCount > 0 + proportionMatrix{i,j} = round(100 * statTable{i,j} / unionCount, 2); % (A ∩ B) / (A ∪ B) + else + proportionMatrix{i,j} = NaN; % Avoid division by zero + end + end + end + + % Convert to table for easy reading + proportionMatrix = cell2table(proportionMatrix, 'VariableNames', fields, 'RowNames', fields); + proportionTable = proportionMatrix; +end diff --git a/src/dataIntegration/XomicsToModel/analysis/plotOverlapResults.m b/src/dataIntegration/XomicsToModel/analysis/plotOverlapResults.m index 6bff68d068..5852fdafd1 100644 --- a/src/dataIntegration/XomicsToModel/analysis/plotOverlapResults.m +++ b/src/dataIntegration/XomicsToModel/analysis/plotOverlapResults.m @@ -11,147 +11,91 @@ function plotOverlapResults(overlapresults,statistic,savepath) % a heat map plot % % Author(s): -% Xi Luo, 2023/02 +% Xi Luo, update 2024/10 % % -%use proportion to create map +%% use proportion data to create map figure('units','normalized','outerposition',[0 0 1 1]) -%mets -a=statistic.overlapnumber_mets{:,1}; -metsdata=statistic.overlapnumber_mets{:,2:end}; -%find model size -for i=1:size(metsdata,1) -[max_a(i),index(i)]=max(metsdata(i,:)); -end -%generate proportion -xa=repmat(max_a',[1 length(statistic.overlapnumber_mets{:,1})]); -pro=round(metsdata./xa*100,2); -ax = subplot(3,1,1); -h = imagesc(ax, pro); -daspect([1 4 1]); -title(['All Overlapped Mets=' num2str(length(overlapresults.mets.alloverlap))]) -ax.TickLength(1) = 0; -% Create heatmap's colormap -n=256; -cmap = [linspace(.9,0,n)', linspace(.9447,.447,n)', linspace(.9741,.741,n)']; -colormap(ax, cmap); -colorbar(ax) -hold on -%add text label (proportion (accuracy)) -label1=reshape(pro',[],1); -% accuracy_mets=accuracy.mets{:,2:end}; -% label2=round(reshape(accuracy_mets,[],1),2); -% labels=append(string(label1),'%','(',string(label2),')'); -try - %labels(find(label1==100))=append(string(max_a'),'(',string(label2(find(label1==100))),')'); - labels=append(string(label1)); - labels(find(label1==100))=append(string(max_a')); -catch ME - disp(ME) -end -[xTxt, yTxt] = ndgrid(1:size(metsdata,1), 1:size(metsdata,1)); -th = text(xTxt(:), yTxt(:), labels(:), ... - 'VerticalAlignment', 'middle','HorizontalAlignment','Center'); -set(ax,'XTick',1:size(metsdata,1),'YTick',1:size(metsdata,1)) -xticklabels(a) -yticklabels(a) -set(gca,'XTickLabelRotation',0); - - -%rxns -rxnsdata=statistic.overlapnumber_rxns{:,2:end}; -%find model size -for i=1:size(rxnsdata,1) -[max_a(i),index(i)]=max(rxnsdata(i,:)); -end -%generate proportion -xa=repmat(max_a',[1 length(statistic.overlapnumber_rxns{:,1})]); -pro=round(rxnsdata./xa*100,2); -ax = subplot(3,1,2); -h = imagesc(ax, pro); -daspect([1 4 1]); -title(['All Overlapped Rxns=' num2str(length(overlapresults.rxns.alloverlap))]) -ax.TickLength(1) = 0; -% Create heatmap's colormap -n=256; -cmap = [linspace(.9,0,n)', linspace(.9447,.447,n)', linspace(.9741,.741,n)']; -colormap(ax, cmap); -colorbar(ax) -hold on -%add text label (proportion (accuracy)) -label1=reshape(pro',[],1); -% accuracy_rxns=accuracy.rxns{:,2:end}; -% label2=round(reshape(accuracy_rxns,[],1),2); -% labels=append(string(label1),'%','(',string(label2),')'); -try - %labels(find(label1==100))=append(string(max_a'),'(',string(label2(find(label1==100))),')'); - labels=append(string(label1)); - labels(find(label1==100))=append(string(max_a')); -catch ME - disp(ME) -end -[xTxt, yTxt] = ndgrid(1:size(rxnsdata,1), 1:size(rxnsdata,1)); -th = text(xTxt(:), yTxt(:), labels(:), ... - 'VerticalAlignment', 'middle','HorizontalAlignment','Center'); -set(ax,'XTick',1:size(metsdata,1),'YTick',1:size(metsdata,1)) -xticklabels(a) -yticklabels(a) -set(gca,'XTickLabelRotation',0); - -%genes -genesdata=statistic.overlapnumber_genes{:,2:end}; -%find model size -for i=1:size(genesdata,1) -[max_a(i),index(i)]=max(genesdata(i,:)); -end -%generate proportion -xa=repmat(max_a',[1 length(statistic.overlapnumber_genes{:,1})]); -pro=round(genesdata./xa*100,2); -ax = subplot(3,1,3); -h = imagesc(ax, pro); -daspect([1 4 1]); -title(['All Overlapped Genes=' num2str(length(overlapresults.genes.alloverlap))]) -ax.TickLength(1) = 0; -% Create heatmap's colormap -n=256; -cmap = [linspace(.9,0,n)', linspace(.9447,.447,n)', linspace(.9741,.741,n)']; -colormap(ax, cmap); -colorbar(ax) -hold on -%add text label (proportion (accuracy)) -label1=reshape(pro',[],1); -% accuracy_genes=accuracy.genes{:,2:end}; -% label2=round(reshape(accuracy_genes,[],1),2); -% labels=append(string(label1),'%','(',string(label2),')'); -try - %labels(find(label1==100))=append(string(max_a'),'(',string(label2(find(label1==100))),')'); - labels=append(string(label1)); - labels(find(label1==100))=append(string(max_a')); -catch ME - disp(ME) -end -[xTxt, yTxt] = ndgrid(1:size(genesdata,1), 1:size(genesdata,1)); -th = text(xTxt(:), yTxt(:), labels(:), ... - 'VerticalAlignment', 'middle','HorizontalAlignment','Center'); -set(ax,'XTick',1:size(genesdata,1),'YTick',1:size(genesdata,1)) -xticklabels(a) -yticklabels(a) -set(gca,'XTickLabelRotation',0); - -% add annotation -annotation('textbox',... - [0.29 0.90 0.2 0.08],... - 'String',{'Colorbar = overlapped proportion(%); textlabel = proportion; Diagonal number = model size'},... - 'FontSize',12,'FitBoxToText','on','LineStyle','none'); + +%% Mets data +a=statistic.overlapnumber_mets.Properties.RowNames; +metsdata = statistic.overlapproportion_mets{:, :}; + +% Plot Mets Data +ax1 = subplot(3,1,1); +plotHeatmapWithLabels(ax1, metsdata, a, 'Mets', overlapresults.mets, 'All Overlapped Mets'); + +%% Rxns Heatmap +rxnsdata = statistic.overlapproportion_rxns{:, :}; + +% Plot Rxns Data +ax2 = subplot(3,1,2); +plotHeatmapWithLabels(ax2, rxnsdata, a, 'Rxns', overlapresults.rxns, 'All Overlapped Rxns'); + +%% Genes Heatmap +genesdata = statistic.overlapproportion_genes{:, :}; + +% Plot Genes Data +ax3 = subplot(3,1,3); +plotHeatmapWithLabels(ax3, genesdata, a, 'Genes', overlapresults.genes, 'All Overlapped Genes'); + +annotation('textbox', [0.35 0.92 0.3 0.07], ... + 'String', {'Colorbar = Overlapped Proportion (%), Text Label = Proportion, Diagonal = Model Size'}, ... + 'FontSize', 12, 'HorizontalAlignment', 'center', 'FitBoxToText', 'on', 'LineStyle', 'none', 'EdgeColor', 'none'); if exist('savepath', 'var') iterationMethod=extractAfter(savepath,'models_'); sgtitle(['Overlapped result of ' iterationMethod]) - % myAxes=findobj(ax,'Type','Axes'); - % exportgraphics(myAxes,['overlap.pdf']); cd(savepath) saveas(ax, ['overlap_' iterationMethod '.fig']) end end +function plotHeatmapWithLabels(ax, data, labels, featureType, overlapData, titleText) +%plot a heat map of the overlapped number + +% Create heatmap +imagesc(ax, data); +daspect([1 4 1]); % Equal aspect ratio for squares +title([titleText ' = ' num2str(size(overlapData.alloverlap, 1))]); % Title with all overlapped count +ax.TickLength(1) = 0; % No ticks + +% Create heatmap's colormap +n = 256; +cmap = [linspace(.9,0,n)', linspace(.9447,.447,n)', linspace(.9741,.741,n)']; +colormap(ax, cmap); +colorbar(ax); +hold on; + +% Set the diagonal labels with corresponding sizes +for i = 1:length(labels) + modelName = labels{i}; + if isfield(overlapData, modelName) && isfield(overlapData.(modelName), modelName) + modelSize = length(overlapData.(modelName).(modelName)); + text(i, i, num2str(modelSize), 'HorizontalAlignment', 'center', 'FontSize', 10, 'Color', 'k', 'FontWeight', 'bold'); % Bold size on diagonal + end +end + +% Add text labels for each square +textStrings = num2str(data(:), '%0.2f'); % Convert data to string +textStrings = strtrim(cellstr(textStrings)); % Remove any padding +[x, y] = meshgrid(1:size(data, 1)); % Create grid for positions + +% Plot the text labels, skipping the diagonal +for i = 1:size(data, 1) + for j = 1:size(data, 2) + if i ~= j % Skip diagonal + text(x(i,j), y(i,j), textStrings((i-1)*size(data,1) + j), ... + 'HorizontalAlignment', 'center', 'FontSize', 10); + end + end +end + +% Set axes labels and formatting +set(ax, 'XTick', 1:length(labels), 'YTick', 1:length(labels)); +set(ax, 'XTickLabel', labels, 'YTickLabel', labels); +set(ax, 'XTickLabelRotation', 0); % Rotate X-axis labels for readability +hold off; + +end \ No newline at end of file