Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 52 additions & 24 deletions probe_construction/MERFISHProbeDesign.m
Original file line number Diff line number Diff line change
Expand Up @@ -660,13 +660,15 @@ function MERFISHProbeDesign(varargin)

fprintf(logFID, '%s - Building transcriptome object.\n', datestr(datetime));

% Build transcriptome using existing abundance data
%Build transcriptome using existing abundance data
transcriptome = Transcriptome(rawTranscriptomeFasta, ...
'abundPath', fpkmPath, ...
'verbose', true, ...
'headerType', transcriptomeHeaderType, ...
'IDType', transcriptomeIDType);

%transcriptome = Transcriptome(rawTranscriptomeFasta, ...
% 'abundPath', fpkmPath, ...
% 'verbose', true);
transcriptome.Save(transcriptomePath);
fprintf(logFID, '%s - Transcriptome object saved to %s\n', datestr(datetime), transcriptomePath);

Expand Down Expand Up @@ -718,7 +720,7 @@ function MERFISHProbeDesign(varargin)
% Generate a OTTable for isoforms for the given gene
isoSpecificityTables(i) = OTTable(localTranscriptome, ...
isoSpecificityTable_lengthOfExactHomology, ... % lengthOfExactHomology is the length of exact homology used to calculate penalties
'verbose', false, ...
'verbose', false, ...
'transferAbund', false);

else
Expand Down Expand Up @@ -798,7 +800,7 @@ function MERFISHProbeDesign(varargin)
%% Create parallel pool... speeds up the construction of the TRDesigner and the construction of libraries
if isempty(gcp('nocreate'))
fprintf(logFID, '%s - Start parallel processing pool\n', datestr(datetime));
p = parpool(5); % Insert a number here appropriate to the used computational resources
p = parpool(4); % Insert a number here appropriate to the used computational resources
else
p = gcp;
end
Expand Down Expand Up @@ -915,6 +917,11 @@ function MERFISHProbeDesign(varargin)
fprintf(logFID, '%s - Target regions loaded from %s\n', datestr(datetime), trRegionsPath);
end

%write the genename
genenames={targetRegions.geneName}';
geneids={targetRegions.id}';
numRegions={targetRegions.numRegions}';
writetable(table(genenames,geneids,numRegions),[analysisSavePath 'targetRegions.csv'],'WriteRowNames',false)
%% ------------------------------------------------------------------------
% Step 2: Compile the library
% The target regions designed above will be compiled into template
Expand Down Expand Up @@ -956,13 +963,20 @@ function MERFISHProbeDesign(varargin)
% string miss-match. Replacing '_' with ',' in original codebook solves
% this issue.

if ~strcmp(transcriptomeIDType, 'NCBI')
finalIds = strrep(finalIds, '_', ',');
end
%if ~strcmp(transcriptomeIDType, 'NCBI')
% finalIds = strrep(finalIds, '_', ',');
%end

finalGenes = {codebook.name}; % Extract gene common names from codebook
barcodes = char({codebook.barcode}) == '1'; % Extract string barcodes and convert to logical matrix

%check if gene and ids match the reference
genes_from_ids=targetRegions(ismember({targetRegions.id}, finalIds));
genenames=targetRegions(ismember({targetRegions.geneName}, finalGenes));
if length(setdiff({genenames.geneName},{genes_from_ids.geneName}))>0
warning([char(setdiff({genenames.geneName},{genes_from_ids.geneName})),' ',char(setdiff({genes_from_ids.id},{genenames.id})),' is different from the reference']);
fprintf(logFID, ['WARNING! ', char(setdiff({genenames.geneName},{genes_from_ids.geneName})),' ',char(setdiff({genes_from_ids.id},{genenames.id})),' is different from the reference\n']);
end

if versionMatch
finalTargetRegions = targetRegions(ismember({targetRegions.id}, finalIds)); % Extract only the desired target regions
Expand Down Expand Up @@ -1035,13 +1049,16 @@ function MERFISHProbeDesign(varargin)
if keepGoingFlag
oligos = [];

allOligos = [];
lastGene = '';

if keepAllPossibleProbes
allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1);
allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1);
seqCount = 1;
end

Genes={};
ProbeNumbers={};
%if keepAllPossibleProbes
% allHeaders = cell(sum(vertcat(finalTargetRegions.numRegions)), 1);
% allSeqs = cell(sum(vertcat(finalTargetRegions.numRegions)), 1);
% seqCount = 1;
%end

for i=1:length(finalIds)
% Save local gene
Expand Down Expand Up @@ -1176,15 +1193,19 @@ function MERFISHProbeDesign(varargin)
display(['... ' headers{indsToRemove(r)}]);
end

% display(indsToKeep)

indsToKeepForReal = [indsToKeepForReal, indsToKeep];
% display(indsToKeep)

%indsToKeepForReal = [indsToKeepForReal, indsToKeep];
indsToKeepForReal = indsToKeep;
end

indsToKeepForAll = indsToKeepForReal(1:length(indsToKeepForReal));
indsToKeepForReal = indsToKeepForReal(randperm(length(indsToKeepForReal), min([length(indsToKeepForReal) numProbesPerGene])));
display(['... keeping ' num2str(length(indsToKeepForReal)) ' probes']);
fprintf(logFID, '%s - Retaining %d probes\n', datestr(datetime), length(indsToKeepForReal));
%write the genename
Genes=[Genes,localGeneName];
ProbeNumbers=[ProbeNumbers,length(indsToKeepForReal)];

% Check on number
if length(indsToKeepForReal) < numProbesPerGene
Expand Down Expand Up @@ -1253,14 +1274,18 @@ function MERFISHProbeDesign(varargin)
oligos(end+1).Header = headers{indsToKeepForReal(s)};
oligos(end).Sequence = seqs{indsToKeepForReal(s)};
end

if keepAllPossibleProbes
% Append all headers + seqs to cell
allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers;
allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs;
seqCount = seqCount + length(seqs);
for s=1:length(indsToKeepForAll)
allOligos(end+1).Header = headers{indsToKeepForAll(s)};
allOligos(end).Sequence = seqs{indsToKeepForAll(s)};
end

%if keepAllPossibleProbes
% Append all headers + seqs to cell
% allHeaders(seqCount:(seqCount + length(headers) - 1)) = headers;
% allSeqs(seqCount:(seqCount + length(seqs) - 1)) = seqs;
% seqCount = seqCount + length(seqs);
%end


else
fprintf(1, 'Empty!\n');
Expand All @@ -1281,10 +1306,13 @@ function MERFISHProbeDesign(varargin)
fastawrite(possibleOligosPath, oligos);
display(['... completed in ' num2str(toc(writeTimer))]);
fprintf(logFID, '%s - Completed in %d s\n', datestr(datetime), toc(writeTimer));

%%%%%Write out how many probes per gene as a table
Genes=Genes'
ProbeNumbers=ProbeNumbers'
writetable(table(Genes,ProbeNumbers),[analysisSavePath 'probesPerGene.csv'],'WriteRowNames',false)
if keepAllPossibleProbes

allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2);
%allOligos = cell2struct([allHeaders, allSeqs], {'Header', 'Sequence'}, 2);

PageBreak();
fprintf(1, 'Writing: %s\n', allOligosPath);
Expand Down