Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce number of required file handles #650

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts_of/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,7 +1681,7 @@ def CheckOptions(options, speciesToUse):
# check can open enough files
n_extra = 50
q_do_orthologs = not any((options.qStopAfterPrepare, options.qStopAfterGroups, options.qStopAfterSeqs, options.qStopAfterAlignments, options.qStopAfterTrees))
if q_do_orthologs and not options.qStartFromTrees:
if q_do_orthologs and not options.qStartFromTrees and not ("USE_MEM" in os.environ and os.environ["USE_MEM"] == "1"):
n_sp = len(speciesToUse)
wd = files.FileHandler.GetWorkingDirectory_Write()
wd_files_test = wd + "Files_test/"
Expand Down
37 changes: 32 additions & 5 deletions scripts_of/trees2ologs_of.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import itertools
import multiprocessing as mp
from collections import defaultdict, deque
import io
import shutil

from . import tree as tree_lib
from . import resolve, util, files, parallel_task_manager
Expand Down Expand Up @@ -987,13 +989,38 @@ def __init__(self, directory, speciesDict, iSpeciesToUse, nSpecies, sp_to_index)
self.nSpecies = nSpecies
self.sp_to_index = sp_to_index
self.dPutativeXenologs = files.FileHandler.GetPutativeXenelogsDir()
self.inMemory = ("USE_MEM" in os.environ and os.environ["USE_MEM"] == "1")
if self.inMemory:
util.PrintTime("Handling OrthologsFiles in memory")
self.ortholog_file_handles = [[None for _ in self.iSpeciesToUse] for _ in self.iSpeciesToUse]
self.xenolog_file_handles = [None for _ in self.iSpeciesToUse]

def my_open(self, path, mode):
    """Open ``path`` either as a real file or as an in-memory buffer.

    When ``self.inMemory`` is set and ``PY2`` is false, an ``io.StringIO``
    is returned instead of a file handle, so no OS file descriptor stays
    open. The buffer is pre-loaded with the current content of ``path``
    (if it exists) and carries the destination in a ``filepath`` attribute
    that ``my_close`` uses to write the data back.

    Otherwise the file is opened on disk with ``mode``.

    Args:
        path: Destination file path.
        mode: Mode passed to ``open`` for the on-disk case. Callers in this
            class pass ``csv_append_mode``.

    Returns:
        A writable file-like object.
    """
    if not (self.inMemory and not PY2):
        # Fixed: the original opened `xenolog_path`, a local of __enter__ that
        # does not exist here (NameError), and ignored both arguments.
        return open(path, mode)

    buf = io.StringIO()
    # Remember where the buffer has to be written when it is closed.
    buf.filepath = path
    if os.path.exists(path):
        # Pre-load existing content; the write position ends up at the end of
        # the buffer, so later writes extend it.
        # NOTE(review): this assumes callers want append semantics - confirm
        # before passing a truncating mode.
        with open(path, "r") as existing:
            shutil.copyfileobj(existing, buf)
    return buf

def my_close(self, orig_fh):
    """Close a handle obtained from ``my_open``.

    For an in-memory buffer (``self.inMemory`` set and ``PY2`` false) the
    whole buffer is written to ``orig_fh.filepath`` before the buffer is
    released. For a regular file handle this just closes it.

    Args:
        orig_fh: The object returned by ``my_open``.
    """
    if self.inMemory and not PY2:
        try:
            # copyfileobj copies from the *current* position, which is at the
            # end of the buffer after writing. Without rewinding, the target
            # file would be truncated by "w" and then left empty.
            orig_fh.seek(0)
            with open(orig_fh.filepath, "w") as out_fh:
                shutil.copyfileobj(orig_fh, out_fh)
        finally:
            # Release the buffer even if writing it out failed.
            orig_fh.close()
    else:
        orig_fh.close()

def __enter__(self):
for i in xrange(self.nSpecies):
sp0 = str(self.iSpeciesToUse[i])
self.xenolog_file_handles[i] = open(self.dPutativeXenologs + "%s.tsv" % self.speciesDict[sp0], csv_append_mode)
xenolog_path = self.dPutativeXenologs + "%s.tsv" % self.speciesDict[sp0]
self.xenolog_file_handles[i] = self.my_open(xenolog_path, csv_append_mode)
strsp0 = sp0 + "_"
isp0 = self.sp_to_index[sp0]
d0 = self.d + "Orthologues_" + self.speciesDict[sp0] + "/"
Expand All @@ -1003,17 +1030,17 @@ def __enter__(self):
strsp1 = sp1 + "_"
isp1 = self.sp_to_index[sp1]
d1 = self.d + "Orthologues_" + self.speciesDict[sp1] + "/"
self.ortholog_file_handles[i][j] = open(d0 + '%s__v__%s.tsv' % (self.speciesDict[sp0], self.speciesDict[sp1]), csv_append_mode)
self.ortholog_file_handles[j][i] = open(d1 + '%s__v__%s.tsv' % (self.speciesDict[sp1], self.speciesDict[sp0]), csv_append_mode)
self.ortholog_file_handles[i][j] = self.my_open(d0 + '%s__v__%s.tsv' % (self.speciesDict[sp0], self.speciesDict[sp1]), csv_append_mode)
self.ortholog_file_handles[j][i] = self.my_open(d1 + '%s__v__%s.tsv' % (self.speciesDict[sp1], self.speciesDict[sp0]), csv_append_mode)
return self.ortholog_file_handles, self.xenolog_file_handles

def __exit__(self, type, value, traceback):
    """Close every xenolog and ortholog handle via ``my_close``.

    Each handle is closed exactly once through ``self.my_close``, so
    in-memory buffers are written to disk before they are released.
    Calling ``fh.close()`` first would discard an in-memory buffer before
    it is flushed. Slots that are still ``None`` are skipped.

    Returns ``None``, so an exception raised inside the ``with`` body is
    not suppressed.
    """
    for fh in self.xenolog_file_handles:
        # Slots start out as None in __init__; skip any that were never opened.
        if fh is not None:
            self.my_close(fh)
    for fh_list in self.ortholog_file_handles:
        for fh in fh_list:
            if fh is not None:
                self.my_close(fh)

@staticmethod
def flush_olog_files(ortholog_file_handles):
Expand Down
21 changes: 12 additions & 9 deletions scripts_of/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,9 @@ def number_open_files_exception_advice(n_species, q_at_trees):
"""
# parallel_task_manager.RunCommand("ulimit -Hn")
n_req = n_species*n_species + 100
msg="\nERROR: The system limits on the number of files a process can open is too low. For %d species \
msg="\nERROR: The system limits on the number of files a process can open is too low."
if not ("USE_MEM" in os.environ and os.environ["USE_MEM"] == "1"):
msg += "For %d species \
OrthoFinder needs to be able to open at least r=%d files. Please increase the limit and restart OrthoFinder\n\
1. Check the hard and soft limits on the number of open files for your system:\n\
$ ulimit -Hn\n\
Expand All @@ -398,16 +400,17 @@ def number_open_files_exception_advice(n_species, q_at_trees):
To increase the limit to %d for user called 'emms' add the lines:\n\
emms hard nofile %d\n\
emms soft nofile %d\n" % (n_species, n_req, n_req, n_req, n_req, n_req)
msg +=" (edit these lines to match your username)\n\
msg +=" (edit these lines to match your username)\n\
4. Check the limit has now been updated (if you changed the hard limit you'll need to open a new session and confirm it's updated):\n\
$ ulimit -Sn"

if q_at_trees:
msg_part_2 = "5. Once the limit is updated restart OrthoFinder 'from trees' using the '-ft' command"
$ ulimit -Sn\n"
if q_at_trees:
msg += "\n5. Once the limit is updated restart OrthoFinder 'from trees' using the '-ft' command"
else:
msg += "\n5. Once the limit is updated restart OrthoFinder with the original command"
else:
msg_part_2 = "5. Once the limit is updated restart OrthoFinder with the original command"
msg_part_3 = "\nFor full details see: https://github.com/davidemms/OrthoFinder/issues/384"
print(msg + "\n" + msg_part_2 + "\n" + msg_part_3 + "\n")
msg += "Please try to run OrthoFinder with USE_MEM=1 as environment variable. You need more memory for it"
msg += "\nFor full details see: https://github.com/davidemms/OrthoFinder/issues/384"
print(msg + "\n")
"""
-------------------------------------------------------------------------------
"""
Expand Down