Contains the EnsemblRelease class, which extends the Genome class
to be specific to (a particular release of) Ensembl.
"""
19+ from weakref import WeakValueDictionary
1920
2021from .genome import Genome
2122from .ensembl_release_versions import check_release_number , MAX_ENSEMBL_RELEASE
@@ -32,18 +33,53 @@ class EnsemblRelease(Genome):
    Bundles together the genomic annotation and sequence data associated with
    a particular release of the Ensembl database.
    """
35- def __init__ (self ,
36- release = MAX_ENSEMBL_RELEASE ,
37- species = human ,
38- server = ENSEMBL_FTP_SERVER ):
39- self .release = check_release_number (release )
40- self .species = check_species_object (species )
41- self .server = server
36+
@classmethod
def normalize_init_values(cls, release, species, server):
    """
    Canonicalize the (release, species, server) triple that identifies an
    EnsemblRelease genome.

    The release is passed through check_release_number and the species
    through check_species_object; the server is returned unchanged.

    Returns a 3-tuple (release, species, server).
    """
    return (
        check_release_number(release),
        check_species_object(species),
        server,
    )
46+
# Cache of already-constructed genomes, keyed by the normalized
# (release, species, server) tuple. A WeakValueDictionary is used instead of
# an ordinary dict to prevent a memory leak in cases where we test many
# different releases in sequence: once every outside reference to a
# particular EnsemblRelease dies, its entry is dropped from this cache too.
_genome_cache = WeakValueDictionary()

@classmethod
def cached(
        cls,
        release=MAX_ENSEMBL_RELEASE,
        species=human,
        server=ENSEMBL_FTP_SERVER):
    """
    Construct EnsemblRelease if no live instance exists for these arguments,
    otherwise return the existing instance.

    The arguments are first passed through normalize_init_values, and the
    resulting tuple is used as the cache key (so it must be hashable --
    NOTE(review): assumes species objects are hashable; confirm).

    Returns the cached or newly constructed instance.
    """
    init_args_tuple = cls.normalize_init_values(release, species, server)
    # Fetch with a single .get() so we either hold a strong reference or
    # know the entry is absent. Checking `key in cache` and then indexing
    # `cache[key]` can raise KeyError on a WeakValueDictionary, because the
    # referent may be collected between the two operations.
    genome = cls._genome_cache.get(init_args_tuple)
    if genome is None:
        genome = cls(*init_args_tuple)
        cls._genome_cache[init_args_tuple] = genome
    return genome
69+
70+ def __init__ (
71+ self ,
72+ release = MAX_ENSEMBL_RELEASE ,
73+ species = human ,
74+ server = ENSEMBL_FTP_SERVER ):
75+ self .release , self .species , self .server = self .normalize_init_values (
76+ release = release , species = species , server = server )
4277
4378 self .gtf_url = make_gtf_url (
4479 ensembl_release = self .release ,
45- species = species ,
46- server = server )
80+ species = self .species ,
81+ server = self .server )
82+
4783 self .transcript_fasta_url = make_fasta_url (
4884 ensembl_release = self .release ,
4985 species = self .species .latin_name ,
@@ -53,7 +89,7 @@ def __init__(self,
5389 ensembl_release = self .release ,
5490 species = self .species .latin_name ,
5591 sequence_type = "pep" ,
56- server = server )
92+ server = self . server )
5793
5894 self .reference_name = self .species .which_reference (self .release )
5995
@@ -92,3 +128,10 @@ def to_dict(self):
92128 "species" : self .species ,
93129 "server" : self .server
94130 }
131+
@classmethod
def from_dict(cls, state_dict):
    """
    Rebuild an EnsemblRelease from a state dictionary without creating
    duplicate instances.

    The entries of state_dict are forwarded as keyword arguments to
    cls.cached, whose result is returned.
    """
    genome = cls.cached(**state_dict)
    return genome
0 commit comments