Skip to content

Commit 0baaa2c

Browse files
authored
Merge pull request #380 from eweitz/visualize-variant-cache
Show clinical variants on gene models
2 parents 7501f0b + f1952fa commit 0baaa2c

23 files changed

+1215
-217
lines changed
Binary file not shown.
1.2 MB
Binary file not shown.
105 KB
Binary file not shown.

karma.conf.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ module.exports = function(config) {
1616
// list of files / patterns to load in the browser
1717
files: [
1818
'src/js/index.js',
19-
'test/offline/**.test.js',
20-
'test/online/**.test.js',
19+
// 'test/offline/**.test.js',
20+
// 'test/online/**.test.js',
2121
// 'test/online/related-genes.test.js',
22-
// 'test/offline/gene-structure.test.js',
22+
'test/offline/gene-structure.test.js',
2323
// 'test/offline/tissue.test.js',
2424
{pattern: 'dist/data/**', watched: false, included: false, served: true, nocache: false}
2525
],

scripts/python/cache/clinvar_cache.py

Lines changed: 0 additions & 140 deletions
This file was deleted.

scripts/python/cache/gene_structure_cache.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,8 @@ def parse_mrna(raw_mrna, biotypes_list):
262262
def build_structures(structures_by_id):
263263
biotypes_list = list(biotypes.keys())
264264

265+
prev_gene = ''
266+
265267
structures = []
266268
for id in structures_by_id:
267269
structure_lists = structures_by_id[id]
@@ -273,8 +275,19 @@ def build_structures(structures_by_id):
273275

274276
for structure_list in structure_lists[1:]:
275277
subpart = parse_transcript_subpart(structure_list, mrna_start)
276-
structure += [";".join(subpart) ]
277-
278+
structure += [";".join(subpart)]
279+
280+
# Set transcript start coordinate relative to most-upstream transcript
281+
# This enables projecting genomic features (e.g. variants) onto
282+
# transcript coordinates. It also enables viewing multiple transcripts
283+
# in genomic coordinates, like typical genome browsers (Ensembl, IGV).
284+
gene_name = structure[1].split('-')[0]
285+
if gene_name != prev_gene:
286+
gene_start = int(mrna_start) # Start of 1st transcript is gene start
287+
prev_gene = gene_name
288+
mrna_start_offset = str(int(mrna_start) - gene_start)
289+
290+
structure.insert(2, mrna_start_offset)
278291
structures.append(structure)
279292

280293
return structures
@@ -288,8 +301,8 @@ def parse_structures(canonical_ids, gff_path, gff_url):
288301
289302
Parts of a transcript that comprise "gene structure" here:
290303
* Exons: regions of gene not removed by RNA splicing
291-
* 3'-UTR: Three prime untranslated region; start region
292-
* 5'-UTR: Fix prime untranslated region; end region
304+
* 5'-UTR: Five prime untranslated region; start region (for +, end for -)
305+
* 3'-UTR: Three prime untranslated region; end region (for +, start for -)
293306
294307
(Introns are the regions between 3'- and 5'-UTRs that are not exons.
295308
These are implied in the structure, and not modeled explicitly.)

0 commit comments

Comments
 (0)