Skip to content

Commit

Permalink
DNA insertions, duplications, subs, deletions working
Browse files Browse the repository at this point in the history
  • Loading branch information
Yogiski committed May 27, 2024
1 parent bd665d2 commit e2d6f85
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 19 deletions.
43 changes: 24 additions & 19 deletions genefeatures/sequence_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,7 @@ def get_codon_index(self):
return self._codon_index

# mutation methods
def _match_protein_change_pattern(self, change: str) -> tuple:
    """Placeholder for protein-change pattern matching.

    Not implemented: the body performs no work and the call evaluates to
    ``None`` regardless of ``change``.
    """
    return None

def _protein_change(self, change):
    """Placeholder for applying a protein-level change.

    Not implemented: ``change`` is ignored and ``None`` is returned.
    """
    return None

# DNA changes
def _match_dna_change_pattern(self, change: str) -> tuple:

patterns = {
Expand Down Expand Up @@ -315,30 +310,23 @@ def _mutate_sequence(
ref: str = "",
alt: str = ""
) -> Seq:
print(sequence[start:end])
print(ref)
if sequence[start:end] != ref:
raise ValueError(
f"Reference base(s) {ref} do not match "
f"at position {start+1}-{end+1} "
f"found {sequence[start:end + len(ref)]}"
f"found {sequence[start:end]}"
)
return sequence[:start] + alt + sequence[end:]

def _get_mutated_sequences(
self,
pos: int,
end: int,
ref: str = "",
alt: str = ""
) -> tuple:

code = self.get_coding_seq()
if ref == "":
ref = code[pos]
if len(ref) >= 1:
end = pos + len(ref)
else:
end = pos
mutated_code = self._mutate_sequence(code, pos, end, ref, alt)

full = self.get_full_seq()
Expand All @@ -347,7 +335,6 @@ def _get_mutated_sequences(
alt = reverse_complement(alt)
full_pos = self._coding_index[end] + 1
full_end = self._coding_index[pos] + 1
print(pos, end, full_pos, full_end)
else:
full_pos = self._coding_index[pos]
full_end = self._coding_index[end]
Expand All @@ -360,16 +347,34 @@ def _get_mutated_sequences(
def _dna_snv(self, groups: tuple) -> tuple:
    """Apply a single-nucleotide substitution.

    Args:
        groups: ``(pos, ref, alt)``; ``pos`` is the 1-based coding
            position, ``ref`` the expected reference base and ``alt``
            the replacement base.

    Returns:
        The tuple produced by ``_get_mutated_sequences`` for the
        half-open 0-based interval ``[pos - 1, pos)``.
    """
    pos, ref, alt = groups
    start = int(pos) - 1  # 1-based position -> 0-based index
    # `_get_mutated_sequences` takes (pos, end, ref, alt): the end index
    # must be supplied explicitly, and ref/alt go by keyword so they can
    # never slide into the `end` slot.
    return self._get_mutated_sequences(start, start + 1, ref=ref, alt=alt)

def _dna_point_deletion(self, groups: tuple) -> tuple:
    """Delete a single nucleotide from the coding sequence.

    Args:
        groups: ``(pos, ref)``; ``pos`` is the 1-based coding position
            and ``ref`` the expected reference base. When ``ref`` is
            empty or ``None`` the base is read from the coding sequence.

    Returns:
        The tuple produced by ``_get_mutated_sequences`` for the
        half-open 0-based interval ``[pos - 1, pos)`` with no
        replacement sequence.
    """
    pos, ref = groups
    start = int(pos) - 1  # 1-based position -> 0-based index
    if not ref:
        # No reference supplied: take the base that is actually there.
        ref = self.get_coding_seq()[start]
    # Pass the end index explicitly; ref goes by keyword so it cannot be
    # mistaken for `end`.
    return self._get_mutated_sequences(start, start + 1, ref=ref)

def _dna_range_deletion(self, groups: tuple) -> tuple:
    """Delete a contiguous range of nucleotides from the coding sequence.

    Args:
        groups: ``(start, end, ref)``; ``start`` and ``end`` are 1-based,
            inclusive coding positions and ``ref`` is the expected
            reference sequence. When ``ref`` is empty or ``None`` it is
            read from the coding sequence.

    Returns:
        The tuple produced by ``_get_mutated_sequences`` for the
        half-open 0-based interval ``[start - 1, end)`` with no
        replacement sequence.
    """
    start, end, ref = groups
    # 1-based inclusive [start, end] -> 0-based half-open [first, stop)
    first, stop = int(start) - 1, int(end)
    if not ref:
        ref = self.get_coding_seq()[first:stop]
    # Forward the parsed end index; ref goes by keyword so it cannot be
    # mistaken for `end`.
    return self._get_mutated_sequences(first, stop, ref=ref)

def _dna_insertion(self, groups: tuple) -> tuple:
    """Insert a sequence between two adjacent coding positions.

    Args:
        groups: ``(start, end, alt)``; ``start`` and ``end`` are the
            1-based positions flanking the insertion site and ``alt`` is
            the sequence to insert.

    Returns:
        The tuple produced by ``_get_mutated_sequences``; the tests
        unpack it as ``(mutated_coding, mutated_full)``.

    Raises:
        ValueError: if ``end`` is not exactly ``start + 1``. An insertion
            is described by its two flanking positions, so any other
            pair is malformed rather than something to silently ignore.
    """
    start, end, alt = groups
    start, end = int(start), int(end)
    if end != start + 1:
        raise ValueError(
            f"Insertion positions must be adjacent, got {start}_{end}"
        )
    # The 1-based position of the left flank equals the 0-based index at
    # which the new bases are spliced in, so no "- 1" shift is applied.
    site = start
    # Zero-width slice: nothing is replaced. Slicing (rather than a
    # literal "") keeps the reference the same type as the coding seq.
    ref = self.get_coding_seq()[site:site]
    return self._get_mutated_sequences(site, site, ref=ref, alt=alt)

def _dna_duplication(self, groups: tuple) -> tuple:
    """Duplicate a range of the coding sequence in place.

    Args:
        groups: ``(start, end)`` as 1-based, inclusive coding positions.

    Returns:
        The tuple produced by ``_get_mutated_sequences`` after replacing
        the selected range with two consecutive copies of itself.
    """
    first_pos, last_pos = groups
    # 1-based inclusive range -> 0-based half-open slice bounds
    lo = int(first_pos) - 1
    hi = int(last_pos)
    segment = self.get_coding_seq()[lo:hi]
    return self._get_mutated_sequences(
        lo, hi, ref=segment, alt=segment + segment
    )
16 changes: 16 additions & 0 deletions tests/test_sequence_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,19 @@ def test_dna_range_deletion(self):
self.assertEqual(mt_full[40101:40104], "GCC")
with self.assertRaises(ValueError):
self.rev._dna_range_deletion(("34", "36", "AAA"))

def test_dna_insertion(self):
    """Insert ``AAA`` between coding positions 33 and 34 on ``self.rev``.

    Checks the inserted bases and the three bases after them in the
    coding sequence, then the corresponding windows of the full sequence.
    """
    tree = self.rev
    tree.get_coding_seq()
    coding, full = tree._dna_insertion(("33", "34", "AAA"))

    # Coding sequence: inserted bases, then the bases that follow them.
    self.assertEqual(coding[33:36], "AAA")
    self.assertEqual(coding[36:39], "GGT")

    # Full sequence windows (presumably reverse-complemented, given the
    # fixture name and the expected values).
    self.assertEqual(full[40107:40110], "TTT")
    self.assertEqual(full[40104:40107], "ACC")

def test_dna_duplication(self):
    """Duplicate coding positions 34-36 on ``self.rev``.

    Checks that the coding sequence holds two consecutive ``GGT`` copies
    and that the full sequence holds two consecutive ``ACC`` copies.
    """
    tree = self.rev
    tree.get_coding_seq()
    coding, full = tree._dna_duplication(("34", "36"))

    # Coding sequence: original copy followed by the duplicate.
    self.assertEqual(coding[33:36], "GGT")
    self.assertEqual(coding[36:39], "GGT")

    # Full sequence: both copies at the corresponding windows.
    self.assertEqual(full[40104:40107], "ACC")
    self.assertEqual(full[40107:40110], "ACC")

0 comments on commit e2d6f85

Please sign in to comment.