-
Notifications
You must be signed in to change notification settings - Fork 1
/
seqel-pro-mode.el
348 lines (274 loc) · 11.4 KB
/
seqel-pro-mode.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
;;; seqel-pro-mode.el --- A minor mode for editing protein sequences
;; Copyright (C) 2021 Zech Xu
;; Author: Zech Xu
;; Version: 1.0
;; License: BSD-3
;; URL: https://github.com/RNAer/seqel
;;; Commentary:
;; A minor mode that provides collection of functions for editing protein sequences.
;;; Code:
(require 'seqel-util)
(defvar seqel-pro-mode-hook nil
"*Hook to setup `seqel-pro-mode'.")
(defvar seqel-pro-aa-alist
;; put a question mark before the char will evaluate it to digit ascii code
'((?A "Ala" 71.09 ?a)
(?B "Asx" 114 ?n ?d) ; Asn Asp
(?C "Cys" 103.15 ?c)
(?D "Asp" 115.09 ?d)
(?E "Glu" 129.12 ?e)
(?F "Phe" 147.18 ?f)
(?G "Gly" 57.05 ?g)
(?H "His" 137.14 ?h)
(?I "Ile" 113.16 ?i)
(?J "Xle" 113.16 ?i ?l) ;Leu Ile
(?K "Lys" 128.17 ?k)
(?L "Leu" 113.16 ?l)
(?M "Met" 131.19 ?m)
(?N "Asn" 114.11 ?n)
(?P "Pro" 97.12 ?p)
(?Q "Gln" 128.14 ?q)
(?R "Arg" 156.19 ?r)
(?S "Ser" 87.08 ?s)
(?T "Thr" 101.11 ?t)
(?V "Val" 99.14 ?v)
(?W "Trp" 186.21 ?w)
(?X "Xaa" 92 ?.) ; unknown aa; set it weight to average weight
(?Y "Tyr" 163.18 ?y)
(?Z "Glx" 128 ?e ?q)) ; Glu Gln
"*A association list of 1-letter, 3-letter IUPAC codes and molecular weights.
This is the molecular weight with H2O subtracted.
For each inner list, the first element is allowed AA; the second
element is the three-letter code of the first, and the last is
the molecular weight. for the first.")
;;;;; END OF USER CUSTOMIZABLE VARIABLES
(defvar seqel-pro-alphabet-set
(let ((alphabet-set (make-hash-table :test 'eq :size (* 2 (length seqel-pro-aa-alist)))))
(dolist (l seqel-pro-aa-alist)
(puthash (downcase (car l)) t alphabet-set)
(puthash (car l) t alphabet-set))
alphabet-set)
"The set of all legal alphabets in protein sequences.
This is a hash table: keys are char (including both lower case
and upper case) and values are t. It serves like a set object
similar in Python language.")
(defvar seqel-pro-aa-hydrophobic '(?A ?I ?L ?M ?F ?V ?W))
(defvar seqel-pro-aa-pos '(?K ?R))
(defvar seqel-pro-aa-neg '(?E ?D))
(defvar seqel-pro-aa-polar '(?N ?Q ?S ?T))
(defvar seqel-pro-aa-aromatic '(?H ?Y))
(eval (macroexpand `(seqel--def-char-face "hydrophobic" "RoyalBlue" "white" "pro-aa-background")))
(eval (macroexpand `(seqel--def-char-face "pos" "brown" "white" "pro-aa-background")))
(eval (macroexpand `(seqel--def-char-face "neg" "purple" "white" "pro-aa-background")))
(eval (macroexpand `(seqel--def-char-face "polar" "green yellow" "white" "pro-aa-background")))
(eval (macroexpand `(seqel--def-char-face "aromatic" "cyan4" "white" "pro-aa-background")))
(eval (macroexpand `(seqel--def-char-face "hydrophobic" "white" "RoyalBlue" "pro-aa-foreground")))
(eval (macroexpand `(seqel--def-char-face "pos" "white" "brown" "pro-aa-foreground")))
(eval (macroexpand `(seqel--def-char-face "neg" "white" "purple" "pro-aa-foreground")))
(eval (macroexpand `(seqel--def-char-face "polar" "white" "green yellow" "pro-aa-foreground")))
(eval (macroexpand `(seqel--def-char-face "aromatic" "white" "cyan4" "pro-aa-foreground")))
(defvar seqel-pro-aa-mw
(let ((mw-vec (make-vector 256 nil))
aa mw)
(dolist (element seqel-pro-aa-alist)
(setq aa (car element))
(setq mw (nth 2 element))
(aset mw-vec aa mw)
(aset mw-vec (downcase aa) mw))
mw-vec)
"A vector of amino acid molecular weights in Dalton.")
(defvar seqel-pro-aa-1-vec
(let ((vec (make-vector 256 nil))
aa1 aa3)
(dolist (element seqel-pro-aa-alist)
(setq aa1 (car element))
(setq aa3 (nth 1 element))
(aset vec aa1 aa3)
(aset vec (downcase aa1) aa3))
vec)
"A vector of 3-letter amino acid codes.
It is used to convert 1-letter codes to 3-letter codes.")
(defvar seqel-pro-aa-3-hash
(let ((my-hash (make-hash-table :test 'equal :size (length seqel-pro-aa-alist))))
(dolist (element seqel-pro-aa-alist)
(puthash (nth 1 element) (car element) my-hash))
my-hash)
"A hash table with 3-letter code as key and 1-letter code as value.
It is used to convert 3-letter codes to 1-letter codes.")
(defun seqel-pro-weight (beg end)
"Return molecular weight of the region BEG and END or the current line."
(interactive (seqel-region-or-line))
(let ((sum-mw 0) (times (- end beg)) char mw)
(save-mark-and-excursion
(goto-char beg)
(dotimes (x times)
(setq char (char-after))
(setq mw (aref seqel-pro-aa-mw char))
(cond (mw
(setq sum-mw (+ sum-mw mw)))
((not (gethash char seqel-cruft-set))
(error "Ambiguous or illegal char %s at position line %d column %d"
char (line-number-at-pos) (current-column))))
(forward-char)))
(message "The molecular weight is %.2f" sum-mw)
sum-mw))
(defun seqel-pro-1-2-3 (beg end)
"Convert 1-letter IUPAC code to 3-letter IUPAC code.
BEG and END defines the region to operate on."
(interactive (seqel-region-or-line))
(condition-case err
(let ((times (- end beg)) char)
(goto-char beg)
(dotimes (x times)
(setq char (char-after))
(cond ((gethash char seqel-pro-alphabet-set)
(insert (aref seqel-pro-aa-1-vec (char-after)))
(delete-char 1))
(t
(error "Ambiguous or illegal amino acid letter %c at line %d column %d"
char (line-number-at-pos) (current-column))))))
((debug error)
(primitive-undo 1 buffer-undo-list)
(error "%s" (error-message-string err)))))
(defun seqel-pro-3-2-1 (beg end)
"Convert 3-letter IUPAC code to 1-letter IUPAC code.
Currently it only converts 3-letter codes without any characters
separating them.
Interactively, BEG and END are the begin and end of the active
region or the current line if no region is active."
(interactive (seqel-region-or-line))
(condition-case err
(let ((times (/ (- end beg) 3))
code letter)
(goto-char beg)
(dotimes (x times)
(setq code (buffer-substring (point) (+ 3 (point))))
(setq letter (gethash code seqel-pro-aa-3-hash))
(if letter
(insert-char letter)
(error "Unknown 3-letter amino acid code '%s' at position %d"
code (point)))
(delete-char 3)))
;; return to the original state if error is met.
((debug error)
(primitive-undo 1 buffer-undo-list)
(error "%s" (error-message-string err)))))
;;;###autoload
(defun seqel-pro-move-forward (count)
"Move forward COUNT number of amino acids.
See `seqel-nuc-move-forward'"
(interactive "p")
(seqel-forward-char count seqel-pro-alphabet-set))
(defun seqel-pro-move-backward (count)
"Move backward COUNT number of amino acides, similar to `seqel-pro-move-forward'."
(interactive "p")
;; (proceed-char-repeatedly count 'backward-char))
(seqel-forward-char (- count) seqel-pro-alphabet-set))
;;; delete
(defun seqel-pro-delete-forward (count)
"Delete COUNT number of amino acids starting from the point.
See also `nuc-delete-forward'."
(interactive "p")
(let ((pos (point)))
(seqel-forward-char count seqel-pro-alphabet-set)
(delete-region pos (point))))
(defun seqel-pro-delete-backward (count)
"Delete backward COUNT number of AA from the point.
See also `seqel-nuc-delete-backward'."
(interactive "p")
(let ((pos (point)))
(seqel-forward-char (- count) seqel-pro-alphabet-set)
(delete-region pos (point))))
(defun seqel-pro-count (beg end)
"Count the amino acid in the region or in the current line).
Return the count if the region contains only legal amino acid
characters, including `seqel-pro-alphabet-set', `seqel-cruft-set';
otherwise return nil and report the location of the invalid
characters in the echo region.
Interactively, BEG and END are the begin and end of the active
region or the current line if no region is active."
(interactive (seqel-region-or-line))
(let ((length (seqel-count beg end seqel-pro-alphabet-set)))
(and length
(called-interactively-p 'interactive)
(message "Amino acid count: %d" length))
length))
(defalias 'seqel-pro-p 'seqel-pro-count)
(defun seqel-pro-summary (beg end)
"Summarize the frequencies of amino acids in the region or the current line.
Interactively, BEG and END are the begin and end of the active
region or the current line if no region is active.
See also `seqel-summary'."
(interactive (seqel-region-or-line))
(seqel-summary beg end seqel-pro-alphabet-set))
;; define aa faces belonging to pro-aa-face group
(defvar seqel-pro-aa-colors
(seqel--zip #'(lambda (x y) (cons (car x) y)) seqel-pro-aa-alist seqel-color-pairs-cycle)
"Background and foreground colors for each IUPAC bases.
This is a list of lists. For each inner list, it contains 3 atoms:
a nuc base in char type, hex-code colors for foreground and background")
;; create faces
(mapc (lambda (elem)
(let ((l (format "%c" (nth 0 elem)))
(f (nth 2 elem))
(b (nth 1 elem)))
(eval (macroexpand `(seqel--def-char-face ,l ,b ,f "pro-aa-face")))))
seqel-pro-aa-colors)
;;;###autoload
(defun seqel-pro-paint-alternative (beg end)
(interactive "r")
(if (not (use-region-p))
(setq beg (line-beginning-position)
end (line-end-position)))
(save-mark-and-excursion
(let (char face)
(goto-char beg)
(dotimes (i (- end beg))
(setq char (char-after))
(cond ((member char seqel-pro-aa-hydrophobic)
(setq face 'pro-aa-background-hydrophobic))
((member char seqel-pro-aa-polar)
(setq face 'pro-aa-background-polar))
((member char seqel-pro-aa-pos)
(setq face 'pro-aa-background-pos))
((member char seqel-pro-aa-neg)
(setq face 'pro-aa-background-neg))
((member char seqel-pro-aa-aromatic)
(setq face 'pro-aa-background-aromatic))
(t (setq face 'default)))
(with-silent-modifications
(put-text-property (+ beg i) (+ beg i 1) 'font-lock-face face))
(forward-char)))))
;;;###autoload
(defun seqel-pro-paint (beg end &optional case)
"Color the protein sequence from BEG to END.
If the CASE is nil, upcase and lowercase base chars will be colored the same;
otherwise, not. See `seqel-paint' for details."
(interactive "r\nP")
(if (not (use-region-p))
(setq beg (line-beginning-position)
end (line-end-position)))
(seqel-paint beg end "pro-aa-face" case))
;;;###autoload
(defalias 'seqel-pro-unpaint 'seqel-unpaint
"Uncolor the region of protein sequence.
This is an alias to `seqel-unpaint'.")
(defvar seqel-pro-mode-map
(let ((map (make-sparse-keymap)))
(define-key map "\C-c\C-p\C-f" 'seqel-pro-move-forward)
(define-key map "\C-c\C-p\C-b" 'seqel-pro-move-backward)
(define-key map "\C-c\C-p\C-w" 'seqel-pro-weight)
(define-key map "\C-c\C-p\C-s" 'seqel-pro-summary)
map)
"Keymap for 'seqel-pro-mode' minor mode.")
(define-minor-mode seqel-pro-mode
"Protein mode
It should be not enabled with `nuc-mode' at the same time."
:init-value nil
;; the name, a string, to show in the modeline
:lighter " protein"
:keymap seqel-pro-mode-map
(setq-local seqel-isearch-p t)
(run-hooks 'seqel-pro-mode-hook))
(provide 'seqel-pro-mode)
;;; seqel-pro-mode.el ends here