From ae49e7540cfc154a74e617a52d5e8fe17fb932e8 Mon Sep 17 00:00:00 2001
From: Damiano Clementel <damiano.clementel@gmail.com>
Date: Thu, 1 Aug 2024 00:10:51 +0200
Subject: [PATCH] Adds utils to ngx-structure-viewer and ngx-sequence-viewer

---
 projects/ngx-sequence-viewer/package.json  |   2 +-
 projects/ngx-structure-viewer/package.json |   2 +-
 projects/utils/fasta.spec.ts               |   0
 projects/utils/fasta.ts                    |  36 ++++++
 projects/utils/index.ts                    |   2 +
 projects/utils/mmcif.spec.ts               |   0
 projects/utils/mmcif.ts                    | 123 +++++++++++++++++++++
 projects/utils/parser.spec.ts              |   0
 projects/utils/parser.ts                   |  31 ++++++
 9 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 projects/utils/fasta.spec.ts
 create mode 100644 projects/utils/fasta.ts
 create mode 100644 projects/utils/index.ts
 create mode 100644 projects/utils/mmcif.spec.ts
 create mode 100644 projects/utils/mmcif.ts
 create mode 100644 projects/utils/parser.spec.ts
 create mode 100644 projects/utils/parser.ts

diff --git a/projects/ngx-sequence-viewer/package.json b/projects/ngx-sequence-viewer/package.json
index b9c7afc..069fc05 100644
--- a/projects/ngx-sequence-viewer/package.json
+++ b/projects/ngx-sequence-viewer/package.json
@@ -1,6 +1,6 @@
 {
   "name": "ngx-sequence-viewer",
-  "version": "0.0.5",
+  "version": "0.0.6",
   "peerDependencies": {
     "@angular/common": "^17.3.0",
     "@angular/core": "^17.3.0"
diff --git a/projects/ngx-structure-viewer/package.json b/projects/ngx-structure-viewer/package.json
index e88171c..5ddcbf1 100644
--- a/projects/ngx-structure-viewer/package.json
+++ b/projects/ngx-structure-viewer/package.json
@@ -1,6 +1,6 @@
 {
   "name": "ngx-structure-viewer",
-  "version": "0.0.16",
+  "version": "0.0.17",
   "license": "MIT",
   "author": {
     "name": "Damiano Clementel",
diff --git a/projects/utils/fasta.spec.ts b/projects/utils/fasta.spec.ts
new file mode 100644
index 0000000..e69de29
diff --git a/projects/utils/fasta.ts b/projects/utils/fasta.ts
new file mode 100644
index 0000000..3a60c70
--- /dev/null
+++ b/projects/utils/fasta.ts
@@ -0,0 +1,36 @@
+import { Parser } from './parser';
+
+export type Sequence = { sequence: string, label: string }; // One FASTA record: header text after '>' and its concatenated sequence lines
+
+/** Parses FASTA formatted text into a list of labelled sequences. */
+class FastaParser extends Parser<Sequence[]> {
+  /**
+   * Reads FASTA text: a line starting with '>' opens a new record, the lines
+   * following it are trimmed and concatenated into that record's sequence.
+   * @param text - Raw FASTA content, lines separated by '\n' and/or '\r'.
+   * @returns One entry per header line, in order of appearance.
+   * @throws Error if data precedes the first header or no header exists.
+   */
+  public override parseText(text: string): Sequence[] {
+    const parsed: Sequence[] = [];
+    // Record being filled, undefined until the first header is met
+    let current: Sequence | undefined;
+    for (const raw of text.split(/[\n\r]+/)) {
+      const line = raw.trim();
+      // Blank lines (e.g. a leading newline) carry no data: skip them
+      if (line === '') continue;
+      if (line.startsWith('>')) {
+        // Header: label is everything following the '>' character
+        current = { sequence: '', label: line.slice(1) };
+        parsed.push(current);
+      }
+      else if (current) current.sequence += line;
+      else throw new Error('Provided text is not in fasta format');
+    }
+    // Text without any header (e.g. empty text) is not FASTA either
+    if (parsed.length === 0) throw new Error('Provided text is not in fasta format');
+    return parsed;
+  }
+}
+
+export const FASTA = new FastaParser(); // Ready-to-use FASTA parser instance
\ No newline at end of file
diff --git a/projects/utils/index.ts b/projects/utils/index.ts
new file mode 100644
index 0000000..b79a817
--- /dev/null
+++ b/projects/utils/index.ts
@@ -0,0 +1,2 @@
+export { MMCIF, type Residues, type Residue } from './mmcif'; // `type` marks names that exist at compile time only
+export { FASTA, type Sequence } from './fasta';
diff --git a/projects/utils/mmcif.spec.ts b/projects/utils/mmcif.spec.ts
new file mode 100644
index 0000000..e69de29
diff --git a/projects/utils/mmcif.ts b/projects/utils/mmcif.ts
new file mode 100644
index 0000000..1d497e2
--- /dev/null
+++ b/projects/utils/mmcif.ts
@@ -0,0 +1,123 @@
+import { Parser } from './parser';
+
+export interface Residue {
+  // Residue sequence number, parsed from `_atom_site.auth_seq_id`
+  authSeqId: number;
+  // Insertion code from `_atom_site.pdbx_PDB_ins_code` ('?' is stored as '')
+  pdbInsCode: string;
+  // Residue name as written in `_atom_site.label_comp_id` (stored unconverted)
+  authCompId: string;
+}
+
+export type Residues = {
+  [model: number]: { // Key is the value of `_atom_site.pdbx_PDB_model_num`
+    [chain: string]: Residue[] // Key is the value of `_atom_site.auth_asym_id`
+  }
+}
+
+/** Parses the `_atom_site` table of mmCIF text into residues by model and chain. */
+class MMCIFParser extends Parser<Residues> {
+
+  // Standard amino acids, from three letter code to one letter code
+  // (lookup only: not applied to residue names while parsing)
+  readonly ThreeToOne = {
+    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C',
+    'GLN': 'Q', 'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
+    'LEU': 'L', 'LYS': 'K', 'MET': 'M', 'PHE': 'F', 'PRO': 'P',
+    'SER': 'S', 'THR': 'T', 'TRP': 'W', 'TYR': 'Y', 'VAL': 'V'
+  };
+
+  // Standard amino acids, from one letter code to three letter code
+  // (lookup only: not applied to residue names while parsing)
+  readonly OneToThree = {
+    'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS',
+    'Q': 'GLN', 'E': 'GLU', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
+    'L': 'LEU', 'K': 'LYS', 'M': 'MET', 'F': 'PHE', 'P': 'PRO',
+    'S': 'SER', 'T': 'THR', 'W': 'TRP', 'Y': 'TYR', 'V': 'VAL'
+  };
+
+  /**
+   * Extracts residues from the first `_atom_site` loop found in given text.
+   *
+   * Rows are split on whitespace, so quoted values containing blanks are not
+   * supported. The table ends at the first line starting with `#`, `_`,
+   * `loop_` or `data_`; blank lines and values beyond the declared columns
+   * are ignored. Consecutive atom rows sharing sequence number and insertion
+   * code within the same model and chain are collapsed into one residue.
+   *
+   * @param text - Content of an mmCIF file.
+   * @returns Residues grouped by model number, then by chain identifier.
+   * @throws Error if the text holds no `_atom_site` table, or if the table
+   * lacks one of the columns read from it.
+   */
+  public override parseText(text: string): Residues {
+    // Split text in lines, dropping surrounding blanks (e.g. trailing '\r')
+    const lines = text.split('\n').map(line => line.trim());
+    // Header is a run of consecutive `_atom_site.<name>` lines, one per column
+    const start = lines.findIndex(line => line.startsWith('_atom_site.'));
+    if (start < 0) throw new Error('Provided text does not contain an _atom_site table');
+    // Map each column name to its position within a row
+    const columns = new Map<string, number>();
+    // Index of the line under examination
+    let cursor = start;
+    while (cursor < lines.length && lines[cursor].startsWith('_atom_site.')) {
+      // Header lines are trimmed, hence the whole line is the column name
+      columns.set(lines[cursor], cursor - start);
+      cursor++;
+    }
+    // Resolve position of a column read below, failing early when absent
+    const position = (name: string): number => {
+      const index = columns.get('_atom_site.' + name);
+      if (index === undefined) throw new Error('Missing column _atom_site.' + name);
+      return index;
+    };
+    // Positions of the columns read from each row
+    const chainAt = position('auth_asym_id');
+    const modelAt = position('pdbx_PDB_model_num');
+    // NOTE(review): residue name comes from `label_comp_id` although it is
+    // stored as `authCompId`; confirm `auth_comp_id` is not the intended one
+    const nameAt = position('label_comp_id');
+    const numberAt = position('auth_seq_id');
+    const insertionAt = position('pdbx_PDB_ins_code');
+    // Table ends at a comment line (`#`) or where another item begins
+    const endOfTable = /^(?:#|_|loop_|data_)/;
+    // Initialize residues
+    const residues: Residues = {};
+    // `cursor` already points at the first row: it must not be advanced first,
+    // otherwise the first atom of the table is lost
+    for (; cursor < lines.length; cursor++) {
+      const line = lines[cursor];
+      // Blank lines hold no atom
+      if (line === '') continue;
+      // Stop at end of table, so that each row is read exactly once
+      if (endOfTable.test(line)) break;
+      // Split row in values, one per column
+      const values = line.split(/\s+/);
+      // Read a value of current row, with `?` (unknown value) stripped
+      const read = (at: number): string => (values[at] ?? '').replace(/\?/g, '');
+      // Get chain identifier
+      const authAsymId = read(chainAt);
+      // Get model number
+      const pdbxPDBModelNum = parseInt(read(modelAt), 10);
+      // Get residue name
+      const authCompId = read(nameAt);
+      // Get residue number
+      const authSeqId = parseInt(read(numberAt), 10);
+      // Get residue insertion code
+      const pdbInsCode = read(insertionAt);
+      // Initialize model, then chain within it
+      const chains = residues[pdbxPDBModelNum] ??= {};
+      const residueList = chains[authAsymId] ??= [];
+      // Atoms of a residue are listed one after the other: add residue only
+      // when it differs from the last one stored for this chain
+      const previous = residueList[residueList.length - 1];
+      if (!previous || previous.authSeqId !== authSeqId || previous.pdbInsCode !== pdbInsCode) {
+        residueList.push({ authSeqId, pdbInsCode, authCompId });
+      }
+    }
+    // Return residues
+    return residues;
+  }
+}
+
+export const MMCIF = new MMCIFParser(); // Ready-to-use mmCIF parser instance
\ No newline at end of file
diff --git a/projects/utils/parser.spec.ts b/projects/utils/parser.spec.ts
new file mode 100644
index 0000000..e69de29
diff --git a/projects/utils/parser.ts b/projects/utils/parser.ts
new file mode 100644
index 0000000..76e78e4
--- /dev/null
+++ b/projects/utils/parser.ts
@@ -0,0 +1,31 @@
+/** Base class for parsers accepting either raw text or a file-like `Blob`. */
+export abstract class Parser<T> {
+  /** Turns already decoded text into the parsed value; may throw on bad input. */
+  protected abstract parseText(text: string): T;
+
+  /**
+   * Reads given blob as UTF-8 text, then parses it.
+   * @param file - Blob (or File) holding the text to parse.
+   * @returns Promise resolved with the parsed value; rejected with the reader's
+   * error event when reading fails, or with whatever `parseText` throws.
+   */
+  protected parseFile(file: Blob): Promise<T> {
+    const reader = new FileReader();
+    reader.readAsText(file, 'utf-8');
+    return new Promise<T>((resolve, reject) => {
+      reader.onload = () => {
+        // A throw escaping this handler would leave the promise pending
+        try { resolve(this.parseText('' + reader.result)); }
+        catch (error) { reject(error); }
+      };
+      reader.onerror = error => reject(error);
+    });
+  }
+
+  /** Parses text synchronously, or the content of a blob asynchronously. */
+  public parse(input: string): T;
+  public parse(input: Blob): Promise<T>;
+  public parse(input: Blob | string): T | Promise<T> {
+    return typeof input === 'string' ? this.parseText(input) : this.parseFile(input);
+  }
+}
\ No newline at end of file