From c66cc41afffbb9eeb4992a2dad56e0e322163b45 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Fri, 1 Sep 2023 15:57:35 -0500 Subject: [PATCH] add notebook as example --- .gitignore | 1 + scripts/PxBLAT.ipynb | 365 ++++++++++++++++++++++++++++++++++++ src/pxblat/server/client.py | 2 +- src/pxblat/server/server.py | 2 +- tests/test_server.py | 1 + 5 files changed, 369 insertions(+), 2 deletions(-) create mode 100644 scripts/PxBLAT.ipynb diff --git a/.gitignore b/.gitignore index 3893e62c..1d123690 100755 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ test_server/ bugs/ docs/apidocs/* docs/api/* +!PxBLAT.ipynb diff --git a/scripts/PxBLAT.ipynb b/scripts/PxBLAT.ipynb new file mode 100644 index 00000000..7bf64dc7 --- /dev/null +++ b/scripts/PxBLAT.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "b8f3f44b-7413-4a13-a69c-29bab549053b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/ylk4626/miniforge3/bin/python\n" + ] + } + ], + "source": [ + "!which python" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0aa2f56a-07c3-43e0-956c-587e667c189d", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fc656558-7a8d-4328-8660-8a855f24f971", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[38;5;4m\u001b[1m fas\u001b[0m  test_case1_ground.psl  test_ref.fa\n", + " test_bit2fa_tmp.fa  test_case2.fa  test_ref_for_test.2bit\n", + " test_case1.fa  test_ref.2bit  test_ref_ground.2bit\n" + ] + } + ], + "source": [ + "!ls ../tests/data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "14366611-affb-4918-aa99-d072b4a09858", + "metadata": {}, + "outputs": [], + "source": [ + "test_fasta = Path(\"../tests/data/test_ref.fa\") \n", + "test_2bit = Path(\"../tests/data/test_ref.2bit\")\n", + "seq_dir = Path(\"../tests/data/\")\n", + "port = 65000\n", + "\n", + "fa_seq1 = \"TGAGAGGCATCTGGCCCTCCCTGCGCTGTGCCAGCAGCTTGGAGAACCCACACTCAATGAACGCAGCACTCCACTACCCAGGAAATGCCTTCCTGCCCTCTCCTCATCCCATCCCTGGGCAGGGGACATGCAACTGTCTACAAGGTGCCAA\"\n", + "fa_seq2 = Path(\"../tests/data/test_case1.fa\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "b93c6aaa-4609-478b-b289-6627a2083e39", + "metadata": {}, + "outputs": [], + "source": [ + "def print_query(results):\n", + " for idx, res in enumerate(results):\n", + " print(f\"\\n{idx} query: \\n {res}\")\n", + " if res is not None:\n", + " for idh, hsp in enumerate(res.hsps):\n", + " print(f\"\\n{idh} hit: \\n {hsp}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "bebb5e94-5f62-4e17-ab96-3dda912597f0", + "metadata": {}, + "outputs": [], + "source": [ + "from pxblat import Server, Client" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "c209cefc-4ec9-4eb8-9fa2-36a93ebb1f2b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "0 query: \n", + " Program: blat (v.37x1)\n", + " Query: /var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmp5_hjp7s_ (151)\n", + " \n", + " Target: \n", + " Hits: ---- ----- ----------------------------------------------------------\n", + " # # HSP ID + description\n", + " ---- ----- ----------------------------------------------------------\n", + " 0 1 chr1 \n", + "\n", + "0 hit: \n", + " Query: /var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmp5_hjp7s_ \n", + "Query range: [0:151] (1)\n", + " Hit range: [12699:12850] (1)\n", + "Quick stats: evalue ?; bitscore ?\n", + " Fragments: 1 (? columns)\n", + "\n", + "0 query: \n", + " Program: blat (v.37x1)\n", + " Query: /var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmpno6grxt8 (151)\n", + " \n", + " Target: \n", + " Hits: ---- ----- ----------------------------------------------------------\n", + " # # HSP ID + description\n", + " ---- ----- ----------------------------------------------------------\n", + " 0 1 chr1 \n", + "\n", + "0 hit: \n", + " Query: /var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmpno6grxt8 \n", + "Query range: [0:151] (1)\n", + " Hit range: [12699:12850] (1)\n", + "Quick stats: evalue ?; bitscore ?\n", + " Fragments: 1 (? columns)\n", + "\n", + "1 query: \n", + " Program: blat (v.37x1)\n", + " Query: case1 (151)\n", + " \n", + " Target: \n", + " Hits: ---- ----- ----------------------------------------------------------\n", + " # # HSP ID + description\n", + " ---- ----- ----------------------------------------------------------\n", + " 0 1 chr1 \n", + "\n", + "0 hit: \n", + " Query: case1 \n", + " Hit: chr1 \n", + "Query range: [0:151] (1)\n", + " Hit range: [12699:12850] (1)\n", + "Quick stats: evalue ?; bitscore ?\n", + " Fragments: 1 (? columns)\n" + ] + } + ], + "source": [ + "client = Client(\n", + " host=\"localhost\",\n", + " port=port,\n", + " seq_dir=seq_dir,\n", + " min_score=20,\n", + " min_identity=90,\n", + ")\n", + "server_option = Server.create_option().withCanStop(True).withStepSize(5).build()\n", + "\n", + "with Server(\"localhost\", port, test_2bit, server_option) as server:\n", + " server.wait_ready()\n", + " assert server.is_ready()\n", + " status = server.status(instance=True)\n", + " ret1 = client.query(fa_seq1)\n", + " ret2 = client.query([fa_seq1, fa_seq2])\n", + " \n", + " print_query(ret1)\n", + " print_query(ret2)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "93b2a02b-3c9c-45da-a1e8-c1cc49858db3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Server(localhost, 65000, ready: False open: False\n", + "ServerOption(canStop: true, log: , logFacility: , mask: false, maxAaSize: 8000, maxDnaHits: 100, maxGap: 2, maxNtSize: 40000, maxTransHits: 200, minMatch: 2, repMatch: 2252, seqLog: false, ipLog: false, debugLog: false, tileSize: 11, stepSize: 5, trans: false, syslog: false, perSeqMax: , noSimpRepMask: false, indexFile: , timeout: 90, genome: , genomeDataDir: , threads: 1, allowOneMismatch: false))" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "server" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "cf765aa2-4516-4573-9394-d3834efb6d6c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[QueryResult(id='/var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmpege3hbu1', 1 hits)]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ret1" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ddce6b47-28a3-4d92-962a-9e26c323c66f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[QueryResult(id='/var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmpu1629f8l', 1 hits),\n", + " QueryResult(id='case1', 1 hits)]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ret2" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9da9c6af-95b6-4b94-90ec-ee4a46bcc9b1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Query: case1 \n", + " Hit: chr1 \n", + "Query range: [0:151] (1)\n", + " Hit range: [12699:12850] (1)\n", + "Quick stats: evalue ?; bitscore ?\n", + " Fragments: 1 (? columns)\n" + ] + } + ], + "source": [ + "print(ret2[1].hsps[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "e4583d99-d0a1-4427-9a35-742d5977bc1c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "0 query: \n", + " Program: blat (v.37x1)\n", + " Query: /var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmpu1629f8l (151)\n", + " \n", + " Target: \n", + " Hits: ---- ----- ----------------------------------------------------------\n", + " # # HSP ID + description\n", + " ---- ----- ----------------------------------------------------------\n", + " 0 1 chr1 \n", + "\n", + "0 hit: \n", + " Query: /var/folders/s3/vs6nrrg52sdfjk3z90p7ndt94gg4tq/T/tmpu1629f8l \n", + "Query range: [0:151] (1)\n", + " Hit range: [12699:12850] (1)\n", + "Quick stats: evalue ?; bitscore ?\n", + " Fragments: 1 (? columns)\n", + "\n", + "1 query: \n", + " Program: blat (v.37x1)\n", + " Query: case1 (151)\n", + " \n", + " Target: \n", + " Hits: ---- ----- ----------------------------------------------------------\n", + " # # HSP ID + description\n", + " ---- ----- ----------------------------------------------------------\n", + " 0 1 chr1 \n", + "\n", + "0 hit: \n", + " Query: case1 \n", + " Hit: chr1 \n", + "Query range: [0:151] (1)\n", + " Hit range: [12699:12850] (1)\n", + "Quick stats: evalue ?; bitscore ?\n", + " Fragments: 1 (? columns)\n" + ] + } + ], + "source": [ + "print_query(ret2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b92da0eb-9b79-42c1-b2e0-d1b9323a6408", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "814c3678-8dab-42bf-8ea7-0b5df3f0b538", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/pxblat/server/client.py b/src/pxblat/server/client.py index efd2ed97..71655ba5 100644 --- a/src/pxblat/server/client.py +++ b/src/pxblat/server/client.py @@ -548,4 +548,4 @@ def query(self, in_seqs: INSEQS | list[str] | list[Path] | INSEQ): ) group = Pool(1) - yield from group.imap(self._query, in_seqs) + return list(group.imap(self._query, in_seqs)) diff --git a/src/pxblat/server/server.py b/src/pxblat/server/server.py index 2f213d02..c314a0bf 100644 --- a/src/pxblat/server/server.py +++ b/src/pxblat/server/server.py @@ -350,7 +350,7 @@ def create_option() -> ServerOption: def __str__(self) -> str: """Return server option as a string.""" - return f"Server({self.host}, {self.port}, ready: {self.is_ready()} open: {self.is_open()}\n {self.option})" + return f"Server({self.host}, {self.port}, ready: {self.is_ready()} open: {self.is_open()}\n{self.option})" __repr__ = __str__ diff --git a/tests/test_server.py b/tests/test_server.py index b3f55ed6..6d5dc4f0 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -1,6 +1,7 @@ import time import pytest +from kent.src.hg.utils.otto.sarscov2phylo.bioSampleJsonToTab import epiIdFromRecordAttrs from pxblat import Client from pxblat import ClientOption from pxblat import UsageStats