- XBB.1.16 (Omicron): India records the spike earlier than the rest of the countries. The lowest frequency in India is higher than the peaks of all other countries.
- XBB: India and Germany record high frequencies (70.92% and 50% respectively), while the other countries stay below 20%.
- Norway has no occurrences of XBB.1.9.2 or XBB.1.16, while the other countries have recorded occurrences.
- EG.5 is more prevalent in Norway and India for two consecutive months than in the other countries.
- XBB.1.9.1, EG.5: Norway records occurrences for only two months, and these are higher than the frequencies in the other countries. No gradual increase or decrease is recorded.

The CI would indicate how reliable these sudden occurrences are.

- XBB.1.9.1: not present in Australia.
- B.1.1.529 (Omicron): the final spike after a downhill trend during the last month of occurrence (Jun23) in Norway (66.66%) and Germany (16%) can be interesting.
- CH.1.1: Norway records high frequencies (61%) compared to the other countries.
- XBB.2.3: starts in Jan23 in India and is continuous until May23. There is a break of three months (Jun23, Jul23, Aug23), then a spike (the highest frequency) in Sept23-Oct23 (interesting), while in the other countries the trend is monotonous.
10. BA.2.86

- Occurrences are observed only in Denmark, Germany, Spain, UK and USA.
- Although the frequency of occurrences in all these countries is below 10%, by the 95% CI Denmark records the highest frequency of occurrence (40%) in Jul23.
- Denmark records occurrences from Jul23 to Sep23; Spain and the UK record occurrences from Sep23 to Oct23. Germany and the USA record occurrences only in Oct23, and these are very low (0.9% and 0.3% respectively).
All the following plots might change if the lineage mapping is done based on the characteristic mutations obtained from OutbreakInfo. Presently the mapping is based on mutations obtained from the GISAID database for each country.
No data are available for the following variants: B.1.429 (Epsilon), B.1.525 (Eta), B.1.526 (Iota), C.37 (Lambda), B.1.621 (Mu), P.3 (Theta), P.2 (Zeta), B.1.640.
\n","noteIndex":{"id":"paa0s59lp320n6q8rghycjw","title":"Work on SARS Cov2","desc":"","updated":1700240746536,"created":1700233379150,"custom":{"nav_order":0,"permalink":"/"},"fname":"root","type":"note","vault":{"fsPath":".","selfContained":true,"name":"Literature"},"contentHash":"6124b7f80065618c049c13a49f0dbed1","links":[],"anchors":{},"children":["1hnn8maoednnxdw5feqd4kq","nve3ld8zkeg6b2hgygrh1rp","v4t96j5kch5mq12272qsyih","r423m96u71ix4pb458fk8u2"],"parent":null,"data":{},"body":"\n"},"collectionChildren":null,"customHeadContent":null,"config":{"version":5,"dev":{"enablePreviewV2":true,"enableSelfContainedVaults":true},"commands":{"lookup":{"note":{"selectionMode":"extract","confirmVaultOnCreate":true,"vaultSelectionModeOnCreate":"smart","leaveTrace":false,"bubbleUpCreateNew":true,"fuzzThreshold":0.2}},"randomNote":{},"insertNoteLink":{"aliasMode":"none","enableMultiSelect":false},"insertNoteIndex":{"enableMarker":false},"copyNoteLink":{"aliasMode":"title"},"templateHierarchy":"template"},"workspace":{"vaults":[{"fsPath":".","selfContained":true,"name":"Literature"}],"journal":{"dailyDomain":"daily","name":"journal","dateFormat":"y.MM.dd","addBehavior":"childOfDomain"},"scratch":{"name":"scratch","dateFormat":"y.MM.dd.HHmmss","addBehavior":"asOwnDomain"},"task":{"name":"task","dateFormat":"y.MM.dd","addBehavior":"asOwnDomain","statusSymbols":{"":" ","wip":"w","done":"x","assigned":"a","moved":"m","blocked":"b","delegated":"l","dropped":"d","pending":"y"},"taskCompleteStatus":["done","x"],"prioritySymbols":{"H":"high","M":"medium","L":"low"},"todoIntegration":false,"createTaskSelectionType":"selection2link"},"graph":{"zoomSpeed":1,"createStub":false},"enableAutoCreateOnDefinition":false,"enableXVaultWikiLink":false,"enableRemoteVaultInit":true,"enableUserTags":true,"enableHashTags":true,"workspaceVaultSyncMode":"noCommit","enableAutoFoldFrontmatter":false,"enableEditorDecorations":true,"maxPreviewsCached":10,"maxNoteLength":204800,"enableFullHierarchyNoteTitle":false},"preview":{"enableFMTitle":true,"enableNoteTitleForLink":true,"enableFrontmatterTags":true,"enableHashesForFMTags":false,"enablePrettyRefs":true,"enableKatex":true,"automaticallyShowPreview":false},"publishing":{"enableFMTitle":true,"enableNoteTitleForLink":true,"enablePrettyRefs":true,"enableKatex":true,"copyAssets":true,"siteHierarchies":["root"],"writeStubs":false,"siteRootDir":"docs","seo":{"title":"Dendron","description":"Personal Knowledge Space"},"github":{"enableEditLink":true,"editLinkText":"Edit this page on GitHub","editBranch":"main","editViewMode":"tree"},"enableSiteLastModified":true,"enableFrontmatterTags":true,"enableHashesForFMTags":false,"enableRandomlyColoredTags":true,"enableTaskNotes":true,"enablePrettyLinks":true,"searchMode":"search","siteUrl":"https://vishnushiri02.github.io","assetsPrefix":"/SARS-Cov2","siteFaviconPath":"favicon.ico","siteIndex":"root"}}},"__N_SSG":true} \ No newline at end of file diff --git a/_next/data/FciE7KLU5cJc2fcCtWpj3/notes/drw8w7u2kcr0z6nl2ekgoea.json b/_next/data/FciE7KLU5cJc2fcCtWpj3/notes/drw8w7u2kcr0z6nl2ekgoea.json new file mode 100644 index 0000000..3fcc93b --- /dev/null +++ b/_next/data/FciE7KLU5cJc2fcCtWpj3/notes/drw8w7u2kcr0z6nl2ekgoea.json @@ -0,0 +1 @@ +{"pageProps":{"note":{"id":"drw8w7u2kcr0z6nl2ekgoea","title":"Expected_immunity","desc":"Work done to compute expected 
immunity","updated":1715068525419,"created":1712142695419,"custom":{},"fname":"Work_documented.Expected_immunity","type":"note","vault":{"fsPath":".","selfContained":true,"name":"Literature"},"contentHash":"230c3df63d89fd16c1694c31f9d47e17","links":[],"anchors":{"objective":{"type":"header","text":"Objective:","value":"objective","line":7,"column":0,"depth":2},"introduction":{"type":"header","text":"Introduction:","value":"introduction","line":11,"column":0,"depth":2},"calculation-of-probability-of-neutralisation":{"type":"header","text":"Calculation of probability of neutralisation:","value":"calculation-of-probability-of-neutralisation","line":19,"column":0,"depth":2},"calculating-the-expected-immunity-against-a-variantaa_substitution":{"type":"header","text":"calculating the expected immunity against a variant/aa_substitution:","value":"calculating-the-expected-immunity-against-a-variantaa_substitution","line":33,"column":0,"depth":2},"algorithm-of-computation-of-expected-immunity":{"type":"header","text":"Algorithm of computation of expected immunity:","value":"algorithm-of-computation-of-expected-immunity","line":44,"column":0,"depth":2},"observations-and-modifications":{"type":"header","text":"Observations and modifications:","value":"observations-and-modifications","line":87,"column":0,"depth":2},"visualisation-and-analysis":{"type":"header","text":"Visualisation and analysis:","value":"visualisation-and-analysis","line":96,"column":0,"depth":2},"further-questions":{"type":"header","text":"Further questions:","value":"further-questions","line":111,"column":0,"depth":2}},"children":[],"parent":"r423m96u71ix4pb458fk8u2","data":{}},"body":"Finding the expected immunity towards an amino acid substitution. This can hint the possibility of occurance of a mutation. When high number of people are immune to a aa_substitution over a time, then this substitution is more likely to undergo another substitution/change.
Worked example: let $y = \text{Spike\_F140I}$, $X = \{\text{Spike\_F140I}, \text{Spike\_F140F}\}$ (all variants carrying a mutation at position 140) and $t = $ 03-01-2022. The expected immunity against $y$ at time $t$ sums, over the variants $x \in X$ and the days $s < t$, the frequency $\pi_x(s)$ times the probability of neutralisation at position 140:

$$
\begin{aligned}
E[\text{S\_F140I}_{\text{immune}}(\text{03-01-2022})] ={} & \pi_{\text{S\_F140I}}(\text{01-01-2022}) \cdot P1_{\text{Neut}}(pos{=}140,\ t{=}\text{01-01-2022}) \\
 +{} & \pi_{\text{S\_F140I}}(\text{02-01-2022}) \cdot P1_{\text{Neut}}(pos{=}140,\ t{=}\text{02-01-2022}) \\
 +{} & \pi_{\text{S\_F140F}}(\text{01-01-2022}) \cdot P2_{\text{Neut}}(pos{=}140,\ t{=}\text{01-01-2022}) \\
 +{} & \pi_{\text{S\_F140F}}(\text{02-01-2022}) \cdot P2_{\text{Neut}}(pos{=}140,\ t{=}\text{02-01-2022})
\end{aligned}
$$

where $P1_{\text{Neut}}$ is used when $x = y$ and $P2_{\text{Neut}}$ when $x \neq y$.
This way, for each variant/aa_substitution the expected immunity is calculated for every day in the period of observation.
```
Function get_dis_fact:
  Pass in: start_date, end_date
  num_day = end_date - start_date
  k is the half-life (decay) constant
  For i = 0 to num_day:
    discount_factor_vector.append(round(exp(-k * i), 3))
  return(discount_factor_vector)

Function get_bv:
  Pass in: fr, IC50
  Constants: start_date, end_date
  call get_dis_fact
  time_iter = start_date
  while (time_iter < end_date):
    days_diff = time_iter - start_date
    bv_vec.append(discount_factor_vector[days_diff] /
                  (fr * IC50 + discount_factor_vector[days_diff]))
    increment time_iter
  return(bv_vec)

Function Exp_immune:
  Pass in: Frequency_df, P1_neut, P2_neut, incidence
  Constant: start_date
  For y in variants:
    For d in observation_dates:
      initialise data frames freq_df, pneut_df, incidence_df
      dates = [start_date, d)          # d not included, s < t
      dates_diff = dates - start_date
      For x in variants with a mutation at the same position as y:
        freq_df.append(Frequency_df[dates, x])
        incidence_df.append(incidence[dates])
        if y == x:
          pneut_df.append(P1_neut[pos, dates_diff])
        else:
          pneut_df.append(P2_neut[pos, dates_diff])
      double_sum = sum(freq_df * pneut_df * incidence_df)
      immune_df.append(double_sum)
  return(immune_df)
```
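A minimal R sketch of the discount-factor and binding-value helpers described in the pseudocode above; the function and argument names follow the pseudocode, while the decay constant k and the example fr/IC50 inputs are placeholder assumptions, not values from the thesis scripts.

```r
# Sketch only: k and the example fr/IC50 values are assumptions.
get_dis_fact <- function(start_date, end_date, k = 0.01) {
  num_day <- as.integer(end_date - start_date)
  round(exp(-k * (0:num_day)), 3)                 # discount factor per day offset
}

get_bv <- function(fr, IC50, start_date, end_date, k = 0.01) {
  disc    <- get_dis_fact(start_date, end_date, k)
  offsets <- 0:(as.integer(end_date - start_date) - 1)
  disc[offsets + 1] / (fr * IC50 + disc[offsets + 1])  # binding value per day
}

# Example usage with toy numbers:
# get_bv(fr = 2, IC50 = 0.5, as.Date("2022-01-01"), as.Date("2022-01-10"))
```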
All the scripts are present in "/Users/vishnushirishyamsaisundar/Documents/Master_Thesis_work/Work/Data_Analysis/Expected_immunity_computation".

The next question would be how to incorporate multiple infections: at what time stamps should another infection be introduced?
DSSP was run on the structure:

```
dssp 6xlu.pdb 6xlu.dssp
```

Note: a pairwise sequence alignment was done between the UniProt spike sequence and the sequence obtained from the 6xlu structure. The sequences align fairly well.
The numbers in Work/Data_Analysis/chosen_ten_country_submission.csv do not match the number of downloaded entries. The monthly counts of the downloaded entries were computed as follows:

```r
month_numbers <- list()
for (i in names(ten_country_mut_data)) {
  month_numbers[[i]] <- data.frame(
    ten_country_mut_data[[i]] %>%
      group_by(format(Collected_date, "%Y-%m")) %>%
      count()
  )
  colnames(month_numbers[[i]]) <- c("Month", "number")
}
```
The result of the above code was manually compared with the numbers in the file, and the months where deviations occurred were noted. GISAID was then queried again for the number of entries for one of the months where the values deviated in the South Korea data (Oct23). The result of the query matched neither the number in the file nor the number obtained from the snippet; it was higher than both. This is when I realised that entries for older months can be uploaded to GISAID at any time. This can be seen through the submission date column in GISAID: entries with an old collection date and a recent submission date. So the difference between the numbers in the file and the numbers from the downloaded entries can be explained by the data submitted during the time gap between compiling the CSV file and downloading the data (nearly one week).

Is it possible to predict the positions that are vulnerable to forthcoming mutations?
- Work/Data_Analysis/Monthwise_data_submission.Rmd
- Work/Data_Analysis/country_monthly_submission.csv
- Work/Data_Analysis/chosen_ten_country_submission.csv
- Work/Data_Analysis/plots/ten_Country_data_trend_plots.pdf
- Work/Data_Analysis/ten_country_crude_data_seq (the essential-column files are in the folder Work/Data_Analysis/ten_country_ess_data)
The download script is Work/Data_Analysis/Modified_GISAIDRdownload.R. The modification can be done by calling fix(Download) in R and editing the script as needed. The downloaded mutation data are stored in Work/Data_Analysis/ten_country_mut_data.
There were some inconsistencies in the date entries (only a year with no month, or a year and month with no day); such entries were removed. In the course of doing this it was identified that the number of entries downloaded for each country does not exactly match the numbers in Work/Data_Analysis/chosen_ten_country_submission.csv, except for a few countries. After removing the inconsistent date entries, the numbers (downloaded vs. number in file) for India match. The inconsistencies are reported in the file Work/Data_Analysis/inconsistencies_in_data.numbers. The number of downloaded entries is higher than the numbers in the file. After looking into it, it was observed that a few more entries were added for these countries in the time gap (around a week) between compiling Work/Data_Analysis/chosen_ten_country_submission.csv and downloading the data.

Summary of the downloaded data:
| Country | Total number of entries downloaded (Jan22-Oct23) | Entries after removing the date inconsistencies | Number of unassigned | Percentage of unassigned |
|---|---|---|---|---|
| Asia / India | 18106 | 15783 | 0 | 0 % |
| Asia / South Korea | 13808 | 13808 | 0 | 0 % |
| Europe / Denmark | 163900 | 163900 | 7 | 0.004 % |
| Europe / Germany | 66374 | 66236 | 0 | 0 % |
| Europe / Norway | 12264 | 12264 | 0 | 0 % |
| Europe / Spain | 17991 | 17921 | 0 | 0 % |
| Europe / United Kingdom | 32835 | 30597 | 74 | 0.242 % |
| North America / Canada | 20791 | 20791 | 0 | 0 % |
| North America / USA | 75487 | 74971 | 5 | 0.007 % |
| Oceania / Australia | 13267 | 13257 | 5 | 0.038 % |
Mapping of lineages to their parental lineage is done based on the Pangolin lineage naming string and the spike mutations:
From the files in Work/Data_Analysis/ten_country_mut_data the columns corresponding to the mutations and the Pango lineage were extracted and stored in a separate file, Work/Data_Analysis/ten_country_mut_data/ten_country_lineage_mut.csv. This is done using the following awk command:

```bash
awk -F";" '{gsub(/[()]/,"",$6); print $5";"$6}' \
  ten_country_mut_data/* \
  > ten_country_mut_data/ten_country_lineage_mut.csv
```

gsub is used to remove the brackets in the AA_mutations column.
Columns of the CSV files are separated by a semicolon ';'. All the following steps are coded in the Rmd file Work/Data_Analysis/Lineage_mapping.Rmd.

All the mutations pertaining to each of the lineages were combined, and from these only the spike mutations were considered for the downstream work. The Jaccard index (intersection of sets / union of sets) is used for the comparison: if the calculated Jaccard value is less than 0.5, the lineages being compared are considered either as parental or as neighbours, depending on their Pangolin string. (Question on the threshold.)
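As a small illustration of the Jaccard comparison used here, a hypothetical helper computing the index between two spike-mutation sets (the mutation lists below are made up):

```r
# Jaccard index = |intersection| / |union| of two mutation sets.
jaccard <- function(a, b) length(intersect(a, b)) / length(union(a, b))

jaccard(c("Spike_L452R", "Spike_T478K"),
        c("Spike_T478K", "Spike_E484A"))   # -> 1/3
```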
For Pangolin lineages that are considered VOI/VUM/VOC, the mapping is directly given in GISAID. This can be found in the file Work/Data_Analysis/GISAID_VOI_VOC_VOM_list.txt. The list was obtained by first downloading the Clade/Lineage variants (tsv) from the GISAID Downloads prompt. This tsv file has multiple columns; the interesting columns are type and Value. Using the following bash command the list GISAID_VOI_VOC_VOM_list.txt was compiled:

```bash
awk -F'\t' '{if($3=="Variant") print $4}' \
  gisaid_variants_statistics_from_gisaid.tsv | \
  sort -u > GISAID_VOI_VOC_VOM_list.txt
```

The resulting text has long lines like "VOI GRA (EG.5+EG.5.*) first detected in Indonesia/France"; from each line only the variant and sublineage names (e.g. EG.5+EG.5.*) are retained, while the rest of the text, including the brackets, is removed manually. Lines are rearranged so that a parental variant comes after its subvariants; for example, XBB+XBB.* comes only after XBB.1.16+XBB.1.16.*. This rearrangement was done manually.
In total there are 1628 unique lineages, which also includes the variants. Running the mapping algorithm on this gives 715 lineages mapped to 16 variants and 913 lineages mapped to 531 lineages (which includes Unknown).

If the mutation list is obtained using the outbreakinfo function getMutationsByLineage, then the results vary for the non-variant lineages: 912 lineages (without Unknown) get mapped to 676 lineages. This is present in assigning_the_unassigned.Rmd.
The country-wise frequency trend plots are in Work/Data_Analysis/plots/country_wise_frequency_trend.pdf (without CI) and Work/Data_Analysis/plots/country_wise_frequency_trend_CI.pdf (with CI); the variant-wise trends across all countries are in Work/Data_Analysis/plots/Variant_wise_trend_all_countries.pdf (without CI) and Work/Data_Analysis/plots/Variant_wise_trend_all_countries_CI.pdf (with CI). The plot analysis is recorded in Work/Data_Analysis/plot_Analysis_report.numbers, and the summary of the analysis is in the "Analysis of the variant trend plots" file. The frequencies of lineages that are not variants are plotted country-wise in Work/Data_Analysis/plots/Lineages_no_var_freq_countrywise_CI (with CI) and Work/Data_Analysis/plots/Lineages_no_var_freq_countrywise (without CI).

unassigned_sequences.fasta was uploaded to https://pangolin.cog-uk.io to get the assignment. The service was not able to analyse the 5 sequences from Australia; no reason was provided. The related steps are in Work/Data_Analysis/assigning_the_unassigned.Rmd.
The big goal is to find the positions under pressure. To obtain this, first the frequency of mutation at each position of the Spike RBD and NTD epitopes is calculated (RBD spike mutations at positions 330-530; NTD mutations at positions 14-20, 140-158, 245-264) (question on the positions). For this the aa_substitution data obtained from GISAID were used.

STEPS:
$$
\text{Frequency of pos\_373 on 01-01-2022} = \frac{\text{count of mutations at pos\_373 on 01-01-2022}}{\text{number of sequences on 01-01-2022}}
$$
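A minimal dplyr sketch of this per-position daily frequency; the data frames, column names and counts below are toy assumptions, not the thesis objects.

```r
library(dplyr)

mut_long <- data.frame(                       # one row per observed mutation
  Collected_date = as.Date(c("2022-01-01", "2022-01-01", "2022-01-01")),
  position       = c(373, 373, 142)
)
seq_per_day <- data.frame(                    # total sequences collected per day
  Collected_date = as.Date("2022-01-01"),
  n_seq          = 10
)

mut_long %>%
  count(Collected_date, position, name = "n_mut") %>%
  left_join(seq_per_day, by = "Collected_date") %>%
  mutate(freq = n_mut / n_seq)                # e.g. pos 373 on 2022-01-01 -> 0.2
```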
With the formula (given by Prof. Max) the pressure on each position from $t_0$ to $t$ was computed:

$$
P(pos, t) = \sum_{s=t_0}^{t} e^{-k[t-s]} \times f(pos, s)
$$
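A direct transcription of this pressure formula into R, as a sketch; the frequency vector and the decay constant k are assumed placeholders.

```r
# f_pos: daily mutation frequencies f(pos, s) for s = t0 .. t (toy values below)
position_pressure <- function(f_pos, k = 0.01) {
  t <- length(f_pos)
  s <- seq_len(t)
  sum(exp(-k * (t - s)) * f_pos)   # sum_{s=t0}^{t} exp(-k[t-s]) * f(pos, s)
}

position_pressure(c(0.01, 0.02, 0.05, 0.10))
```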
The categorising threshold is set at 25%, similar to NetSurfP 3.0. This would need advice.
The number of surface residues flagged by the 3 tools varies. Residues flagged by each of the tools are pulled from the ten countries' positions-under-pressure data frame, and the numbers are tabulated. The positions picked by each of the tools are compared with the positions given by Prof. Max (screenshot); the tabulation is available in Work/Data_Analysis/Positions_being_picked.numbers.

Like the earlier variant trend visualisation, the pressure trend was also visualised using geom_line in two ways, position-wise and country-wise. These plots are present in Work/Data_Analysis/pressure_plots. There are 6 plots: 3 country-wise and 3 position-wise, where the 3 corresponds to the 3 masking-tool outputs used to generate the plots.
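A minimal ggplot2 sketch of the geom_line pressure-trend style described above; the data frame, countries and values are toy assumptions, not the thesis data.

```r
library(ggplot2)

pressure_df <- data.frame(
  date     = rep(seq(as.Date("2022-01-01"), by = 180, length.out = 4), times = 2),
  pressure = c(0.2, 0.5, 0.4, 0.3, 0.1, 0.2, 0.6, 0.5),
  country  = rep(c("India", "Norway"), each = 4)
)

ggplot(pressure_df, aes(date, pressure, colour = country)) +
  geom_line() +
  labs(x = "Date", y = "Pressure at position (toy values)")
```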
The amino acid substitution at a position is not constant. Example: at position 14 the mutations found in India include Spike_Q14H, Spike_Q14del and Spike_Q14R. The number of such prevailing mutations at a position is taken as the entropy of the position. This was calculated to weight down the pressure on a position; it has not been processed further yet.
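A toy sketch of this per-position substitution count (the notes define the "entropy" simply as the number of distinct prevailing mutations at a position); the substitution list is made up.

```r
subs_pos14 <- c("Spike_Q14H", "Spike_Q14del", "Spike_Q14R", "Spike_Q14H")
length(unique(subs_pos14))   # 3 distinct substitutions observed at position 14
```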
To know how impactful a mutation at a site would be, the probability of neutralisation is calculated: the probability of neutralisation of a variant y by the antibodies elicited by variant x at time t. To compute this probability, the binding probability bv(t, x, y) of an antibody of a particular epitope class with the variant is needed.
The plots are in Work/Data_Analysis/pressure_plots/pos_pressure_trend_avg_wtpressure.pdf and Work/Data_Analysis/pressure_plots/pressure_trend_avg_wtpressure.pdf. Work/Data_Analysis/Big_goal.Rmd and Work/Data_Analysis/neutralisation_probability.Rmd have all the scripts regarding the big goal.

Another method: the binding probabilities for all time steps for all antibodies were computed first, then these binding probabilities were averaged across each antibody class and used to find the probability of neutralisation. The weight values did not differ drastically; to be precise, only 116 values differ. Sticking to the earlier method.
Overall trend across all the countries: the plots (pictures) represent the overall trend for the grouped positions.
Grouping positions by similar pressure trends

The positions with similar trends were visualised on the structure to see whether they are close enough to each other to experience the same trend of pressure.

- 373, 142, 501, 19, 440, 505, 478, 371, 417, 376, 339, 408, 375, 405, 498, 477, 484 (position 19 is missing in the structure)
- 496, 143, 145 (position 145 is missing in the structure)
- 493: the trend is like the previous group but with an offset in the peak (this position is 8.1 Å away from position 496)
- 446, 346 (position 446 is missing in the structure)
- 157, 452
- 158, 156
- 144: similar to the previous group, but many countries show high pressure at the end of the observation timeline (position 144 is not found in the structure)
- 245, 153, 450, 18 (positions 18 and 153 are absent in the structure)
- 146, 445, 368, 252, 490, 460, 486 (146, 252, 445 are absent in the structure)
- 152, 147, 257, 356, 444 (152, 147, 257 are absent in the structure)
- 253, 521 (position 253 is absent in the structure)
- 455, 456
Visually, the positions that have similar pressure trends are not always in proximity on the folded protein. Since this is a static structure, there might be dynamics that bring these positions together, but this is just a hypothesis.

- 256: high pressure is observed only in Australia, at the end of the observation period.
- 344, 494: Germany has higher pressure than the others.
- 248: India and Norway record high pressure compared to the others.
If the outlier is determined based on past, current and future data, the method is called an estimation method. If the outlier is determined based on only past data, it is a prediction method.

Some methods:

- Descriptive statistics:
- Statistical tests: these tests require the data to be normally distributed, which can be checked either by visualising the data with a histogram or with the Shapiro-Wilk normality test, shapiro.test().
- MAD rule: this is also part of descriptive statistics. It considers values outside the interval $I = [\text{median} - 3\cdot\text{MAD},\ \text{median} + 3\cdot\text{MAD}]$ as outliers (MAD: median absolute deviation). A sketch follows after this list.
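A small R sketch of the median ± 3*MAD rule on toy frequencies; note that base R's mad() applies the 1.4826 scaling constant by default, which is an assumption about how the rule is applied here.

```r
x    <- c(0.10, 0.12, 0.11, 0.13, 0.45)     # made-up frequencies
lims <- median(x) + c(-3, 3) * mad(x)       # interval [median - 3*MAD, median + 3*MAD]
x[x < lims[1] | x > lims[2]]                # -> 0.45 flagged as an outlier
```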
In general, a 95% confidence interval means there is a 95% probability that the confidence interval contains the mean [2]. To understand what the CI of a sample proportion is, the term population proportion is defined first.

A population proportion is the proportion of individuals in a population sharing a certain trait, denoted p. The sample proportion is the proportion of individuals in a sample sharing that trait, denoted $\hat{p}$ [3].

Just like estimating the CI of the mean, the CI of a proportion is estimated by adding and subtracting the margin of error from $\hat{p}$ to get the limits of the CI:

$$\hat{p} \pm z \sqrt{\frac{\hat{p}(1-\hat{p})}{n}}$$

where z is the z-score for the 95% confidence level [4]. For multinomial sample proportions the confidence intervals are often approximated by a single binomial confidence interval; I assume the trait of interest is treated as $\hat{p}$ while the others become $(1-\hat{p})$. There are also methods to calculate the confidence intervals simultaneously. One such method, sisonglaz, was used in this work through the function MultinomCI from the DescTools package.
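Example of the simultaneous multinomial CIs via the Sison-Glaz method with DescTools::MultinomCI, as named above; the variant counts are made up.

```r
library(DescTools)

variant_counts <- c(XBB.1.16 = 120, EG.5 = 45, BA.2.86 = 5, Other = 330)
MultinomCI(variant_counts, conf.level = 0.95, method = "sisonglaz")
```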
Finding a new data point based on pre-existing data points is called interpolation. Common methods of interpolation include linear, polynomial and spline interpolation. Linear interpolation fits a straight line between known points and uses the slope of the line to interpolate the missing data points. Both polynomial and spline interpolation use polynomials: the difference is that spline interpolation fits multiple piecewise polynomials to subsets of the data, whereas polynomial interpolation fits one polynomial to the entire data set [5].
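A one-liner sketch of linear interpolation with base R's approx(), filling a missing daily frequency; the numbers are toy values.

```r
days  <- c(1, 2, 4, 5)                 # day 3 is missing
freqs <- c(0.10, 0.12, 0.20, 0.25)
approx(days, freqs, xout = 3)$y        # linearly interpolated frequency for day 3
```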
```
Function mapping_lineages(lineage_cmut, alias_df)

  Function chunk_lineage(lineage_cmut, alias_df)
    1. Chunks lineages and sublineages by the first character of the Pangolin string.
       One element of the input data frame is considered at a time, and all members
       of the clade it belongs to are chunked and passed to the downstream processing.
    2. If there are no lineages sharing the same first character but there are still
       entries in the input data frame, the lineage is mapped to itself and stored in alias_df.
    3. Else, if there are no more entries in lineage_cmut, the lineage is mapped to
       itself and return is called.
    4. Else, the chunk stored in temp_df is passed to long_sublineage.
  End

  Function long_sublineage(temp_df, lineage_cmut, alias_df, alias_df_temp)
    1. Finds the sublineage with the longest character string and stores it in longlineage_df.
    2. If multiple lineages share the longest character string, all of them are stored
       in longlineage_df.
    3. If the lineage length is just one, it is the parental lineage; it is mapped to
       itself, removed from temp_df and stored in alias_df.
  End

  Function match_merge(longlineage_df, temp_df, lineage_cmut, alias_df, alias_df_temp)
    1. Iterates through longlineage_df, forms a pattern from the element taken and tries
       to find neighbours in longlineage_df based on the Jaccard value, using find_jaccard.
       1. If neighbours are found, their mutations are combined (union).
       2. Checks whether these neighbours are the parental lineage of some other lineage
          in alias_df_temp.
       3. Checks whether there is a parental lineage of the neighbours in temp_df.
       4. If a parental lineage is found and the length of its string is more than one,
          the mutations of the neighbours and of the parental lineage are again combined
          (union) and stored in place of the parental lineage's mutations in temp_df.
          The neighbours are mapped to the found parent and stored in alias_df_temp,
          since there is potential for further mapping. This parental lineage also becomes
          the parental lineage of the sublineages that previously had these neighbours as
          their parental lineage in alias_df_temp. These neighbours are removed from
          longlineage_df and the loop moves to the next round.
       5. Else, if the length of the parental lineage equals one, everything written to
          alias_df_temp in the previous point is written to alias_df instead. Mutations
          are not modified, since this is the ultimate parental lineage and there is
          nowhere further to go.
       6. If no parental lineage is found, the neighbours are mapped to the pattern, which
          is the name of the neighbours without the last character. This pattern,
          concatenated with "x", becomes the parental lineage of the neighbours. It also
          becomes the parental lineage of those sublineages for which the neighbours were
          the parental lineage.
    2. If no neighbours are found:
       1. The code directly starts finding the parental lineage of the element being considered.
       2. If a parental lineage is found and its length is more than one, the element in hand
          is mapped to the found parental lineage and the Jaccard value is stored in
          alias_df_temp. The mutations of the element and of the found parental lineage are
          combined and stored in place of the parental mutations in temp_df.
       3. Sublineages for which the lineage in hand is the parental lineage in alias_df_temp
          get mapped to the newly found parental lineage.
       4. If the length of the parental lineage found equals one, points 2-3 are repeated,
          except that alias_df is used instead of alias_df_temp and the mutations are not modified.
    3. If neither a parental lineage nor neighbours are found:
       1. The lineage being considered is mapped to itself.
       2. Sublineages in alias_df_temp that have the lineage being considered as their
          parental lineage stay the same; they are just transferred to alias_df unchanged.

    Once longlineage_df has been fully processed, long_sublineage is called if there are
    still entries in temp_df; otherwise chunk_lineage is called.
  End

End
```
```
Function find_jaccard(pat, search_df, pat_mutations = 0)

  search_lineage_loc <- grep(pat, search_df$lineage)

  1. If pat_mutations == 0 the function is finding neighbours; otherwise it is
     overloaded to find the parental lineage.
  2. If pat_mutations == 0, length(search_lineage_loc) should be more than one,
     i.e. there are neighbours other than the lineage in hand.
  3. If no neighbours are found the function returns neighbours = "0",
     jaccard_value = -1, neighbour_loc = 0.
  4. Apart from point 2, the overloaded function behaves the same for both purposes
     and returns the Jaccard value, the neighbours and neighbour_loc.
End
```
```
Function Find_parental(pat, parental_df)

  1. Recursively searches with the pattern until it finds a parental lineage
     satisfying the conditions.
  2. The pattern is shortened at every iteration.
End
```
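A minimal R sketch of the recursive parental search in Find_parental; parental_df and its lineage column are assumptions about the data layout, not the thesis implementation.

```r
find_parental <- function(pat, parental_df) {
  parent_pat <- sub("\\.[^.]+$", "", pat)       # drop the last ".N" component
  if (parent_pat == pat) return(NA_character_)  # nothing left to shorten
  if (parent_pat %in% parental_df$lineage) return(parent_pat)
  find_parental(parent_pat, parental_df)        # recurse with the shorter pattern
}

# find_parental("XBB.1.16.2", data.frame(lineage = c("XBB.1.16", "XBB")))  # "XBB.1.16"
```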
\n","noteIndex":{"id":"paa0s59lp320n6q8rghycjw","title":"Work on SARS Cov2","desc":"","updated":1700240746536,"created":1700233379150,"custom":{"nav_order":0,"permalink":"/"},"fname":"root","type":"note","vault":{"fsPath":".","selfContained":true,"name":"Literature"},"contentHash":"6124b7f80065618c049c13a49f0dbed1","links":[],"anchors":{},"children":["1hnn8maoednnxdw5feqd4kq","nve3ld8zkeg6b2hgygrh1rp","v4t96j5kch5mq12272qsyih","r423m96u71ix4pb458fk8u2"],"parent":null,"data":{},"body":"\n"},"collectionChildren":null,"customHeadContent":null,"config":{"version":5,"dev":{"enablePreviewV2":true,"enableSelfContainedVaults":true},"commands":{"lookup":{"note":{"selectionMode":"extract","confirmVaultOnCreate":true,"vaultSelectionModeOnCreate":"smart","leaveTrace":false,"bubbleUpCreateNew":true,"fuzzThreshold":0.2}},"randomNote":{},"insertNoteLink":{"aliasMode":"none","enableMultiSelect":false},"insertNoteIndex":{"enableMarker":false},"copyNoteLink":{"aliasMode":"title"},"templateHierarchy":"template"},"workspace":{"vaults":[{"fsPath":".","selfContained":true,"name":"Literature"}],"journal":{"dailyDomain":"daily","name":"journal","dateFormat":"y.MM.dd","addBehavior":"childOfDomain"},"scratch":{"name":"scratch","dateFormat":"y.MM.dd.HHmmss","addBehavior":"asOwnDomain"},"task":{"name":"task","dateFormat":"y.MM.dd","addBehavior":"asOwnDomain","statusSymbols":{"":" ","wip":"w","done":"x","assigned":"a","moved":"m","blocked":"b","delegated":"l","dropped":"d","pending":"y"},"taskCompleteStatus":["done","x"],"prioritySymbols":{"H":"high","M":"medium","L":"low"},"todoIntegration":false,"createTaskSelectionType":"selection2link"},"graph":{"zoomSpeed":1,"createStub":false},"enableAutoCreateOnDefinition":false,"enableXVaultWikiLink":false,"enableRemoteVaultInit":true,"enableUserTags":true,"enableHashTags":true,"workspaceVaultSyncMode":"noCommit","enableAutoFoldFrontmatter":false,"enableEditorDecorations":true,"maxPreviewsCached":10,"maxNoteLength":204800,"enableFullHierarchyNoteTitle":false},"preview":{"enableFMTitle":true,"enableNoteTitleForLink":true,"enableFrontmatterTags":true,"enableHashesForFMTags":false,"enablePrettyRefs":true,"enableKatex":true,"automaticallyShowPreview":false},"publishing":{"enableFMTitle":true,"enableNoteTitleForLink":true,"enablePrettyRefs":true,"enableKatex":true,"copyAssets":true,"siteHierarchies":["root"],"writeStubs":false,"siteRootDir":"docs","seo":{"title":"Dendron","description":"Personal Knowledge Space"},"github":{"enableEditLink":true,"editLinkText":"Edit this page on GitHub","editBranch":"main","editViewMode":"tree"},"enableSiteLastModified":true,"enableFrontmatterTags":true,"enableHashesForFMTags":false,"enableRandomlyColoredTags":true,"enableTaskNotes":true,"enablePrettyLinks":true,"searchMode":"search","siteUrl":"https://vishnushiri02.github.io","assetsPrefix":"/SARS-Cov2","siteFaviconPath":"favicon.ico","siteIndex":"root"}}},"__N_SSG":true}
\ No newline at end of file
Possible_questions: questions to be raised to understand the work.

1. Why/how was this particular time period (Jan22-Oct23) chosen, while the pandemic has been prevalent from 2020 onwards?

2. There is continuous deposition of sequences to GISAID even for earlier dates; the following table shows the difference in numbers. Will calculating the CI account for these changes?

Country | Number downloaded (18-20 Nov 23) | Number in GISAID at a later date (8 Dec 2023) |
---|---|---|
India | 15783 | 15809 |
South Korea | 13808 | 13826 |
Denmark | 163900 | 163905 |
Germany | 66236 | 66244 |
Norway | 12264 | 12264 |
Spain | 17921 | 18039 |
UK | 30597 | 30606 |
Canada | 20791 | 20802 |
USA | 74971 | 75165 |
Australia | 13257 | 13261 |
3. The Jaccard index threshold chosen to decide a parent or a neighbour is 50%. Is this okay?

4. Why do we do linear interpolation, why not spline interpolation?
ANS: With sparse data, spline interpolation might give unexpected results which might not be right.

5. According to UniProt the RBD region in spike is 319-541 aa.
ANS: There are no interesting epitopes before position 330 or after position 530, so this is fine.

6. If an RBD spike position in the wildtype is occupied by a hydrophobic residue and it is replaced by a hydrophilic residue, the solvent accessibility might change, probably due to a difference in the fold. In that case, should we study these positions in each of the VOIs?
ANS: The mutation doesn't alter the fold to a great extent; if it did, it would affect the function of the protein. So using the wildtype to compute the solvent accessibility is not a bad idea.

7. The computed pressure trend for a position is obtained by calculating pressure at multiple time points. For now the pressure is computed every 180 days. The time horizon is not exactly divisible into bins of 180 days, so the final bin will differ. Will this be a problem?