Skip to content

Commit

Permalink
add unique stringdb<->uniprot ids loader
Browse files Browse the repository at this point in the history
  • Loading branch information
mbsimonovic committed May 16, 2014
1 parent e2e0b98 commit c426cb9
Show file tree
Hide file tree
Showing 6 changed files with 11,267 additions and 11,239 deletions.
7 changes: 7 additions & 0 deletions src/main/java/org/string_db/ProteinRepository.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,11 @@ public interface ProteinRepository {
*/
Map<Integer, String> loadProteinSequences(Integer speciesId);

/**
* Loads the unique (1:1) mapping of protein ids to UniProt accessions for the given species.
*
* @param speciesId id of the species whose proteins should be mapped
* @return {protein_id -> UniprotAC} map
*/
Map<Integer, UniprotAC> loadUniqueUniProtIds(Integer speciesId);
}
2 changes: 1 addition & 1 deletion src/main/java/org/string_db/UniprotAC.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public static void isValidUniprotAC(String ac) {
throw new IllegalArgumentException("null AC");
}
if (ac.length() != 6) {
throw new IllegalArgumentException("AC must be exactly 6 chars long, not: " + ac.length());
throw new IllegalArgumentException("AC must be exactly 6 chars long, not: " + ac.length() + ": " + ac);
}
if (STARTS_WITH_OPQ_FORMAT.matcher(ac).matches() || OTHER_FORMAT.matcher(ac).matches()) {
return;
Expand Down
12 changes: 5 additions & 7 deletions src/main/java/org/string_db/jdbc/Postgres2HSQLDB.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,6 @@
*/
public class Postgres2HSQLDB {
final JdbcTemplate jdbcTemplate;

public Postgres2HSQLDB(JdbcTemplate jdbcTemplate) {
this.jdbcTemplate = jdbcTemplate;
}

private final String selectIndexDef =
"SELECT pg_get_indexdef(i.indexrelid) \n" +
"FROM pg_class c, pg_namespace n, pg_index i, pg_class t\n" +
Expand All @@ -58,6 +53,9 @@ public Postgres2HSQLDB(JdbcTemplate jdbcTemplate) {
" AND n.nspname = ?\n" +
" AND t.relname = ?";

/**
* Creates an instance backed by the given template.
*
* @param jdbcTemplate template stored in the {@code jdbcTemplate} field of this instance
*/
public Postgres2HSQLDB(JdbcTemplate jdbcTemplate) {
this.jdbcTemplate = jdbcTemplate;
}

public static void main(final String[] args) throws FileNotFoundException, UnsupportedEncodingException, SQLException {
final ApplicationContext ctx = new AnnotationConfigApplicationContext(DbConfig.class, DriverDataSourceConfig.class);
Expand All @@ -68,9 +66,9 @@ public static void main(final String[] args) throws FileNotFoundException, Unsup
// skip checksum (not used) and annotation_word_vectors columns (postgresql specific data type - tsvector)
converter.dumpTable("items", "proteins", "species_id in (511145)", "hsql-data.sql",
new String[]{"protein_id", "protein_external_id", "species_id", "annotation", "preferred_name"});
//m.pneumoniae has fewest names:
// m.pneumoniae has fewest names:
converter.dumpTable("items", "proteins_names", "species_id in (272634)", "hsql-data.sql",
new String[]{"protein_id", "protein_name", "species_id", /*escape keyword*/"\"source\""});
new String[]{"protein_id", "protein_name", "species_id", /*escape keyword*/"\"source\"", "linkout"});
}

private void dumpTable(String schema, String table, String filter, String fileName, String[] columnsToInclude) {
Expand Down
27 changes: 22 additions & 5 deletions src/main/java/org/string_db/jdbc/ProteinRepositoryJdbc.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@

package org.string_db.jdbc;

import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
import org.springframework.stereotype.Component;
import org.string_db.ProteinExternalId;
import org.string_db.ProteinRepository;
import org.string_db.UniprotAC;

import java.util.Map;
import java.util.Set;
Expand All @@ -33,16 +35,25 @@
@Component
public class ProteinRepositoryJdbc implements ProteinRepository {

@Autowired
GenericQueryProcessor queryProcessor;

private static final Logger log = Logger.getLogger(ProteinRepositoryJdbc.class);
protected final TwoColumnRowMapper<Integer, String, Set<String>> multiValSqlRowMapper = TwoColumnRowMapper.multiValMapper();
protected TwoColumnRowMapper<Integer, String, ProteinExternalId> idExternalIdMapper = new TwoColumnRowMapper<Integer, String, ProteinExternalId>() {
@Override
public void addToMap(Integer firstColumn, String secondColumn, Map<Integer, ProteinExternalId> map) {
map.put(firstColumn, new ProteinExternalId(secondColumn));
public void addToMap(Integer protein_id, String protein_external_id, Map<Integer, ProteinExternalId> map) {
map.put(protein_id, new ProteinExternalId(protein_external_id));
}
};
// Row mapper that wraps the second (String) column of each row in a UniprotAC and stores it
// under the first (Integer) column. A key that is already present only triggers a warning;
// the value from the later row then replaces the earlier one.
protected TwoColumnRowMapper<Integer, String, UniprotAC> uniprotAcMapper = new TwoColumnRowMapper<Integer, String, UniprotAC>() {
    @Override
    public void addToMap(Integer id, String accession, Map<Integer, UniprotAC> target) {
        final boolean alreadyMapped = target.containsKey(id);
        if (alreadyMapped) {
            log.warn("duplicate uniprotAc for " + id);
        }
        final UniprotAC ac = new UniprotAC(accession);
        target.put(id, ac);
    }
};
@Autowired
GenericQueryProcessor queryProcessor;

@Override
public Map<Integer, ProteinExternalId> loadExternalIds(Integer speciesId) {
Expand Down Expand Up @@ -84,6 +95,12 @@ public Map<Integer, String> loadProteinSequences(Integer speciesId) {
new MapSqlParameterSource("species_id", speciesId));
}

/**
 * Selects the {@code protein_id} and {@code protein_name} columns from
 * {@code items.proteins_names}, restricted to rows whose {@code linkout} is
 * {@code 'UniProt'} and whose {@code species_id} matches, and collects them
 * through {@code uniprotAcMapper}.
 *
 * @param speciesId value bound to the {@code :species_id} query parameter
 * @return map produced by the query processor from the selected rows
 */
@Override
public Map<Integer, UniprotAC> loadUniqueUniProtIds(Integer speciesId) {
    final String filter = "linkout = 'UniProt' AND species_id = :species_id";
    final MapSqlParameterSource params = new MapSqlParameterSource("species_id", speciesId);
    return queryProcessor.selectTwoColumns("protein_id", "protein_name", "items.proteins_names",
            uniprotAcMapper, filter, params);
}

}


Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.common.collect.ImmutableSet;
import org.junit.Test;
import org.string_db.ProteinExternalId;
import org.string_db.UniprotAC;

import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -73,4 +74,9 @@ public void test_protein_preferred_name() throws Exception {
assertEquals("thrA", names.get(4735233));
}

// Species 272634: protein 2815147 is expected to map to UniProt accession P11311.
@Test
public void test_loadUniqueUniProtIds() throws Exception {
    final Map<Integer, UniprotAC> uniprotByProteinId = repo.loadUniqueUniProtIds(272634);
    final UniprotAC expected = new UniprotAC("P11311");
    final UniprotAC actual = uniprotByProteinId.get(2815147);
    assertEquals(expected, actual);
}
}
Loading

0 comments on commit c426cb9

Please sign in to comment.