diff --git a/src/main/java/io/academic/service/AcademicSearchService.java b/src/main/java/io/academic/service/AcademicSearchService.java index 357de46..f63c023 100644 --- a/src/main/java/io/academic/service/AcademicSearchService.java +++ b/src/main/java/io/academic/service/AcademicSearchService.java @@ -6,6 +6,7 @@ import com.google.gson.JsonParser; import org.elasticsearch.action.search.*; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.search.Scroll; import org.elasticsearch.search.SearchHit; @@ -16,6 +17,8 @@ import org.springframework.stereotype.Service; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; @@ -30,8 +33,11 @@ public AcademicSearchService() { public String search(String q) throws IOException { + ArrayList criterias = new ArrayList(); + criterias.add("dc"); + criterias.add("content"); SearchRequest searchRequest = new SearchRequest("harvester"); - searchRequest.source(buildSource("term","dc",q,false)); + searchRequest.source(buildSource("term",criterias,q,false)); //this values are necessary if we need scrollable results (in other words if our result have more than 10 hits) final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1)); @@ -47,8 +53,10 @@ public String search(String q) throws IOException { public String searchBy(String q, String criteria) throws IOException { + ArrayList criterias = new ArrayList(); + criterias.add(criteria); SearchRequest searchRequest = new SearchRequest("harvester"); - searchRequest.source(buildSource("match",criteria,q,false)); + searchRequest.source(buildSource("match",criterias,q,false)); //this values are necessary if we need scrollable results (in other words if our result have more than 10 hits) final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1)); @@ -66,8 +74,10 @@ 
public String searchBy(String q, String criteria) throws IOException { public String getAll() throws IOException { + ArrayList criterias = new ArrayList(); + criterias.add(""); SearchRequest searchRequest = new SearchRequest("harvester"); - searchRequest.source(buildSource("matchAll","","",true)); + searchRequest.source(buildSource("matchAll",criterias,"",true)); //this values are necessary if we need scrollable results (in other words if our result have more than 10 hits) final Scroll scroll = new Scroll(TimeValue.timeValueMinutes(1)); @@ -128,16 +138,16 @@ public String toJson(String nonJsonString){ - public SearchSourceBuilder buildSource(String queryType, String criteria, String q, Boolean showAllFields){ + public SearchSourceBuilder buildSource(String queryType, ArrayList criteria, String q, Boolean showAllFields){ SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); if (queryType.equals("match")) { - searchSourceBuilder.query(matchQuery(criteria,q)); + searchSourceBuilder.query(QueryBuilders.matchQuery(criteria.get(0),q)); } else if (queryType.equals("term")) { - searchSourceBuilder.query(QueryBuilders.termQuery(criteria,q)); + searchSourceBuilder.query(QueryBuilders.multiMatchQuery(q,criteria.toArray(new String[criteria.size()]))); } else { @@ -147,7 +157,8 @@ else if (queryType.equals("term")) searchSourceBuilder.sort(new FieldSortBuilder("title.keyword").order(SortOrder.DESC)); if (!showAllFields) { - String[] includeFields = new String[] {"title",criteria}; + criteria.add("title"); + String[] includeFields = criteria.toArray(new String[criteria.size()]); String[] excludeFields = new String[] {""}; searchSourceBuilder.fetchSource(includeFields,excludeFields); searchSourceBuilder.fetchSource(true); diff --git a/src/main/java/io/academic/service/OaiService.java b/src/main/java/io/academic/service/OaiService.java index e470daf..df7907a 100644 --- a/src/main/java/io/academic/service/OaiService.java +++ 
b/src/main/java/io/academic/service/OaiService.java @@ -90,18 +90,7 @@ public RestHighLevelClient getRestClient() { public static final String INDEX = "harvester"; private static final String TYPE = "oai"; -// private DocumentService documentService = null; -// private IndexService indexService = null; -// private SearchService searchService = null; -// private IndexRequest request; - -// @Autowired -// public OaiService(DocumentService documentService, IndexService indexService, SearchService searchService) { -// this.documentService = documentService; -// this.indexService = indexService; -//// indexService.createIndex(); -// this.searchService = searchService; -// } + @Autowired public OaiService() { @@ -109,48 +98,22 @@ public OaiService() } public void elasticSave(Article article) throws IOException { -// System.out.println("inside elasticsave"); - -// IndexRequest request = new IndexRequest(INDEX, TYPE).setEntity(article); -// System.out.println("before article get Article Identifier"); -// System.out.println(article.getArticleIdentifier()); -// if (article.getArticleIdentifier() != null) { -// request.setId(String.valueOf(article.getId())); -// System.out.println("inside article getid"); IndexRequest request = new IndexRequest(INDEX,TYPE); request.setPipeline("academic-pdf"); -// System.out.println(new Gson().toJson(article)); - request.source(new Gson().toJson(article), XContentType.JSON); + // before using this pipeline we have to add pipeline to the elasticsearch by following command +// PUT _ingest/pipeline/academic-pdf +// { +// "description": "parse pdfs and index into ES", +// "processors" : +// [ +// { "attachment" : { "field": "pdf" } }, +// { "remove" : { "field": "pdf" } } +// ] // } - - + request.source(new Gson().toJson(article), XContentType.JSON); IndexResponse indexResponse = restClient.index(request); -// String index = indexResponse.getIndex(); -// String type = indexResponse.getType(); -// String id = indexResponse.getId(); -// long 
version = indexResponse.getVersion(); -// System.out.println(index+" ,"+type+", "+id+", "+version); -// if (indexResponse.getResult() == DocWriteResponse.Result.CREATED) { -// -// } else if (indexResponse.getResult() == DocWriteResponse.Result.UPDATED) { -// -// } -// ReplicationResponse.ShardInfo shardInfo = indexResponse.getShardInfo(); -// if (shardInfo.getTotal() != shardInfo.getSuccessful()) { -// -// } -// if (shardInfo.getFailed() > 0) { -// for (ReplicationResponse.ShardInfo.Failure failure : shardInfo.getFailures()) { -// String reason = failure.reason(); -// } -// } - - - - -// return documentService.index(request); } @@ -179,11 +142,9 @@ public void saveRecords(List recordTypes) { oaiRecord.setState(0); oaiRecords.add(oaiRecord); + //TODO: we have to check the name of each part and assign values by that name, not by position String[] parts = parsedDc.getDc().split(";;"); Article article = new Article(); -// System.out.println("article create sonrasi article id : "+article.getId()); -// System.out.println("article create sonrasi oai id : "+oaiRecord.getId()); - article.setTitle(parts[0].split("::")[1]); article.setAuthors(parts[1].split("::")[1]); article.setKeywords(parts[2].split("::")[1]); @@ -195,23 +156,18 @@ public void saveRecords(List recordTypes) { { String downlaodUrl = parts[10].split("::")[1]; article.setRelation(downlaodUrl); -// article.setBase64("not available"); article.setBase64(UrlPdftoBase64(downlaodUrl)); } else { article.setRelation("not available"); - article.setBase64("bm90IGF2YWlsYWJsZQ=="); + article.setBase64("bm90IGF2YWlsYWJsZQ=="); //Base64 encoding of the text "not available" } article.setDc(parsedDc.getDc()); article.setArticleIdentifier(parseIdentifier(oaiRecord.getIdentifier())); -// article.setArticleIdentifier(oaiRecord.getIdentifier()); -// System.out.println("article add oncesi article id : "+article.getId()); articles.add(article); -// System.out.println("elastic save oncesi article id : "+article.getId()); -//
System.out.println("elastic save oncesi article title : "+article.getTitle()); try { elasticSave(article); } catch (IOException e) { @@ -247,13 +203,8 @@ public String UrlPdftoBase64(String url) { try { oracle = new URL(url); URLConnection yc = oracle.openConnection(); -// BufferedReader in = new BufferedReader(new InputStreamReader( -// yc.getInputStream())); + BufferedInputStream bis = new BufferedInputStream(yc.getInputStream()); -// String inputLine; -// while ((inputLine = in.readLine()) != null) -// System.out.println(inputLine); -// in.close(); byte bytes[] = IOUtils.toByteArray(bis); bis.close(); @@ -266,31 +217,8 @@ public String UrlPdftoBase64(String url) { e.printStackTrace(); } - - - return base64; -// String inputLine; -// while ((inputLine = in.readLine()) != null) -// System.out.println(inputLine); -// in.close(); - - - - -// BufferedReader in = new BufferedReader( -// new InputStreamReader(oracle.openStream())); -// byte bytes[] = IOUtils.toByteArray(oracle); - - -// String b64String = Base64. - -// String inputLine; -// while ((inputLine = in.readLine()) != null) -// System.out.println(inputLine); -// in.close(); - } private LocalDateTime parseDateTime(String string) { diff --git a/src/main/java/io/academic/service/ProcessorService.java b/src/main/java/io/academic/service/ProcessorService.java index 07c69c3..e6ad4cb 100644 --- a/src/main/java/io/academic/service/ProcessorService.java +++ b/src/main/java/io/academic/service/ProcessorService.java @@ -38,8 +38,7 @@ public void startProcessors() { oaiDataProviderService.queue(new OaiDataProvider("Acta Medica Anatolia","http://dergipark.gov.tr/api/public/oai/","http://dergipark.gov.tr/download/article-file/","dergipark.ulakbim.gov.tr" )); // oaiDataProviderService.queue(new OaiDataProvider("http://export.arxiv.org/oai2","https://arxiv.org/pdf/")); - oaiService.delete(); - + oaiService.delete(); }