This is an automated email from the git hooks/post-receive script. New commit to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit 0dd6ace8f79a732368ce72fade56dce20f8bb19a Author: Yannick Martel <martel@codelutin.com> Date: Tue May 30 15:32:00 2017 +0200 Review cloudtag request from postgresql and fix indexation from lucene --- .../java/fr/ifremer/coselmar/beans/CloudWord.java | 5 +++ .../persistence/entity/QuestionTopiaDao.java | 41 ++++++++++------------ .../indexation/DocumentsIndexationService.java | 12 +++---- .../services/v1/ExperimentationService.java | 13 ++++++- .../coselmar/services/v1/QuestionsWebService.java | 33 ++++++++++------- 5 files changed, 62 insertions(+), 42 deletions(-) diff --git a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java index 5837b08..abe7036 100644 --- a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java +++ b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java @@ -52,4 +52,9 @@ public class CloudWord implements Serializable { public void setWeight(long weight) { this.weight = weight; } + + @Override + public String toString() { + return "CloudWord { " + text + ": " + weight + " }"; + } } diff --git a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java index 8faba2f..17bcd17 100644 --- a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java +++ b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java @@ -369,29 +369,24 @@ public class QuestionTopiaDao extends AbstractQuestionTopiaDao<Question> { private final String sql; private final String getSql(String questionId) { - return "SELECT word, 
nentry FROM ts_stat( ' select to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_TITLE + ")" + - " || to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_SUMMARY + ")" + - " || to_tsvector(''public.simple_english_conf'', qt." + Question.PROPERTY_THEME + ")" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_NAME + "),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', dk." + Document.PROPERTY_KEYWORDS + "),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_SUMMARY + "),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_FILE_CONTENT + "),'''') FROM question q" + - " LEFT JOIN relateddocuments_relatedquestion ON" + - " relateddocuments_relatedquestion.relatedquestion = q.topiaid" + - " LEFT JOIN closingdocuments_relatedquestion ON" + - " closingdocuments_relatedquestion.relatedquestion = q.topiaid" + - " LEFT JOIN document d on" + - " d.topiaid = closingdocuments_relatedquestion.closingdocuments OR" + - " d.topiaid = relateddocuments_relatedquestion.relateddocuments" + - " LEFT JOIN question_theme qt ON" + - " qt.owner = q.topiaid" + - " LEFT JOIN document_keywords dk ON" + - " dk.owner = d.topiaid" + - " WHERE q.topiaid = ''" + questionId + "''" + - " ')" + - " WHERE char_length(word) > 3 " + - " ORDER BY nentry DESC " + - " "; + + return "SELECT word, nentry FROM ts_stat( ' " + + " with documents as ( select d." + Document.PROPERTY_NAME + " as name, d." + Document.PROPERTY_SUMMARY + " as summary, d." + Document.PROPERTY_FILE_CONTENT + " as fileContent, (SELECT string_agg(dk." 
+ Document.PROPERTY_KEYWORDS + ", '' '') from document_keywords dk where dk.owner = d.topiaid) as keywords " + + " from question k " + + " LEFT JOIN relateddocuments_relatedquestion RD ON RD.relatedquestion = k.topiaid " + + " LEFT JOIN closingdocuments_relatedquestion CD ON CD.relatedquestion = k.topiaid " + + " LEFT JOIN document d on d.topiaid = CD.closingdocuments OR d.topiaid = RD.relateddocuments " + + " WHERE k.topiaid = ''" + questionId + "'' ) " + + " SELECT to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_TITLE + ") " + + " || to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_SUMMARY + ") " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(qt." + Question.PROPERTY_THEME + ", '' '') from question_theme qt where qt.owner = q.topiaid ) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(name, '' '') FROM documents) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(summary, '' '') FROM documents) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(keywords, '' '') FROM documents) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(fileContent, '' '') FROM documents)) " + + " FROM question q where q.topiaid = ''" + questionId + "'' ') " + + " WHERE char_length(word) > 3 " + + " ORDER BY nentry DESC "; } QuestionTermStatSqlQuery(String questionId) { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java index 700e1a0..158c10f 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java @@ -94,7 +94,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { String 
documentName = document.getName(); String documentSummary = document.getSummary(); - doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, documentName, LuceneUtils.TYPE_STORED)); + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); if (StringUtils.isNotBlank(document.getAuthors())) { doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); } @@ -252,7 +252,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { String documentName = document.getName(); String documentSummary = document.getSummary(); - doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, documentName, LuceneUtils.TYPE_STORED)); + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); if (StringUtils.isNotBlank(document.getAuthors())) { doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); } @@ -326,7 +326,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { } - public Map<String, Long> getTopDocumentsTerms(List<String> questionIds) throws IOException { + public Map<String, Long> getTopDocumentsTerms(List<String> documentIds) throws IOException { DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter()); IndexSearcher isearcher = new IndexSearcher(ireader); @@ -336,9 +336,9 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { queryBuilder.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST); BooleanQuery.Builder questionIdBuilder = new BooleanQuery.Builder(); - for (String questionId : questionIds) { - if(StringUtils.isNotBlank(questionId)) { - questionIdBuilder.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, questionId.toLowerCase())), BooleanClause.Occur.SHOULD); + for (String documentId : documentIds) { + if(StringUtils.isNotBlank(documentId)) { + questionIdBuilder.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, 
documentId.toLowerCase())), BooleanClause.Occur.SHOULD); } } queryBuilder.add(questionIdBuilder.build(), BooleanClause.Occur.MUST); diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java index 48cba7f..3c83b41 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java @@ -27,8 +27,11 @@ package fr.ifremer.coselmar.services.v1; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; import com.google.common.collect.Sets; +import com.rometools.rome.feed.rss.Cloud; import fr.ifremer.coselmar.beans.CloudWord; import fr.ifremer.coselmar.beans.DocumentBean; import fr.ifremer.coselmar.beans.LinkBean; @@ -71,6 +74,7 @@ import org.nuiton.util.DateUtil; import org.nuiton.util.pagination.PaginationParameter; import org.nuiton.util.pagination.PaginationResult; +import javax.annotation.Nullable; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -122,6 +126,7 @@ public class ExperimentationService extends CoselmarWebServiceSupport { String term = documentTermFreq.getKey(); Long frequence = documentTermFreq.getValue(); if (topQuestionsTerms.containsKey(term)) { + topQuestionsTerms.put(term, topQuestionsTerms.get(term) + frequence); } else { topQuestionsTerms.put(term, frequence); } @@ -139,7 +144,13 @@ public class ExperimentationService extends CoselmarWebServiceSupport { } } - return topWords; + ImmutableList<CloudWord> cloudWords = ImmutableList.copyOf(Ordering.natural().onResultOf(new Function<CloudWord, Long>() { + public Long apply(CloudWord input) { + return 
input.getWeight(); + } + }).reverse().sortedCopy(topWords)); + + return cloudWords; } public List<CloudWord> getPostgresTopWords(String questionId) { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java index e7a9d48..6970d98 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java @@ -27,7 +27,9 @@ package fr.ifremer.coselmar.services.v1; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import fr.ifremer.coselmar.beans.CloudWord; import fr.ifremer.coselmar.beans.DocumentBean; @@ -1078,17 +1080,16 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { Question question = getQuestionDao().forTopiaIdEquals(fullQuestionId).findUnique(); List<CloudWord> topWords = new ArrayList<>(); -// if (getCoselmarServicesConfig().isPostgresqlDatabase()) { -// try { -// topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); -// } catch (TopiaNoResultException e) { -// if (log.isErrorEnabled()) { -// log.error("Try to find top words for non existing questionId" + questionId, e); -// } -// throw new NoResultException("Question does not exist"); -// } -// } else { -// topWords = Collections.EMPTY_LIST; + if (getCoselmarServicesConfig().isPostgresqlDatabase()) { + try { + topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); + } catch (TopiaNoResultException e) { + if (log.isErrorEnabled()) { + log.error("Try to find top words for non existing questionId" + questionId, e); + } + throw new 
NoResultException("Question does not exist"); + } + } else { QuestionsIndexationService questionsIndexationService = getServicesContext().newService(QuestionsIndexationService.class); DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); @@ -1100,6 +1101,7 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { String term = documentTermFreq.getKey(); Long frequence = documentTermFreq.getValue(); if (topQuestionsTerms.containsKey(term)) { + topQuestionsTerms.put(term, topQuestionsTerms.get(term) + frequence); } else { topQuestionsTerms.put(term, frequence); } @@ -1116,7 +1118,14 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { log.error("Unable to index new question", e); } } -// } + + // Sort by CloudTag#weight DESC + topWords = ImmutableList.copyOf(Ordering.natural().onResultOf(new Function<CloudWord, Long>() { + public Long apply(CloudWord input) { + return input.getWeight(); + } + }).reverse().sortedCopy(topWords)); + } return topWords; } -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.