branch feature/comparatif-lunce-pg updated (d9939cf -> 0dd6ace)
This is an automated email from the git hooks/post-receive script. New change to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git discards d9939cf Ajout du fileContent dans la table Document et comparatif de recherche entre lucene et pg pour les topterms adds ea698e5 prevent NPE adds d86c724 just simply put javax.servlet:javax.servlet-api as provided adds f432be6 Exclusion de bcprov-jdk15on de la dépendance apache tika (conflit avec la version de webmotion?) new a763b05 Ajout du fileContent dans la table Document et comparatif de recherche entre lucene et pg pour les topterms new 56f5389 Rest-Expose lucene and pg versions of question topterms methods new 0dd6ace Review cloudtag request from postgresql and fix indexation from lucene This update added new revisions after undoing existing revisions. That is to say, some revisions that were in the old version of the branch are not in the new version. This situation occurs when a user --force pushes a change and generates a repository containing something like this: * -- * -- B -- O -- O -- O (d9939cf) \ N -- N -- N refs/heads/feature/comparatif-lunce-pg (0dd6ace) You should already have received notification emails for all of the O revisions, and so the following emails describe only the N revisions from the common base, B. Any revisions marked "omits" are not gone; other references still refer to them. Any revisions marked "discards" are gone forever. The 3 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. 
Detailed log of new commits: commit 0dd6ace8f79a732368ce72fade56dce20f8bb19a Author: Yannick Martel <martel@codelutin.com> Date: Tue May 30 15:32:00 2017 +0200 Review cloudtag request from postgresql and fix indexation from lucene commit 56f53896104324c5cc3119271e287790851ded4e Author: Yannick Martel <martel@codelutin.com> Date: Mon May 29 10:43:39 2017 +0200 Rest-Expose lucene and pg versions of question topterms methods commit a763b05e739a1366c9060d9acdd14b90e3cbb057 Author: Yannick Martel <martel@codelutin.com> Date: Wed May 24 12:03:05 2017 +0200 Ajout du fileContent dans la table Document et comparatif de recherche entre lucene et pg pour les topterms Summary of changes: coselmar-bundle/src/main/webapp/WEB-INF/web.xml | 2 +- .../java/fr/ifremer/coselmar/beans/CloudWord.java | 5 +++ .../persistence/entity/QuestionTopiaDao.java | 41 ++++++++++----------- .../indexation/DocumentsIndexationService.java | 42 +++++++++++----------- .../indexation/QuestionsIndexationService.java | 30 ++++++++-------- .../services/v1/ExperimentationService.java | 13 ++++++- .../coselmar/services/v1/QuestionsWebService.java | 33 ++++++++++------- coselmar-rest/src/main/resources/mapping | 2 ++ pom.xml | 9 ++++- 9 files changed, 105 insertions(+), 72 deletions(-) -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
This is an automated email from the git hooks/post-receive script. New commit to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit a763b05e739a1366c9060d9acdd14b90e3cbb057 Author: Yannick Martel <martel@©odelutin.com> Date: Wed May 24 12:03:05 2017 +0200 Ajout du fileContent dans la table Document et comparatif de recherche entre lucene et pg pour les topterms --- .../persistence/entity/QuestionTopiaDao.java | 15 +- ...V2_1_0_1__9197_add_fileContent_in_documents.sql | 23 +++ .../src/main/xmi/coselmar-model.properties | 1 + .../src/main/xmi/coselmar-model.zargo | Bin 11007 -> 11150 bytes .../indexation/DocumentsIndexationService.java | 16 +- .../coselmar/services/v1/AdminWebService.java | 21 ++- .../coselmar/services/v1/DocumentsWebService.java | 33 +++- .../services/v1/ExperimentationService.java | 180 +++++++++++++++++++++ coselmar-rest/src/main/resources/mapping | 2 + 9 files changed, 267 insertions(+), 24 deletions(-) diff --git a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java index 8c66d18..8faba2f 100644 --- a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java +++ b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java @@ -369,12 +369,13 @@ public class QuestionTopiaDao extends AbstractQuestionTopiaDao<Question> { private final String sql; private final String getSql(String questionId) { - return "SELECT word, nentry FROM ts_stat( ' select to_tsvector(''public.simple_english_conf'', q.title)" + - " || to_tsvector(''public.simple_english_conf'', q.summary)" + - " || to_tsvector(''public.simple_english_conf'', qt.theme)" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d.name),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', 
dk.keywords),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d.summary),'''') FROM question q" + + return "SELECT word, nentry FROM ts_stat( ' select to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_TITLE + ")" + + " || to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_SUMMARY + ")" + + " || to_tsvector(''public.simple_english_conf'', qt." + Question.PROPERTY_THEME + ")" + + " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_NAME + "),'''')" + + " || COALESCE(to_tsvector(''public.simple_english_conf'', dk." + Document.PROPERTY_KEYWORDS + "),'''')" + + " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_SUMMARY + "),'''')" + + " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_FILE_CONTENT + "),'''') FROM question q" + " LEFT JOIN relateddocuments_relatedquestion ON" + " relateddocuments_relatedquestion.relatedquestion = q.topiaid" + " LEFT JOIN closingdocuments_relatedquestion ON" + @@ -390,7 +391,7 @@ public class QuestionTopiaDao extends AbstractQuestionTopiaDao<Question> { " ')" + " WHERE char_length(word) > 3 " + " ORDER BY nentry DESC " + - " LIMIT 30"; + " "; } QuestionTermStatSqlQuery(String questionId) { diff --git a/coselmar-persistence/src/main/resources/db/migration/V2_1_0_1__9197_add_fileContent_in_documents.sql b/coselmar-persistence/src/main/resources/db/migration/V2_1_0_1__9197_add_fileContent_in_documents.sql new file mode 100644 index 0000000..f8a64c7 --- /dev/null +++ b/coselmar-persistence/src/main/resources/db/migration/V2_1_0_1__9197_add_fileContent_in_documents.sql @@ -0,0 +1,23 @@ +--- +-- #%L +-- Coselmar :: Persistence +-- %% +-- Copyright (C) 2014 - 2016 Ifremer, Code Lutin +-- %% +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- 
License, or (at your option) any later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- You should have received a copy of the GNU General Public +-- License along with this program. If not, see +-- <http://www.gnu.org/licenses/gpl-3.0.html>. +-- #L% +--- + +ALTER TABLE document ADD fileContent TEXT; diff --git a/coselmar-persistence/src/main/xmi/coselmar-model.properties b/coselmar-persistence/src/main/xmi/coselmar-model.properties index 1ee6952..5419513 100644 --- a/coselmar-persistence/src/main/xmi/coselmar-model.properties +++ b/coselmar-persistence/src/main/xmi/coselmar-model.properties @@ -30,6 +30,7 @@ model.tagvalue.useEnumerationName=true fr.ifremer.coselmar.persistence.entity.Document.attribute.summary.tagValue.hibernateAttributeType=text fr.ifremer.coselmar.persistence.entity.Document.attribute.comment.tagValue.hibernateAttributeType=text fr.ifremer.coselmar.persistence.entity.Document.attribute.citation.tagValue.hibernateAttributeType=text +fr.ifremer.coselmar.persistence.entity.Document.attribute.fileContent.tagValue.hibernateAttributeType=text fr.ifremer.coselmar.persistence.entity.Question.attribute.summary.tagValue.hibernateAttributeType=text fr.ifremer.coselmar.persistence.entity.Question.attribute.conclusion.tagValue.hibernateAttributeType=text \ No newline at end of file diff --git a/coselmar-persistence/src/main/xmi/coselmar-model.zargo b/coselmar-persistence/src/main/xmi/coselmar-model.zargo index 85a95b3..c2eac82 100644 Binary files a/coselmar-persistence/src/main/xmi/coselmar-model.zargo and b/coselmar-persistence/src/main/xmi/coselmar-model.zargo differ diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java 
b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java index 8efe04d..700e1a0 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java @@ -86,7 +86,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { protected static final String DOCUMENT_FILE_CONTENT_INDEX_PROPERTY = "documentFileContent"; protected static final String DOCUMENT_TYPE = "documentindextype"; - public void indexDocument(DocumentBean document, String filepath) throws IOException { + public void indexDocument(DocumentBean document, String fileContent) throws IOException { Document doc = new Document(); doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES)); @@ -121,18 +121,8 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { } } - if (StringUtils.isNotBlank(filepath)) { - try { - File documentFile = new File(filepath); - String parsedDocumentFile = getLuceneUtils().getTika().parseToString(documentFile); - doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, parsedDocumentFile, LuceneUtils.TYPE_STORED)); - } catch (TikaException te) { - if (log.isErrorEnabled()) { - String message = String.format("Unable to index document '%s'", filepath); - log.error(message); - } - } - + if (StringUtils.isNotBlank(fileContent)) { + doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, fileContent, LuceneUtils.TYPE_STORED)); } getLuceneUtils().getIndexWriter().addDocument(doc); diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java index 33b3e57..98ad677 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java +++ 
b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/AdminWebService.java @@ -24,6 +24,7 @@ package fr.ifremer.coselmar.services.v1; * #L% */ +import java.io.File; import java.io.IOException; import java.util.List; @@ -42,6 +43,7 @@ import fr.ifremer.coselmar.services.indexation.DocumentsIndexationService; import fr.ifremer.coselmar.services.indexation.QuestionsIndexationService; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; +import org.apache.tika.exception.TikaException; import static org.apache.commons.logging.LogFactory.getLog; @@ -76,10 +78,25 @@ public class AdminWebService extends CoselmarWebServiceSupport { // get All documents List<Document> documents = getDocumentDao().findAll(); for (Document document : documents) { - String lightId = getPersistenceContext().getTopiaIdFactory().getRandomPart(document.getTopiaId()); DocumentBean documentBean = BeanEntityConverter.toBean(getPersistenceContext().getTopiaIdFactory(), document); - documentsIndexationService.indexDocument(documentBean, document.getFilePath()); + String filePath = document.getFilePath(); + String fileContent = null; + try { + fileContent = getServicesContext().getLuceneUtils().getTika().parseToString(new File(filePath)); + document.setFileContent(fileContent); + getDocumentDao().update(document); + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Unable to read uploaded file " + filePath, e); + } + } catch (TikaException e) { + if (log.isErrorEnabled()) { + log.error("Unable to get file content from Tika : " + filePath, e); + } + } + documentsIndexationService.indexDocument(documentBean, fileContent); } + commit(); // Get all questions List<Question> questions = getQuestionDao().findAll(); diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java index 9a1d010..d8bda0c 100644 --- 
a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/DocumentsWebService.java @@ -54,6 +54,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.logging.Log; import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.tika.exception.TikaException; import org.debux.webmotion.server.call.UploadFile; import org.debux.webmotion.server.render.Render; import org.nuiton.topia.persistence.TopiaNoResultException; @@ -65,6 +66,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.net.URL; import java.util.ArrayList; import java.util.Collection; import java.util.Date; @@ -376,12 +378,24 @@ public class DocumentsWebService extends CoselmarWebServiceSupport { String documentName = document.getName(); String contentType = null; String filePath = null; + String fileContent = null; // If document has a file, manager it ! 
if (uploadFile != null) { Pair<String, String> pathAndContentType = managerDocumentFile(uploadFile, owner); filePath = pathAndContentType.getLeft(); contentType = pathAndContentType.getRight(); + try { + fileContent = getServicesContext().getLuceneUtils().getTika().parseToString(new File(filePath)); + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Unable to read uploaded file " + filePath, e); + } + } catch (TikaException e) { + if (log.isErrorEnabled()) { + log.error("Unable to get file content from Tika : " + filePath, e); + } + } } // Document Metadata @@ -431,6 +445,7 @@ public class DocumentsWebService extends CoselmarWebServiceSupport { documentEntity.setWithFile(true); documentEntity.setMimeType(contentType); documentEntity.setFilePath(filePath); + documentEntity.setFileContent(fileContent); } else { documentEntity.setWithFile(false); } @@ -445,7 +460,7 @@ public class DocumentsWebService extends CoselmarWebServiceSupport { DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); try { - documentsIndexationService.indexDocument(result, filePath); + documentsIndexationService.indexDocument(result, fileContent); if (log.isDebugEnabled()) { String message = String.format("Document '%s' added to index", documentName); log.debug(message); @@ -489,6 +504,19 @@ public class DocumentsWebService extends CoselmarWebServiceSupport { Pair<String, String> pathAndContentType = managerDocumentFile(uploadFile, owner); String filePath = pathAndContentType.getLeft(); String contentType = pathAndContentType.getRight(); + String fileContent = null; + // Read file content + try { + fileContent = getServicesContext().getLuceneUtils().getTika().parseToString(new File(filePath)); + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Unable to read uploaded file " + filePath, e); + } + } catch (TikaException e) { + if (log.isErrorEnabled()) { + log.error("Unable to get file 
content from Tika : " + filePath, e); + } + } // If document has already a file, remove it if (StringUtils.isNotBlank(documentEntity.getFilePath())) { @@ -500,13 +528,14 @@ public class DocumentsWebService extends CoselmarWebServiceSupport { documentEntity.setMimeType(contentType); documentEntity.setFilePath(filePath); documentEntity.setFileName(uploadFile.getName()); + documentEntity.setFileContent(fileContent); // Should update document index information to put the file DocumentBean documentBean = BeanEntityConverter.toBean(getPersistenceContext().getTopiaIdFactory(), documentEntity); DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); try { - documentsIndexationService.updateDocument(documentBean, filePath); // no document file for the moment here + documentsIndexationService.updateDocument(documentBean, fileContent); if (log.isDebugEnabled()) { String message = String.format("Document '%s' was updated in index", documentEntity.getName()); log.debug(message); diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java new file mode 100644 index 0000000..48cba7f --- /dev/null +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java @@ -0,0 +1,180 @@ +package fr.ifremer.coselmar.services.v1; + +/* + * #%L + * Coselmar :: Rest Services + * $Id:$ + * $HeadURL:$ + * %% + * Copyright (C) 2014 Ifremer, Code Lutin + * %% + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program. If not, see + * <http://www.gnu.org/licenses/gpl-3.0.html>. + * #L% + */ + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.Collections2; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import fr.ifremer.coselmar.beans.CloudWord; +import fr.ifremer.coselmar.beans.DocumentBean; +import fr.ifremer.coselmar.beans.LinkBean; +import fr.ifremer.coselmar.beans.QuestionBean; +import fr.ifremer.coselmar.beans.QuestionExportModel; +import fr.ifremer.coselmar.beans.QuestionSearchBean; +import fr.ifremer.coselmar.beans.QuestionSearchExample; +import fr.ifremer.coselmar.beans.QuestionTreeNode; +import fr.ifremer.coselmar.beans.QuestionUserRole; +import fr.ifremer.coselmar.beans.UserBean; +import fr.ifremer.coselmar.beans.UserWebToken; +import fr.ifremer.coselmar.converter.BeanEntityConverter; +import fr.ifremer.coselmar.exceptions.CoselmarTechnicalException; +import fr.ifremer.coselmar.persistence.entity.CoselmarUser; +import fr.ifremer.coselmar.persistence.entity.CoselmarUserGroup; +import fr.ifremer.coselmar.persistence.entity.CoselmarUserRole; +import fr.ifremer.coselmar.persistence.entity.Document; +import fr.ifremer.coselmar.persistence.entity.Link; +import fr.ifremer.coselmar.persistence.entity.LinkImpl; +import fr.ifremer.coselmar.persistence.entity.Privacy; +import fr.ifremer.coselmar.persistence.entity.Question; +import fr.ifremer.coselmar.persistence.entity.QuestionImpl; +import fr.ifremer.coselmar.persistence.entity.Status; +import fr.ifremer.coselmar.services.CoselmarWebServiceSupport; +import 
fr.ifremer.coselmar.services.errors.InvalidCredentialException; +import fr.ifremer.coselmar.services.errors.NoResultException; +import fr.ifremer.coselmar.services.errors.UnauthorizedException; +import fr.ifremer.coselmar.services.indexation.DocumentsIndexationService; +import fr.ifremer.coselmar.services.indexation.QuestionsIndexationService; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.lucene.queryparser.classic.ParseException; +import org.debux.webmotion.server.render.Render; +import org.nuiton.csv.Export; +import org.nuiton.topia.persistence.TopiaIdFactory; +import org.nuiton.topia.persistence.TopiaNoResultException; +import org.nuiton.util.DateUtil; +import org.nuiton.util.pagination.PaginationParameter; +import org.nuiton.util.pagination.PaginationResult; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * @author ymartel <martel@codelutin.com> + */ +public class ExperimentationService extends CoselmarWebServiceSupport { + + private static final Log log = LogFactory.getLog(ExperimentationService.class); + + public List<String> evaluateTopWordsGeneration() { + Question question = getQuestionDao().forStatusEquals(Status.IN_PROGRESS).findAny(); + long start = System.currentTimeMillis(); + String questionId = getShortIdFromFull(question.getTopiaId()); + System.out.println("Question : " + questionId); + List<CloudWord> luceneTopWords = getLuceneTopWords(questionId); + long stop = System.currentTimeMillis(); + String luceneTiming = String.format("Recherche par Lucene : %d termes en %d ms", luceneTopWords.size(), stop - start); + start = System.currentTimeMillis(); + List<CloudWord> postgresTopWords = 
getPostgresTopWords(questionId); + stop = System.currentTimeMillis(); + String pgTiming = String.format("Recherche par Postgresql : %d termes en %d ms", postgresTopWords.size(), stop - start); + + return Lists.newArrayList(luceneTiming, pgTiming); + } + + public List<CloudWord> getLuceneTopWords(String questionId) { + + // Retrieve Question + String fullQuestionId = getFullIdFromShort(Question.class, questionId); + Question question = getQuestionDao().forTopiaIdEquals(fullQuestionId).findUnique(); + + List<CloudWord> topWords = new ArrayList<>(); + + QuestionsIndexationService questionsIndexationService = getServicesContext().newService(QuestionsIndexationService.class); + DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); + try { + Map<String, Long> topQuestionsTerms = questionsIndexationService.getTopQuestionsTerms(Lists.newArrayList(questionId)); + List<String> shortDocumentIds = getShortDocumentIds(question); + Map<String, Long> topDocumentsTerms = documentsIndexationService.getTopDocumentsTerms(shortDocumentIds); + for (Map.Entry<String, Long> documentTermFreq : topDocumentsTerms.entrySet()) { + String term = documentTermFreq.getKey(); + Long frequence = documentTermFreq.getValue(); + if (topQuestionsTerms.containsKey(term)) { + } else { + topQuestionsTerms.put(term, frequence); + } + } + + for (Map.Entry<String, Long> termFreq : topQuestionsTerms.entrySet()) { + String term = termFreq.getKey(); + CloudWord cloudWord = new CloudWord(term, termFreq.getValue()); + topWords.add(cloudWord); + } + + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Unable to index new question", e); + } + } + + return topWords; + } + + public List<CloudWord> getPostgresTopWords(String questionId) { + + List<CloudWord> topWords; + if (getCoselmarServicesConfig().isPostgresqlDatabase()) { + try { + topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); + } 
catch (TopiaNoResultException e) { + if (log.isErrorEnabled()) { + log.error("Try to find top words for non existing questionId" + questionId, e); + } + topWords = Collections.EMPTY_LIST; + } + } else { + topWords = Collections.EMPTY_LIST; + } + + return topWords; + } + + //////////////////////////////////////////////////////////////////////////// + /////////////////////// Internal Parts ///////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + protected List<String> getShortDocumentIds(Question question) { + List<String> shortDocumentIds = new ArrayList<>(); + for (String relatedDocumentId : question.getRelatedDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(relatedDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + for (String closingDocumentId : question.getClosingDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(closingDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + return shortDocumentIds; + } +} diff --git a/coselmar-rest/src/main/resources/mapping b/coselmar-rest/src/main/resources/mapping index b498a97..147d53d 100644 --- a/coselmar-rest/src/main/resources/mapping +++ b/coselmar-rest/src/main/resources/mapping @@ -76,6 +76,8 @@ GET /v1/questions/{questionId}/topwords QuestionsWebService.getTopWords GET /v1/general/topwords GeneralWebService.getTopWords +GET /v1/experimentation/topwords ExperimentationService.evaluateTopWordsGeneration + # Admin API POST /v1/admin/lucene/index AdminWebService.refreshLuceneIndex -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
This is an automated email from the git hooks/post-receive script. New commit to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit 56f53896104324c5cc3119271e287790851ded4e Author: Yannick Martel <martel@codelutin.com> Date: Mon May 29 10:43:39 2017 +0200 Rest-Expose lucene and pg versions of question topterms methods --- coselmar-rest/src/main/resources/mapping | 2 ++ 1 file changed, 2 insertions(+) diff --git a/coselmar-rest/src/main/resources/mapping b/coselmar-rest/src/main/resources/mapping index 147d53d..27e8e5b 100644 --- a/coselmar-rest/src/main/resources/mapping +++ b/coselmar-rest/src/main/resources/mapping @@ -77,6 +77,8 @@ GET /v1/questions/{questionId}/topwords QuestionsWebService.getTopWords GET /v1/general/topwords GeneralWebService.getTopWords GET /v1/experimentation/topwords ExperimentationService.evaluateTopWordsGeneration +GET /v1/experimentation/lucenetopwords/{questionId} ExperimentationService.getLuceneTopWords +GET /v1/experimentation/pgtopwords/{questionId} ExperimentationService.getPostgresTopWords # Admin API POST /v1/admin/lucene/index AdminWebService.refreshLuceneIndex -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
This is an automated email from the git hooks/post-receive script. New commit to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit 0dd6ace8f79a732368ce72fade56dce20f8bb19a Author: Yannick Martel <martel@©odelutin.com> Date: Tue May 30 15:32:00 2017 +0200 Review cloudtag request from postgresql and fix indexation from lucene --- .../java/fr/ifremer/coselmar/beans/CloudWord.java | 5 +++ .../persistence/entity/QuestionTopiaDao.java | 41 ++++++++++------------ .../indexation/DocumentsIndexationService.java | 12 +++---- .../services/v1/ExperimentationService.java | 13 ++++++- .../coselmar/services/v1/QuestionsWebService.java | 33 ++++++++++------- 5 files changed, 62 insertions(+), 42 deletions(-) diff --git a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java index 5837b08..abe7036 100644 --- a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java +++ b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/beans/CloudWord.java @@ -52,4 +52,9 @@ public class CloudWord implements Serializable { public void setWeight(long weight) { this.weight = weight; } + + @Override + public String toString() { + return "CloudWord { " + text + ": " + weight + " }"; + } } diff --git a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java index 8faba2f..17bcd17 100644 --- a/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java +++ b/coselmar-persistence/src/main/java/fr/ifremer/coselmar/persistence/entity/QuestionTopiaDao.java @@ -369,29 +369,24 @@ public class QuestionTopiaDao extends AbstractQuestionTopiaDao<Question> { private final String sql; private final String getSql(String questionId) { - return "SELECT word, 
nentry FROM ts_stat( ' select to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_TITLE + ")" + - " || to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_SUMMARY + ")" + - " || to_tsvector(''public.simple_english_conf'', qt." + Question.PROPERTY_THEME + ")" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_NAME + "),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', dk." + Document.PROPERTY_KEYWORDS + "),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_SUMMARY + "),'''')" + - " || COALESCE(to_tsvector(''public.simple_english_conf'', d." + Document.PROPERTY_FILE_CONTENT + "),'''') FROM question q" + - " LEFT JOIN relateddocuments_relatedquestion ON" + - " relateddocuments_relatedquestion.relatedquestion = q.topiaid" + - " LEFT JOIN closingdocuments_relatedquestion ON" + - " closingdocuments_relatedquestion.relatedquestion = q.topiaid" + - " LEFT JOIN document d on" + - " d.topiaid = closingdocuments_relatedquestion.closingdocuments OR" + - " d.topiaid = relateddocuments_relatedquestion.relateddocuments" + - " LEFT JOIN question_theme qt ON" + - " qt.owner = q.topiaid" + - " LEFT JOIN document_keywords dk ON" + - " dk.owner = d.topiaid" + - " WHERE q.topiaid = ''" + questionId + "''" + - " ')" + - " WHERE char_length(word) > 3 " + - " ORDER BY nentry DESC " + - " "; + + return "SELECT word, nentry FROM ts_stat( ' " + + " with documents as ( select d." + Document.PROPERTY_NAME + " as name, d." + Document.PROPERTY_SUMMARY + " as summary, d." + Document.PROPERTY_FILE_CONTENT + " as fileContent, (SELECT string_agg(dk." 
+ Document.PROPERTY_KEYWORDS + ", '' '') from document_keywords dk where dk.owner = d.topiaid) as keywords " + + " from question k " + + " LEFT JOIN relateddocuments_relatedquestion RD ON RD.relatedquestion = k.topiaid " + + " LEFT JOIN closingdocuments_relatedquestion CD ON CD.relatedquestion = k.topiaid " + + " LEFT JOIN document d on d.topiaid = CD.closingdocuments OR d.topiaid = RD.relateddocuments " + + " WHERE k.topiaid = ''" + questionId + "'' ) " + + " SELECT to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_TITLE + ") " + + " || to_tsvector(''public.simple_english_conf'', q." + Question.PROPERTY_SUMMARY + ") " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(qt." + Question.PROPERTY_THEME + ", '' '') from question_theme qt where qt.owner = q.topiaid ) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(name, '' '') FROM documents) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(summary, '' '') FROM documents) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(keywords, '' '') FROM documents) ) " + + " || to_tsvector(''public.simple_english_conf'', (SELECT string_agg(fileContent, '' '') FROM documents)) " + + " FROM question q where q.topiaid = ''" + questionId + "'' ') " + + " WHERE char_length(word) > 3 " + + " ORDER BY nentry DESC "; } QuestionTermStatSqlQuery(String questionId) { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java index 700e1a0..158c10f 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java @@ -94,7 +94,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { String 
documentName = document.getName(); String documentSummary = document.getSummary(); - doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, documentName, LuceneUtils.TYPE_STORED)); + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); if (StringUtils.isNotBlank(document.getAuthors())) { doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); } @@ -252,7 +252,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { String documentName = document.getName(); String documentSummary = document.getSummary(); - doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, documentName, LuceneUtils.TYPE_STORED)); + doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); if (StringUtils.isNotBlank(document.getAuthors())) { doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); } @@ -326,7 +326,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { } - public Map<String, Long> getTopDocumentsTerms(List<String> questionIds) throws IOException { + public Map<String, Long> getTopDocumentsTerms(List<String> documentIds) throws IOException { DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter()); IndexSearcher isearcher = new IndexSearcher(ireader); @@ -336,9 +336,9 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { queryBuilder.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST); BooleanQuery.Builder questionIdBuilder = new BooleanQuery.Builder(); - for (String questionId : questionIds) { - if(StringUtils.isNotBlank(questionId)) { - questionIdBuilder.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, questionId.toLowerCase())), BooleanClause.Occur.SHOULD); + for (String documentId : documentIds) { + if(StringUtils.isNotBlank(documentId)) { + questionIdBuilder.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, 
documentId.toLowerCase())), BooleanClause.Occur.SHOULD); } } queryBuilder.add(questionIdBuilder.build(), BooleanClause.Occur.MUST); diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java index 48cba7f..3c83b41 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java @@ -27,8 +27,11 @@ package fr.ifremer.coselmar.services.v1; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; import com.google.common.collect.Sets; +import com.rometools.rome.feed.rss.Cloud; import fr.ifremer.coselmar.beans.CloudWord; import fr.ifremer.coselmar.beans.DocumentBean; import fr.ifremer.coselmar.beans.LinkBean; @@ -71,6 +74,7 @@ import org.nuiton.util.DateUtil; import org.nuiton.util.pagination.PaginationParameter; import org.nuiton.util.pagination.PaginationResult; +import javax.annotation.Nullable; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -122,6 +126,7 @@ public class ExperimentationService extends CoselmarWebServiceSupport { String term = documentTermFreq.getKey(); Long frequence = documentTermFreq.getValue(); if (topQuestionsTerms.containsKey(term)) { + topQuestionsTerms.put(term, topQuestionsTerms.get(term) + frequence); } else { topQuestionsTerms.put(term, frequence); } @@ -139,7 +144,13 @@ public class ExperimentationService extends CoselmarWebServiceSupport { } } - return topWords; + ImmutableList<CloudWord> cloudWords = ImmutableList.copyOf(Ordering.natural().onResultOf(new Function<CloudWord, Long>() { + public Long apply(CloudWord input) { + return 
input.getWeight(); + } + }).reverse().sortedCopy(topWords)); + + return cloudWords; } public List<CloudWord> getPostgresTopWords(String questionId) { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java index e7a9d48..6970d98 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/QuestionsWebService.java @@ -27,7 +27,9 @@ package fr.ifremer.coselmar.services.v1; import com.google.common.base.Function; import com.google.common.base.Preconditions; import com.google.common.collect.Collections2; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import fr.ifremer.coselmar.beans.CloudWord; import fr.ifremer.coselmar.beans.DocumentBean; @@ -1078,17 +1080,16 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { Question question = getQuestionDao().forTopiaIdEquals(fullQuestionId).findUnique(); List<CloudWord> topWords = new ArrayList<>(); -// if (getCoselmarServicesConfig().isPostgresqlDatabase()) { -// try { -// topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); -// } catch (TopiaNoResultException e) { -// if (log.isErrorEnabled()) { -// log.error("Try to find top words for non existing questionId" + questionId, e); -// } -// throw new NoResultException("Question does not exist"); -// } -// } else { -// topWords = Collections.EMPTY_LIST; + if (getCoselmarServicesConfig().isPostgresqlDatabase()) { + try { + topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); + } catch (TopiaNoResultException e) { + if (log.isErrorEnabled()) { + log.error("Try to find top words for non existing questionId" + questionId, e); + } + throw new 
NoResultException("Question does not exist"); + } + } else { QuestionsIndexationService questionsIndexationService = getServicesContext().newService(QuestionsIndexationService.class); DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); @@ -1100,6 +1101,7 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { String term = documentTermFreq.getKey(); Long frequence = documentTermFreq.getValue(); if (topQuestionsTerms.containsKey(term)) { + topQuestionsTerms.put(term, topQuestionsTerms.get(term) + frequence); } else { topQuestionsTerms.put(term, frequence); } @@ -1116,7 +1118,14 @@ public class QuestionsWebService extends CoselmarWebServiceSupport { log.error("Unable to index new question", e); } } -// } + + // Sort by CloudTag#weight DESC + topWords = ImmutableList.copyOf(Ordering.natural().onResultOf(new Function<CloudWord, Long>() { + public Long apply(CloudWord input) { + return input.getWeight(); + } + }).reverse().sortedCopy(topWords)); + } return topWords; } -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
participants (1)
-
codelutin.com scm