branch feature/comparatif-lunce-pg updated (92ac2f3 -> f5fda91)
This is an automated email from the git hooks/post-receive script. New change to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git discards 92ac2f3 Revert the disparition of ExperimentationService with rebase :o adds dc2afeb Prepare import massif de document adds d5a986f refs-30 #9206 First draft for Documents Zip management adds fdce731 refs-50 #9206 First draft for error management in Documents Zip import adds 1f29db5 refs #9206 technical error management during zip import adds 64faa45 refs-65 #9206 Start UI for documents zip file adds 01fdb54 refs-75 #9206 Add some details/infos in admin page about actions adds 008c802 Merge branch 'feature/9206-upload-validation-zip-documents' into develop adds 3de0177 refs #9206 Fix indexation revert when failure on documents mass import new f5fda91 Revert the disparition of ExperimentationService with rebase :o This update added new revisions after undoing existing revisions. That is to say, some revisions that were in the old version of the branch are not in the new version. This situation occurs when a user --force pushes a change and generates a repository containing something like this: * -- * -- B -- O -- O -- O (92ac2f3) \ N -- N -- N refs/heads/feature/comparatif-lunce-pg (f5fda91) You should already have received notification emails for all of the O revisions, and so the following emails describe only the N revisions from the common base, B. Any revisions marked "omits" are not gone; other references still refer to them. Any revisions marked "discards" are gone forever. The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "adds" were already present in the repository and have only been added to this reference. 
Detailed log of new commits: commit f5fda91cde34f21da483a09625282614d26a30c7 Author: Yannick Martel <martel@codelutin.com> Date: Wed Jun 7 17:02:07 2017 +0200 Revert the disparition of ExperimentationService with rebase :o Summary of changes: coselmar-rest/pom.xml | 4 - .../fr/ifremer/coselmar/beans/DocumentBean.java | 11 +- .../coselmar/beans/DocumentImportModel.java | 50 +++ .../java/fr/ifremer/coselmar/beans/FileInfos.java | 35 ++ .../beans/MassiveDocumentsImportResult.java | 51 +++ .../coselmar/converter/BeanEntityConverter.java | 4 +- .../services/CoselmarWebServiceSupport.java | 8 +- .../indexation/DocumentsIndexationService.java | 2 +- .../coselmar/services/indexation/TikaUtils.java | 5 + .../coselmar/services/v1/AdminWebService.java | 45 ++- .../coselmar/services/v1/DocumentsWebService.java | 424 ++++++++++++++++----- coselmar-rest/src/main/resources/mapping | 1 + .../services/v1/DocumentsWebServiceTest.java | 124 ++++++ coselmar-rest/src/test/resources/documents.zip | Bin 0 -> 151809 bytes .../src/test/resources/documents_errors.zip | Bin 0 -> 87774 bytes coselmar-ui/src/main/webapp/i18n/en.js | 31 ++ coselmar-ui/src/main/webapp/i18n/fr.js | 32 ++ .../src/main/webapp/js/coselmar-admin-services.js | 18 + .../src/main/webapp/js/coselmar-controllers.js | 20 +- .../src/main/webapp/views/admin/admintools.html | 70 +++- 20 files changed, 800 insertions(+), 135 deletions(-) create mode 100644 coselmar-rest/src/main/java/fr/ifremer/coselmar/beans/DocumentImportModel.java create mode 100644 coselmar-rest/src/main/java/fr/ifremer/coselmar/beans/FileInfos.java create mode 100644 coselmar-rest/src/main/java/fr/ifremer/coselmar/beans/MassiveDocumentsImportResult.java create mode 100644 coselmar-rest/src/test/java/fr/ifremer/coselmar/services/v1/DocumentsWebServiceTest.java create mode 100644 coselmar-rest/src/test/resources/documents.zip create mode 100644 coselmar-rest/src/test/resources/documents_errors.zip -- To stop receiving notification emails like this one, 
please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
This is an automated email from the git hooks/post-receive script. New commit to branch feature/comparatif-lunce-pg in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit f5fda91cde34f21da483a09625282614d26a30c7 Author: Yannick Martel <martel@codelutin.com> Date: Wed Jun 7 17:02:07 2017 +0200 Revert the disparition of ExperimentationService with rebase :o --- .gitignore | 3 - .../services/v1/ExperimentationService.java | 126 +++++++++++++++++++++ coselmar-rest/src/main/resources/mapping | 3 + 3 files changed, 129 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index fc6f9da..4ac8fb8 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,3 @@ /*/.settings /*/*.zargo~ coselmar-ui/src/main/webapp/version.txt - - -coselmar-ui/src/main/webapp/version.txt diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java new file mode 100644 index 0000000..76b3101 --- /dev/null +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/v1/ExperimentationService.java @@ -0,0 +1,126 @@ +package fr.ifremer.coselmar.services.v1; + +import com.google.common.base.Function; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Ordering; +import fr.ifremer.coselmar.beans.CloudWord; +import fr.ifremer.coselmar.persistence.entity.Question; +import fr.ifremer.coselmar.persistence.entity.Status; +import fr.ifremer.coselmar.services.CoselmarWebServiceSupport; +import fr.ifremer.coselmar.services.indexation.DocumentsIndexationService; +import fr.ifremer.coselmar.services.indexation.QuestionsIndexationService; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.nuiton.topia.persistence.TopiaNoResultException; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; 
+import java.util.List; +import java.util.Map; + +/** + * @author ymartel (martel@codelutin.com) + */ +public class ExperimentationService extends CoselmarWebServiceSupport { + + private static final Log log = LogFactory.getLog(ExperimentationService.class); + + public List<String> evaluateTopWordsGeneration() { + Question question = getQuestionDao().forStatusEquals(Status.IN_PROGRESS).findAny(); + long start = System.currentTimeMillis(); + String questionId = getShortIdFromFull(question.getTopiaId()); + System.out.println("Question : " + questionId); + List<CloudWord> luceneTopWords = getLuceneTopWords(questionId); + long stop = System.currentTimeMillis(); + String luceneTiming = String.format("Recherche par Lucene : %d termes en %d ms", luceneTopWords.size(), stop - start); + start = System.currentTimeMillis(); + List<CloudWord> postgresTopWords = getPostgresTopWords(questionId); + stop = System.currentTimeMillis(); + String pgTiming = String.format("Recherche par Postgresql : %d termes en %d ms", postgresTopWords.size(), stop - start); + + return Lists.newArrayList(luceneTiming, pgTiming); + } + + public List<CloudWord> getLuceneTopWords(String questionId) { + + // Retrieve Question + String fullQuestionId = getFullIdFromShort(Question.class, questionId); + Question question = getQuestionDao().forTopiaIdEquals(fullQuestionId).findUnique(); + + List<CloudWord> topWords = new ArrayList<>(); + + QuestionsIndexationService questionsIndexationService = getServicesContext().newService(QuestionsIndexationService.class); + DocumentsIndexationService documentsIndexationService = getServicesContext().newService(DocumentsIndexationService.class); + try { + Map<String, Long> topQuestionsTerms = questionsIndexationService.getTopQuestionsTerms(Lists.newArrayList(questionId)); + List<String> shortDocumentIds = getShortDocumentIds(question); + Map<String, Long> topDocumentsTerms = documentsIndexationService.getTopDocumentsTerms(shortDocumentIds); + for (Map.Entry<String, Long> 
documentTermFreq : topDocumentsTerms.entrySet()) { + String term = documentTermFreq.getKey(); + Long frequence = documentTermFreq.getValue(); + if (topQuestionsTerms.containsKey(term)) { + topQuestionsTerms.put(term, topQuestionsTerms.get(term) + frequence); + } else { + topQuestionsTerms.put(term, frequence); + } + } + + for (Map.Entry<String, Long> termFreq : topQuestionsTerms.entrySet()) { + String term = termFreq.getKey(); + CloudWord cloudWord = new CloudWord(term, termFreq.getValue()); + topWords.add(cloudWord); + } + + } catch (IOException e) { + if (log.isErrorEnabled()) { + log.error("Unable to index new question", e); + } + } + + ImmutableList<CloudWord> cloudWords = ImmutableList.copyOf(Ordering.natural().onResultOf(new Function<CloudWord, Long>() { + public Long apply(CloudWord input) { + return input.getWeight(); + } + }).reverse().sortedCopy(topWords)); + + return cloudWords; + } + + public List<CloudWord> getPostgresTopWords(String questionId) { + + List<CloudWord> topWords; + if (getCoselmarServicesConfig().isPostgresqlDatabase()) { + try { + topWords = getQuestionDao().findTopWords(getFullIdFromShort(Question.class, questionId)); + } catch (TopiaNoResultException e) { + if (log.isErrorEnabled()) { + log.error("Try to find top words for non existing questionId" + questionId, e); + } + topWords = Collections.EMPTY_LIST; + } + } else { + topWords = Collections.EMPTY_LIST; + } + + return topWords; + } + + //////////////////////////////////////////////////////////////////////////// + /////////////////////// Internal Parts ///////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + protected List<String> getShortDocumentIds(Question question) { + List<String> shortDocumentIds = new ArrayList<>(); + for (String relatedDocumentId : question.getRelatedDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(relatedDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + for (String 
closingDocumentId : question.getClosingDocumentsTopiaIds()) { + String shortIdFromFull = getShortIdFromFull(closingDocumentId); + shortDocumentIds.add(shortIdFromFull); + } + return shortDocumentIds; + } +} diff --git a/coselmar-rest/src/main/resources/mapping b/coselmar-rest/src/main/resources/mapping index 154cae2..07fc121 100644 --- a/coselmar-rest/src/main/resources/mapping +++ b/coselmar-rest/src/main/resources/mapping @@ -79,6 +79,9 @@ GET /v1/general/topwords GeneralWebService.getTopWords # Admin API POST /v1/admin/lucene/index AdminWebService.refreshLuceneIndex POST /v1/admin/documents/zip DocumentsWebService.uploadZipDocuments +GET /v1/experimentation/topwords ExperimentationService.evaluateTopWordsGeneration +GET /v1/experimentation/lucenetopwords/{questionId} ExperimentationService.getLuceneTopWords +GET /v1/experimentation/pgtopwords/{questionId} ExperimentationService.getPostgresTopWords # Export -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.
participants (1)
-
codelutin.com scm