This is an automated email from the git hooks/post-receive script. New commit to branch develop in repository coselmar. See https://gitlab.nuiton.org/codelutin/coselmar.git commit c616ba1fb803d57b19b2c1dd0c5f1e0e62108b2b Author: Yannick Martel <martel@©odelutin.com> Date: Mon May 22 16:08:11 2017 +0200 refs #9197 use vectors on document fields indexation --- .../indexation/DocumentsIndexationService.java | 20 ++++++++--------- .../coselmar/services/indexation/LuceneUtils.java | 15 +++++++++++++ .../indexation/QuestionsIndexationService.java | 25 ++++++---------------- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java index 87c4b68..e218577 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/DocumentsIndexationService.java @@ -87,16 +87,16 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { String documentName = document.getName(); String documentSummary = document.getSummary(); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); + doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, documentName, LuceneUtils.TYPE_STORED)); if (StringUtils.isNotBlank(document.getAuthors())) { doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); } - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, Field.Store.YES)); + doc.add(new Field(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, LuceneUtils.TYPE_STORED)); doc.add(new Field("type", DOCUMENT_TYPE, TextField.TYPE_STORED)); // Cloud Tag management if (documentName.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new TextField(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName.replaceAll("'", " "), Field.Store.YES)); + doc.add(new Field(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } // if (documentSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { // doc.add(new TextField(DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY, documentSummary, Field.Store.YES)); @@ -109,7 +109,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { // Cloud Tag management if (keyword.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword.replaceAll("'", " "), TextField.TYPE_STORED)); + doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } } } @@ -118,7 +118,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { try { File documentFile = new File(filepath); String parsedDocumentFile = getLuceneUtils().getTika().parseToString(documentFile); - doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, parsedDocumentFile, TextField.TYPE_STORED)); + doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, parsedDocumentFile, LuceneUtils.TYPE_STORED)); } catch (TikaException te) { if (log.isErrorEnabled()) { String message = String.format("Unable to index document '%s'", filepath); @@ -255,15 +255,15 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { String documentName = document.getName(); String documentSummary = document.getSummary(); - doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, documentName, Field.Store.YES)); + doc.add(new Field(DOCUMENT_NAME_INDEX_PROPERTY, documentName, LuceneUtils.TYPE_STORED)); if (StringUtils.isNotBlank(document.getAuthors())) { doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES)); } - doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, Field.Store.YES)); + doc.add(new Field(DOCUMENT_SUMMARY_INDEX_PROPERTY, documentSummary, LuceneUtils.TYPE_STORED)); // Cloud Tag management if (documentName.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new TextField(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName.replaceAll("'", " "), Field.Store.YES)); + doc.add(new Field(DOCUMENT_NAME_CLOUD_TAG_PROPERTY, documentName.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } // if (documentSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { // doc.add(new TextField(DOCUMENT_SUMMARY_CLOUD_TAG_PROPERTY, documentSummary, Field.Store.YES)); @@ -276,7 +276,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { // Cloud Tag management if (keyword.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new TextField(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword.replaceAll("'", " "), Field.Store.YES)); + doc.add(new Field(DOCUMENT_KEYWORD_CLOUD_TAG_PROPERTY, keyword.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } } } @@ -288,7 +288,7 @@ public class DocumentsIndexationService extends CoselmarSimpleServiceSupport { File documentFile = new File(filepath); String parsedDocumentFile = getLuceneUtils().getTika().parseToString(documentFile); if (StringUtils.isNotBlank(parsedDocumentFile)) { - doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, parsedDocumentFile, TextField.TYPE_STORED)); + doc.add(new Field(DOCUMENT_FILE_CONTENT_INDEX_PROPERTY, parsedDocumentFile, LuceneUtils.TYPE_STORED)); } } catch (TikaException te) { if (log.isErrorEnabled()) { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java index 43a3c43..26224cf 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/LuceneUtils.java @@ -33,6 +33,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; @@ -51,6 +53,19 @@ public class LuceneUtils { public IndexWriter indexWriter; protected Tika tika; + public static final FieldType TYPE_STORED = new FieldType(); + static { + TYPE_STORED.setOmitNorms(true); + TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + TYPE_STORED.setStored(true); + TYPE_STORED.setStoreTermVectors(true); + TYPE_STORED.setStoreTermVectorPositions(true); + TYPE_STORED.setStoreTermVectorOffsets(true); + TYPE_STORED.setStoreTermVectorPayloads(true); + TYPE_STORED.setTokenized(true); + TYPE_STORED.freeze(); + } + protected CoselmarServicesConfig servicesConfig; public LuceneUtils(CoselmarServicesConfig servicesConfig) { diff --git a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java index b5e92d3..f60ce70 100644 --- a/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java +++ b/coselmar-rest/src/main/java/fr/ifremer/coselmar/services/indexation/QuestionsIndexationService.java @@ -86,19 +86,6 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { protected static final String QUESTION_THEME_CLOUD_TAG_PROPERTY = "questionCloudTagTheme"; protected static final String DOCUMENT_TYPE = "questionindextype"; - public static final FieldType TYPE_STORED = new FieldType(); - static { - TYPE_STORED.setOmitNorms(true); - TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); - TYPE_STORED.setStored(true); - TYPE_STORED.setStoreTermVectors(true); - TYPE_STORED.setStoreTermVectorPositions(true); - TYPE_STORED.setStoreTermVectorOffsets(true); - TYPE_STORED.setStoreTermVectorPayloads(true); - TYPE_STORED.setTokenized(true); - TYPE_STORED.freeze(); - } - public void indexQuestion(QuestionBean question) throws IOException { // First : try to find if already exist to update it @@ -120,11 +107,11 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, questionTitle, Field.Store.YES)); - doc.add(new Field(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, TYPE_STORED)); + doc.add(new Field(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, LuceneUtils.TYPE_STORED)); // Cloud Tag management if (questionTitle.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new Field(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle.replaceAll("'", " "), TYPE_STORED)); + doc.add(new Field(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } // if (questionSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { // doc.add(new TextField(QUESTION_SUMMARY_CLOUD_TAG_PROPERTY, questionSummary, Field.Store.YES)); @@ -137,7 +124,7 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { // Cloud Tag management if (theme.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme.replaceAll("'", " "), TYPE_STORED)); + doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } } } @@ -157,14 +144,14 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { doc.add(new StringField(QUESTION_ID_INDEX_PROPERTY, question.getId(), Field.Store.YES)); doc.add(new TextField(QUESTION_TITLE_INDEX_PROPERTY, questionTitle, Field.Store.YES)); - doc.add(new Field(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, TYPE_STORED)); + doc.add(new Field(QUESTION_SUMMARY_INDEX_PROPERTY, questionSummary, LuceneUtils.TYPE_STORED)); doc.add(new TextField(QUESTION_STATUS_INDEX_PROPERTY, question.getStatus(), Field.Store.YES)); doc.add(new TextField(QUESTION_PRIVACY_INDEX_PROPERTY, question.getPrivacy(), Field.Store.YES)); // Cloud Tag management if (questionTitle.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new Field(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle.replaceAll("'", " "), TYPE_STORED)); + doc.add(new Field(QUESTION_TITLE_CLOUD_TAG_PROPERTY, questionTitle.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } // if (questionSummary.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { // doc.add(new TextField(QUESTION_SUMMARY_CLOUD_TAG_PROPERTY, questionSummary, Field.Store.YES)); @@ -177,7 +164,7 @@ public class QuestionsIndexationService extends CoselmarSimpleServiceSupport { // Cloud Tag management if (theme.length() >= TransverseIndexationService.CLOUD_TAG_WORD_MIN_SIZE) { - doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme.replaceAll("'", " "), TYPE_STORED)); + doc.add(new Field(QUESTION_THEME_CLOUD_TAG_PROPERTY, theme.replaceAll("'", " "), LuceneUtils.TYPE_STORED)); } } } -- To stop receiving notification emails like this one, please contact codelutin.com SCM administrator <admin+scm@codelutin.com>.