Author: bpoussin Date: 2010-12-10 00:30:35 +0100 (Fri, 10 Dec 2010) New Revision: 588 Url: http://nuiton.org/repositories/revision/wikitty/588 Log: (un gros commit comme je ne les aimes pas :() Evolution #1137: create new field type binary (only generation is not done) Evolution #1141: Refactor solr code to store less field (currently 6 for 1 text field) Evolution #1142: Add documentation on solr indexation mecanisme Evolution #1143: Add mecanisme in jdbc to allow easy support of new database Change many string to constant value use BusinessHelper when we can to simplify code Added: trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query-mysql.properties trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/SolrUtil.java trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/package-info.java trunk/wikitty-solr-impl/src/main/resources/solrconfig-complete.xml Modified: trunk/pom.xml trunk/wikitty-api/pom.xml trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyConfig.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyUtil.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportCSV.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportXML.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/FieldType.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/Wikitty.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyCopyOnWrite.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyImpl.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/search/Search.java trunk/wikitty-api/src/main/java/org/nuiton/wikitty/services/WikittyServiceStorage.java trunk/wikitty-api/src/test/java/org/nuiton/wikitty/conform/StorageTest.java trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyJDBCUtil.java trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyStorageJDBC.java 
trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query.properties trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/Restriction2Solr.java trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/WikittySearchEngineSolr.java trunk/wikitty-solr-impl/src/main/resources/schema.xml Modified: trunk/pom.xml =================================================================== --- trunk/pom.xml 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/pom.xml 2010-12-09 23:30:35 UTC (rev 588) @@ -27,16 +27,25 @@ <module>wikitty-solr-impl</module> <module>wikitty-jdbc-impl</module> - <module>wikitty-ui-zk</module> <module>wikitty-hessian-client</module> <module>wikitty-hessian-server</module> <module>wikitty-perf-test</module> + <module>wikitty-publication</module> + </modules> <dependencyManagement> <dependencies> + <!-- base64 encoder/decoder used for binary type --> <dependency> + <groupId>net.iharder</groupId> + <artifactId>base64</artifactId> + <version>2.3.8</version> + </dependency> + + <!-- jetty servlet container used for hessian server --> + <dependency> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-server</artifactId> <version>${jettyVersion}</version> @@ -57,42 +66,7 @@ <version>${jettyVersion}</version> </dependency> - <!--dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - <version>4.8.1</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - <version>1.1.1</version> - <scope>compile</scope> - </dependency> - - <dependency> - <groupId>log4j</groupId> - <artifactId>log4j</artifactId> - <version>1.2.16</version> - <scope>test</scope> - </dependency> - - <dependency> - <groupId>commons-lang</groupId> - <artifactId>commons-lang</artifactId> - <version>2.5</version> - <scope>compile</scope> - </dependency> - - <dependency> - <groupId>commons-collections</groupId> - <artifactId>commons-collections</artifactId> - <version>3.2.1</version> - 
<scope>compile</scope> - </dependency--> - - <dependency> <groupId>commons-dbcp</groupId> <artifactId>commons-dbcp</artifactId> <version>1.4</version> @@ -585,6 +559,7 @@ <module>wikitty-jpa-impl</module> <module>wikitty-jms-impl</module> <module>wikitty-multistorage-impl</module> + <module>wikitty-ui-zk</module> </modules> </profile> </profiles> Modified: trunk/wikitty-api/pom.xml =================================================================== --- trunk/wikitty-api/pom.xml 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/pom.xml 2010-12-09 23:30:35 UTC (rev 588) @@ -41,7 +41,13 @@ </dependency> <!-- COMPILE --> + <!-- base64 encoder/decoder used for binary type --> <dependency> + <groupId>net.iharder</groupId> + <artifactId>base64</artifactId> + </dependency> + + <dependency> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> </dependency> Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyConfig.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyConfig.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyConfig.java 2010-12-09 23:30:35 UTC (rev 588) @@ -133,7 +133,9 @@ WIKITTY_STORAGE_JDBC_QUERY_FILE( "wikitty.storage.jdbc.queryfile", - _("JDBC query configuration file"), + _("JDBC query configuration file. You can put more than one file" + + " to load specific SQL statement for your database." 
+ + " Exemple: wikitty-jdbc-query.properties,wikitty-jdbc-query-mysql.properties"), "wikitty-jdbc-query.properties", String.class, false, false), WIKITTY_STORAGE_JDBC_DRIVER( Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyUtil.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyUtil.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/WikittyUtil.java 2010-12-09 23:30:35 UTC (rev 588) @@ -25,8 +25,8 @@ package org.nuiton.wikitty; -import com.thoughtworks.xstream.converters.basic.DateConverter; import java.beans.PropertyDescriptor; +import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Method; import org.apache.commons.lang.StringUtils; @@ -60,6 +60,7 @@ import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; +import net.iharder.Base64; import org.apache.commons.beanutils.BeanUtilsBean; import org.apache.commons.beanutils.converters.DateTimeConverter; @@ -381,54 +382,37 @@ return result; } - // /** - // * - // * @param value null and empty string are casted to '0' int value. - // * @throws WikittyException on NumberFormatException or if value object - // can't be casted to int. 
- // */ - // static public int toInt(Object value) throws WikittyException { - // int result = 0; - // if (value == null || value.equals("") ) { - // result = 0; // default to 0 - // } else if (value instanceof Number) { - // result = ((Number) value).intValue(); - // } else { - // // try to convert to int - // try { - // result = Integer.parseInt(value.toString()); - // } catch (NumberFormatException eee) { - // throw new WikittyException(String.format( - // "Can't convert value '%s' to int", getClass(value)), eee); - // } - // } - // return result; - // } - // - // static public float toFloat(Object value) throws WikittyException { - // float result = 0; - // if (value == null) { - // result = 0; // default to 0 - // } else if (value instanceof Number) { - // result = ((Number) value).floatValue(); - // } else { - // // try to convert to float - // try { - // result = Float.parseFloat(value.toString()); - // } catch (NumberFormatException eee) { - // throw new WikittyException(String.format( - // "Can't convert value '%s' to float", getClass(value)), eee); - // } - // } - // return result; - // } + /** + * Get value as Binary. + * + * @param value null and empty return empty byte[] + * + * @return value as byte[] + */ + static public byte[] toBinary(Object value) { + byte[] result = null; + if (value == null || "".equals(value)) { + result = new byte[0]; // default to 0 + } else if (value instanceof String) { + try { + result = Base64.decode((String) value); + } catch (IOException eee) { + throw new WikittyException("Bad Base64 format", eee); + } + } else if (value instanceof byte[]) { + result = (byte[]) value; + } else { + result = value.toString().getBytes(); + } + return result; + } /** * Get value as BigDecimal. - * + * * @param value null and empty string are casted to '0' value. * @throws WikittyException on NumberFormatException or if value object can't be casted to number. 
- * + * * @return value as BigDecimal */ static public BigDecimal toBigDecimal(Object value) { @@ -565,29 +549,62 @@ } /** - * Convert object o for indexation. - * + * Convert object o for export CSV/XML. + * * @param field field description * @param o field value * @return solr representation */ - static public String toString(FieldType field, Object o) { + static public String toStringForExport(FieldType field, Object o) { String result = null; if (o != null) { + if (o instanceof String) { + result = (String) o; + } else { switch (field.getType()) { - case DATE: - // Date date = (Date)o; - result = (o instanceof String) ? (String) o - : WikittyUtil.solrDateFormat.format((Date) o); - break; - default: - result = WikittyUtil.toString(o); - break; + case BINARY: + result = Base64.encodeBytes((byte[]) o); + break; + case DATE: + // Date date = (Date)o; + result = WikittyUtil.solrDateFormat.format((Date) o); + break; + default: + result = WikittyUtil.toString(o); + break; + } } } return result; } + /** + * Convert object o for indexation + * + * @param field field description + * @param o field value + * @return solr representation + */ + static public String toStringForSearchEngine(FieldType field, Object o) { + String result = null; + if (o != null) { + switch (field.getType()) { + case BINARY: + // don't index binary + result = ""; + break; + case DATE: + // Date date = (Date)o; + result = (o instanceof String) ? (String) o + : WikittyUtil.solrDateFormat.format((Date) o); + break; + default: + result = WikittyUtil.toString(o); + break; + } + } + return result; + } /** * Convert string field representation to correct value type. 
@@ -599,18 +616,29 @@ static public Object fromString(FieldType field, String s) { Object result = null; switch (field.getType()) { - case BOOLEAN: - result = WikittyUtil.toBoolean(s); - break; - case DATE: - result = WikittyUtil.toDate(s); - break; - case NUMERIC: - result = WikittyUtil.toBigDecimal(s); - break; - default: - result = s; - break; + case BINARY: + if (s == null | "".equals(s)) { + result = new byte[0]; + } else { + try { + result = Base64.decode(s); + } catch (IOException eee) { + throw new WikittyException("Bad Base64 format", eee); + } + } + break; + case BOOLEAN: + result = WikittyUtil.toBoolean(s); + break; + case DATE: + result = WikittyUtil.toDate(s); + break; + case NUMERIC: + result = WikittyUtil.toBigDecimal(s); + break; + default: + result = s; + break; } return result; } Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportCSV.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportCSV.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportCSV.java 2010-12-09 23:30:35 UTC (rev 588) @@ -344,13 +344,13 @@ if (fqField != null) { String separator = ""; for (Object o : (Collection<?>) fqField) { - String fqFieldValue = WikittyUtil.toString(type, o); + String fqFieldValue = WikittyUtil.toStringForExport(type, o); currentField += separator + "(" + fqFieldValue + ")"; separator = ","; } } } else { - String fqFieldValue = WikittyUtil.toString(type, w.getFqField(fieldName)); + String fqFieldValue = WikittyUtil.toStringForExport(type, w.getFqField(fieldName)); currentField = fqFieldValue; } Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportXML.java =================================================================== --- 
trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportXML.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/addons/importexport/ImportExportXML.java 2010-12-09 23:30:35 UTC (rev 588) @@ -125,9 +125,8 @@ Map<String, String> tagValues = WikittyUtil.tagValuesToMap(CDATA); ext.setTagValues(tagValues); } else if (w != null) { - String[] fq = name.split("\\."); - String extensionName = fq[0]; - String fieldName = fq[1]; + String extensionName = WikittyExtension.extractExtensionName(name); + String fieldName = WikittyExtension.extractFieldName(name); FieldType fieldType = w.getFieldType(name); if (fieldType.isCollection()) { w.addToField(extensionName, fieldName, CDATA); @@ -177,7 +176,7 @@ Object fqField = w.getFqField(fieldName); if (fqField != null) { for (Object o : (Collection) fqField) { - String fqFieldValue = WikittyUtil.toString(type, o); + String fqFieldValue = WikittyUtil.toStringForExport(type, o); if (fqFieldValue != null) { fqFieldValue = StringEscapeUtils.escapeXml(fqFieldValue); result.write(" <" + fieldName + ">" + fqFieldValue + "</" + fieldName + ">\n"); @@ -185,7 +184,7 @@ } } } else { - String fqFieldValue = WikittyUtil.toString(type, w.getFqField(fieldName)); + String fqFieldValue = WikittyUtil.toStringForExport(type, w.getFqField(fieldName)); if (fqFieldValue != null) { fqFieldValue = StringEscapeUtils.escapeXml(fqFieldValue); result.write(" <" + fieldName + ">" + fqFieldValue + "</" + fieldName + ">\n"); Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/FieldType.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/FieldType.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/FieldType.java 2010-12-09 23:30:35 UTC (rev 588) @@ -53,7 +53,7 @@ static public String NOT_NULL = "notNull"; static public enum TYPE { - BOOLEAN, 
DATE, NUMERIC, STRING, WIKITTY; + BINARY, BOOLEAN, DATE, NUMERIC, STRING, WIKITTY; /** * convert string to TYPE, this method accept not trimed and not well @@ -144,6 +144,8 @@ protected Object getContainedValidObject( Object value ) { Object result = null; switch (type) { + case BINARY: + result = WikittyUtil.toBinary(value); break; case DATE: result = WikittyUtil.toDate(value); break; case NUMERIC: Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/Wikitty.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/Wikitty.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/Wikitty.java 2010-12-09 23:30:35 UTC (rev 588) @@ -122,6 +122,8 @@ Object getFieldAsObject(String ext, String fieldName); + byte[] getFieldAsBytes(String ext, String fieldName); + boolean getFieldAsBoolean(String ext, String fieldName); BigDecimal getFieldAsBigDecimal(String ext, String fieldName); Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyCopyOnWrite.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyCopyOnWrite.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyCopyOnWrite.java 2010-12-09 23:30:35 UTC (rev 588) @@ -234,6 +234,11 @@ } @Override + public byte[] getFieldAsBytes(String ext, String fieldName) { + return target.getFieldAsBytes(ext, fieldName); + } + + @Override public boolean getFieldAsBoolean(String ext, String fieldName) { return target.getFieldAsBoolean(ext, fieldName); } Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyImpl.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyImpl.java 2010-12-09 15:14:22 UTC (rev 587) +++ 
trunk/wikitty-api/src/main/java/org/nuiton/wikitty/entities/WikittyImpl.java 2010-12-09 23:30:35 UTC (rev 588) @@ -501,6 +501,22 @@ } /* + * @see org.nuiton.wikitty.Wikitty#getFieldAsBytes(java.lang.String, java.lang.String) + */ + @Override + public byte[] getFieldAsBytes(String ext, String fieldName) { + Object value = getFieldAsObject(ext, fieldName); + try { + byte[] result = WikittyUtil.toBinary(value); + return result; + } catch (WikittyException eee) { + throw new WikittyException(String.format( + "field '%s' is not a valid byte[]", + ext + "." + fieldName), eee); + } + } + + /* * @see org.nuiton.wikitty.Wikitty#getFieldAsBoolean(java.lang.String, java.lang.String) */ @Override Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/search/Search.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/search/Search.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/search/Search.java 2010-12-09 23:30:35 UTC (rev 588) @@ -86,11 +86,11 @@ if (type.isCollection()) { Collection<?> collection = (Collection<?>) value; for (Object o : collection) { - String strValue = WikittyUtil.toString(type, o); + String strValue = WikittyUtil.toStringForSearchEngine(type, o); result.eq(fqfieldName, strValue); } } else { - String strValue = WikittyUtil.toString(type, value); + String strValue = WikittyUtil.toStringForSearchEngine(type, value); result.eq(fqfieldName, strValue); } } Modified: trunk/wikitty-api/src/main/java/org/nuiton/wikitty/services/WikittyServiceStorage.java =================================================================== --- trunk/wikitty-api/src/main/java/org/nuiton/wikitty/services/WikittyServiceStorage.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/main/java/org/nuiton/wikitty/services/WikittyServiceStorage.java 2010-12-09 23:30:35 UTC (rev 588) @@ -161,7 +161,7 @@ if (type.isNotNull()) { if (null == 
w.getFieldAsObject(ext.getName(), fieldName)) { throw new WikittyException(String.format( - "Field %s must not be null", fieldName)); + "Field '%s' must not be null", fieldName)); } } } Modified: trunk/wikitty-api/src/test/java/org/nuiton/wikitty/conform/StorageTest.java =================================================================== --- trunk/wikitty-api/src/test/java/org/nuiton/wikitty/conform/StorageTest.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-api/src/test/java/org/nuiton/wikitty/conform/StorageTest.java 2010-12-09 23:30:35 UTC (rev 588) @@ -61,6 +61,8 @@ import org.nuiton.wikitty.services.WikittyServiceEnhanced; import org.nuiton.wikitty.services.WikittyEvent; import org.nuiton.wikitty.WikittyUtil; +import org.nuiton.wikitty.entities.FieldFactory; +import org.nuiton.wikitty.entities.WikittyTreeNodeHelper; import org.nuiton.wikitty.search.operators.Element; import org.nuiton.wikitty.search.Search; @@ -626,7 +628,8 @@ // store 1 Wikitty myWikitty = new WikittyImpl(); myWikitty.addExtension(WikittyTreeNodeImpl.extensions); - myWikitty.setField(WikittyTreeNode.EXT_WIKITTYTREENODE, WikittyTreeNode.FIELD_WIKITTYTREENODE_NAME, "name"); + WikittyTreeNodeHelper.setName(myWikitty, "name"); + ws.store(null, Collections.singletonList(myWikitty), false); // delete @@ -641,4 +644,29 @@ WikittyServiceEnhanced.restore(ws, null, myWikitty.getId()); Assert.assertNotNull(restoredWikitty); } + + @Test + public void testBinaryFieldStorage() { + String extName = "BinaryExt"; + byte[] bytes = "Coucou le monde".getBytes(); + + WikittyExtension BinaryExt = new WikittyExtension(extName, + "1.0", // version + null, + WikittyUtil.buildFieldMapExtension( // building field map + "String name unique=\"true\"", + "Binary content")); + Wikitty w = new WikittyImpl(); + w.addExtension(BinaryExt); + w.setField(extName, "name", "LeBin"); + w.setField(extName, "content", bytes); + + ws.store(null, Collections.singletonList(w), false); + + Wikitty restoredWikitty = + 
WikittyServiceEnhanced.restore(ws, null, w.getId()); + Assert.assertNotNull(restoredWikitty); + Assert.assertEquals("LeBin", restoredWikitty.getFieldAsString(extName, "name")); + Assert.assertEquals(bytes, restoredWikitty.getFieldAsBytes(extName, "content")); + } } Modified: trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyJDBCUtil.java =================================================================== --- trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyJDBCUtil.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyJDBCUtil.java 2010-12-09 23:30:35 UTC (rev 588) @@ -83,6 +83,8 @@ /** field type column in the extension_data table */ static final public String COL_FIELDTYPE = "fieldType"; /** boolean value column in the wikitty_data table */ + static final public String COL_BINARY_VALUE = "binaryValue"; + /** boolean value column in the wikitty_data table */ static final public String COL_BOOLEAN_VALUE = "booleanValue"; /** number value column in the wikitty_data table */ static final public String COL_NUMBER_VALUE = "numberValue"; @@ -109,9 +111,16 @@ "jdbc.queries.creation.wikitty.admin.test"; static final public String QUERY_CREATION_WIKITTY_ADMIN = "jdbc.queries.creation.wikitty.admin"; - /** wikitty_admin table creation query property name */ + /** wikitty_data column binary test exits query property name */ + static final public String QUERY_CREATION_WIKITTY_DATA_TEST_BINARY = + "jdbc.queries.creation.wikitty.data.test.binary"; + /** wikitty_data column binary creation with alter query property name */ + static final public String QUERY_CREATION_WIKITTY_DATA_ALTER_BINARY = + "jdbc.queries.creation.wikitty.data.alter.binary"; + /** wikitty_data table test exists query property name */ static final public String QUERY_CREATION_WIKITTY_DATA_TEST = "jdbc.queries.creation.wikitty.data.test"; + /** wikitty_data table creation query property name */ static final public 
String QUERY_CREATION_WIKITTY_DATA = "jdbc.queries.creation.wikitty.data"; /** insertion in the admin table query property name */ @@ -162,40 +171,49 @@ /** - * Loads the properties in the {@code wikitty-jdbc-config.properties} file. + * Loads the query properties from one or more comma-separated configuration files; + * by default only the {@code wikitty-jdbc-query.properties} file is loaded. * * @param properties custom properties to override default configuration - * @return the properties for the connection and the queries + * @return the properties for the queries */ public static synchronized Properties loadQuery(ApplicationConfig config) { - Properties result = new Properties(); - - InputStream streamQuery = null; - try { + Properties result = null; - String wikittyQueryFile = config.getOption( - WikittyConfig.WikittyOption.WIKITTY_STORAGE_JDBC_QUERY_FILE.getKey()); - // queries - URL url = ClassLoader.getSystemResource(wikittyQueryFile); - if (url == null) { - ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); - url = contextClassLoader.getResource(wikittyQueryFile); + // list of all properties file to load + String wikittyQueryFileList = config.getOption( + WikittyConfig.WikittyOption.WIKITTY_STORAGE_JDBC_QUERY_FILE.getKey()); + + String[] wikittyQueryFiles = wikittyQueryFileList.split(","); + + for (String file : wikittyQueryFiles) { + // create new Properties with result as parent + result = new Properties(result); + InputStream streamQuery = null; + try { + // queries + URL url = ClassLoader.getSystemResource(file); + if (url == null) { + ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + url = contextClassLoader.getResource(file); + } + + if (log.isInfoEnabled()) { + log.info("Reading resource from: " + url); + } + // url can't be null + streamQuery = url.openStream(); + result.load(streamQuery); + } catch (IOException eee) { + throw new WikittyException(String.format( + "Unable to load property file '%s'", 
+ file), eee); + } finally { + IOUtils.closeQuietly(streamQuery); } - - if (log.isInfoEnabled()) { - log.info("Reading resource from: " + url); - } - // url can't be null - streamQuery = url.openStream(); - result.load(streamQuery); - - } catch (IOException eee) { - throw new WikittyException("Unable to load property file", eee); } - finally { - IOUtils.closeQuietly(streamQuery); - } + return result; } Modified: trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyStorageJDBC.java =================================================================== --- trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyStorageJDBC.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-jdbc-impl/src/main/java/org/nuiton/wikitty/jdbc/WikittyStorageJDBC.java 2010-12-09 23:30:35 UTC (rev 588) @@ -24,6 +24,9 @@ */ package org.nuiton.wikitty.jdbc; +import java.io.IOException; +import java.io.InputStream; +import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.COL_BINARY_VALUE; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.COL_BOOLEAN_VALUE; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.COL_DATE_VALUE; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.COL_DELETION_DATE; @@ -36,6 +39,8 @@ import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_CLEAR_WIKITTY; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_CREATION_WIKITTY_ADMIN; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_CREATION_WIKITTY_ADMIN_TEST; +import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_CREATION_WIKITTY_DATA_ALTER_BINARY; +import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_CREATION_WIKITTY_DATA_TEST_BINARY; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_CREATION_WIKITTY_DATA; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_CREATION_WIKITTY_DATA_TEST; import static org.nuiton.wikitty.jdbc.WikittyJDBCUtil.QUERY_DELETE_WIKITTY_ADMIN; @@ -67,6 +72,7 @@ import java.util.Set; import 
java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -109,8 +115,18 @@ public WikittyStorageJDBC(ApplicationConfig config, WikittyExtensionStorage extensionStorage) { this.config = config; this.extensionStorage = extensionStorage; - jdbcQuery = WikittyJDBCUtil.loadQuery(config); + checkTableOrCreation(); + + // always add the binaryValue column with ALTER after creation because + // this datatype is not portable + checkColumnBinaryOrAlter(); + } + + /** + * Test that the tables exist and create them if necessary. + */ + protected void checkTableOrCreation() { Connection connectionTest = WikittyJDBCUtil.getJDBCConnection(config); try { // If test of existance work, no exception and do nothing @@ -139,9 +155,47 @@ } } + /** + * Add the binary column if necessary. + * If the column can't be added, wikitty still works for everything except the binary type. + */ + protected void checkColumnBinaryOrAlter() { + Connection connectionTest = WikittyJDBCUtil.getJDBCConnection(config); + try { + // If the existence test works there is no exception and nothing to do; + // on exception try to add the binary column + Statement statementTest = connectionTest.createStatement(); + statementTest.execute(jdbcQuery.getProperty(QUERY_CREATION_WIKITTY_DATA_TEST_BINARY)); + } catch (SQLException silentError) { + if (log.isInfoEnabled()) { + log.info("try to alter wikitty database to add binary column"); + } + Connection connection = WikittyJDBCUtil.getConnection(config); + try { + Statement statement = connection.createStatement(); + statement.execute(jdbcQuery.getProperty(QUERY_CREATION_WIKITTY_DATA_ALTER_BINARY)); + WikittyJDBCUtil.commitJDBCConnection(connection); + } catch (SQLException eee) { + WikittyJDBCUtil.rollbackJDBCConnection(connection); + // no exception just log fatal, wikitty can work without this + // column but can't store binary. 
If binary is not used there is + // no probleme + log.fatal("Can add column to store binary field. You can't use binary", eee); +// throw new WikittyException("Can't create table for wikitty storage", eee); + } finally { + WikittyJDBCUtil.closeQuietly(connection); + } + } finally { + WikittyJDBCUtil.closeQuietly(connectionTest); + } + } + protected String getColName(FieldType.TYPE type) { String result; switch(type) { + case BINARY: + result = COL_BINARY_VALUE; + break; case BOOLEAN: result = COL_BOOLEAN_VALUE; break; @@ -466,6 +520,14 @@ FieldType type = result.getFieldType(fqfieldName); Object value = null; switch (type.getType()) { + case BINARY: + InputStream blob = resultSet.getBinaryStream(COL_BINARY_VALUE); + try { + value = IOUtils.toByteArray(blob); + } catch (IOException eee) { + throw new WikittyException("Can't read blob stream for database", eee); + } + break; case BOOLEAN: value = resultSet.getBoolean(COL_BOOLEAN_VALUE); break; Added: trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query-mysql.properties =================================================================== --- trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query-mysql.properties (rev 0) +++ trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query-mysql.properties 2010-12-09 23:30:35 UTC (rev 588) @@ -0,0 +1,31 @@ +### +# #%L +# Wikitty :: wikitty-jdbc-impl +# +# $Id$ +# $HeadURL$ +# %% +# Copyright (C) 2010 CodeLutin, Benjamin Poussin +# %% +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Lesser Public License for more details. 
+# +# You should have received a copy of the GNU General Lesser Public +# License along with this program. If not, see +# <http://www.gnu.org/licenses/lgpl-3.0.html>. +# #L% +### +# +# ce fichier surcharge la requete d'ajout de la column binaryValue pour mysql +# il doit etre charge apres le fichier par defaut dans la sequence de fichier +# + +#table wikitty_data creation query +jdbc.queries.creation.wikitty.data.alter.binary=ALTER TABLE wikitty_data ADD COLUMN binaryValue blob; Modified: trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query.properties =================================================================== --- trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query.properties 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-jdbc-impl/src/main/resources/wikitty-jdbc-query.properties 2010-12-09 23:30:35 UTC (rev 588) @@ -22,6 +22,14 @@ # <http://www.gnu.org/licenses/lgpl-3.0.html>. # #L% ### +# +# Ce fichier essaie d'utiliser du sql portable d'une base \u00e0 une autre +# Il est compatible au moins avec les bases: +# - h2 (http://www.h2database.com) +# - PostgreSQL (http://www.postgresql.fr) +# - MySQL (http://www.mysql.com) excepter l'ajout de la colonne binaryValue (a cause du bytea) +# + #table extension_admin creation query jdbc.queries.creation.extension.admin.test=SELECT * FROM extension_admin LIMIT 1; jdbc.queries.creation.extension.admin=CREATE TABLE extension_admin (\ @@ -51,6 +59,9 @@ PRIMARY KEY (id)); #table wikitty_data creation query +# all time use alter after creation for binaryValue column because this datatype is not portable +jdbc.queries.creation.wikitty.data.test.binary=SELECT binaryValue FROM wikitty_data LIMIT 1; +jdbc.queries.creation.wikitty.data.alter.binary=ALTER TABLE wikitty_data ADD COLUMN binaryValue bytea; jdbc.queries.creation.wikitty.data.test=SELECT * FROM wikitty_data LIMIT 1; jdbc.queries.creation.wikitty.data=CREATE TABLE wikitty_data (\ id varchar(64) NOT NULL,\ Modified: 
trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/Restriction2Solr.java =================================================================== --- trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/Restriction2Solr.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/Restriction2Solr.java 2010-12-09 23:30:35 UTC (rev 588) @@ -276,13 +276,13 @@ private String like2solr(Like like) throws WikittyException { SearchAs searchAs = like.getSearchAs(); String element2solr = element2solr(like.getElement()); - if(element2solr.endsWith("_s")) { // is string + if(element2solr.endsWith(WikittySearchEngineSolr.SUFFIX_STRING)) { // is string switch(searchAs) { case AsText: - element2solr += "_t"; + element2solr += WikittySearchEngineSolr.SUFFIX_STRING_FULLTEXT; break; case ToLowerCase: - element2solr += "_c"; + element2solr += WikittySearchEngineSolr.SUFFIX_STRING_LOWERCASE; break; } } @@ -290,7 +290,7 @@ // Warning if you need add searchAs, AsText and ToLowerCase need search // at lowercase String value2solr = value2solr(like.getValue()); - if(!element2solr.endsWith("_dt")) { // is not date + if(!element2solr.endsWith(WikittySearchEngineSolr.SUFFIX_DATE)) { // is not date value2solr = value2solr.toLowerCase(); } @@ -300,13 +300,13 @@ private String unlike2solr(Unlike unlike) throws WikittyException { SearchAs searchAs = unlike.getSearchAs(); String element2solr = element2solr(unlike.getElement()); - if(element2solr.endsWith("_s")) { // is string + if(element2solr.endsWith(WikittySearchEngineSolr.SUFFIX_STRING)) { // is string switch(searchAs) { case AsText: - element2solr += "_t"; + element2solr += WikittySearchEngineSolr.SUFFIX_STRING_FULLTEXT; break; case ToLowerCase: - element2solr += "_c"; + element2solr += WikittySearchEngineSolr.SUFFIX_STRING_LOWERCASE; break; } } @@ -314,7 +314,7 @@ // Warning if you need add searchAs, AsText and ToLowerCase need search // at lowercase String value2solr = 
value2solr(unlike.getValue()); - if(!element2solr.endsWith("_dt")) { // is not date + if(!element2solr.endsWith(WikittySearchEngineSolr.SUFFIX_DATE)) { // is not date value2solr = value2solr.toLowerCase(); } Added: trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/SolrUtil.java =================================================================== --- trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/SolrUtil.java (rev 0) +++ trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/SolrUtil.java 2010-12-09 23:30:35 UTC (rev 588) @@ -0,0 +1,78 @@ +package org.nuiton.wikitty.solr; + + +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; + +/** + * + * @author poussin + * @version $Revision$ + * + * Last update: $Date$ + * by : $Author$ + */ +public class SolrUtil { + + /** to use log facility, just put in your code: log.info(\"...\"); */ + static private Log log = LogFactory.getLog(SolrUtil.class); + + /** + * copy all field of source in new document. + * If include is true copy only field specified in fields + * if include is false copy all field except field in fields. + * + * example: + * if doc contains field: abc, aabbcc, aaabbbccc, toto + * copySolrDocument(doc, true, "aa.*", ".*bbb.*") + * field copied are: aabbcc, aaabbbccc + * + * copySolrDocument(doc, false, "aa.*", ".*bbb.*") + * field copied are: abc, toto + * + * @param source + * @param include + * @param fields + * @return + */ + static public SolrInputDocument copySolrDocument( + SolrDocument source, boolean include, String... 
fields) { + SolrInputDocument result = new SolrInputDocument(); + Collection<String> fieldNames = source.getFieldNames(); + + Set<String> fieldToCopy = new HashSet<String>(); + if (include) { + for (String fieldName : fieldNames) { + for (String fieldRegexp : fields) { + if (fieldName.matches(fieldRegexp)) { + fieldToCopy.add(fieldName); + } + } + } + } else { // exclude + fieldToCopy.addAll(fieldNames); + for (String fieldName : fieldNames) { + for (String fieldRegexp : fields) { + if (fieldName.matches(fieldRegexp)) { + fieldToCopy.remove(fieldName); + } + } + } + } + + for (String fieldName : fieldToCopy) { + Collection<Object> fieldValues = source.getFieldValues(fieldName); + for (Object fieldValue : fieldValues) { + result.addField(fieldName, fieldValue); + } + } + return result; + } +} Modified: trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/WikittySearchEngineSolr.java =================================================================== --- trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/WikittySearchEngineSolr.java 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/WikittySearchEngineSolr.java 2010-12-09 23:30:35 UTC (rev 588) @@ -69,8 +69,10 @@ import com.arjuna.ats.arjuna.state.OutputObjectState; import com.arjuna.ats.internal.arjuna.abstractrecords.LastResourceRecord; import java.io.File; +import java.util.regex.Pattern; import org.nuiton.util.ApplicationConfig; import org.nuiton.wikitty.WikittyConfig; +import org.nuiton.wikitty.WikittyUtil; import org.nuiton.wikitty.entities.WikittyTreeNodeHelper; /** @@ -107,6 +109,31 @@ static final public String TREENODE_ROOT = TREENODE_PREFIX + "root"; static final public String TREENODE_PATH = TREENODE_PREFIX + "path"; + static final public String SUFFIX_BINARY = "_bi"; + static final public String SUFFIX_BOOLEAN = "_b"; + static final public String SUFFIX_DATE = "_dt"; + static final public String SUFFIX_STRING = "_s"; + static final 
public String SUFFIX_NUMERIC = "_d"; + static final public String SUFFIX_WIKITTY = "_w"; // not used yet + static final public String SUFFIX_STRING_LOWERCASE = "_c"; + static final public String SUFFIX_STRING_FULLTEXT = "_t"; + + static final public String[] fieldNotToCopyPattern = { + Pattern.quote(TREENODE_PREFIX) + ".*" + }; + + static final public String[] fieldToCopyPattern = { + // match: id, extensions, not_null_fields + SOLR_ID, SOLR_EXTENSIONS, SOLR_NOT_NULL_FIELDS, + // match: "(?!(all\.)).*_bi" accept ce qui fini par _bi sauf si ca commence par "all." + "(?!(" + SOLR_ALL_EXTENSIONS + "\\.)).*" + SUFFIX_BINARY, + "(?!(" + SOLR_ALL_EXTENSIONS + "\\.)).*" + SUFFIX_BOOLEAN, + "(?!(" + SOLR_ALL_EXTENSIONS + "\\.)).*" + SUFFIX_DATE, + "(?!(" + SOLR_ALL_EXTENSIONS + "\\.)).*" + SUFFIX_NUMERIC, + "(?!(" + SOLR_ALL_EXTENSIONS + "\\.)).*" + SUFFIX_STRING, + "(?!(" + SOLR_ALL_EXTENSIONS + "\\.)).*" + SUFFIX_WIKITTY + }; + /** use to permit client to modify fieldname during query generation */ static public interface FieldModifier { public String convertToSolr(WikittyTransaction transaction, String fieldname); @@ -122,7 +149,7 @@ @Override public String convertToSolr(WikittyTransaction transaction, String fqfieldname) { String result = fqfieldname; - String[] searchField = fqfieldname.split("\\."); + String[] searchField = fqfieldname.split(WikittyUtil.FQ_FIELD_NAME_SEPARATOR_REGEX); if (Element.ELT_EXTENSION.equals(fqfieldname)) { result = SOLR_EXTENSIONS; @@ -135,10 +162,12 @@ String fieldName = searchField[1]; if (Criteria.ALL_EXTENSIONS.equals(extName)) { - fqfieldname = SOLR_ALL_EXTENSIONS + "." 
+ fieldName; + fqfieldname = SOLR_ALL_EXTENSIONS + + WikittyUtil.FQ_FIELD_NAME_SEPARATOR + fieldName; } if (searchField.length >= 3) { + // TODO poussin 20101209 je ne vois pas dans quel cas on passe ici String fieldNameType = searchField[2]; TYPE type = FieldType.TYPE.valueOf(fieldNameType); result = WikittySearchEngineSolr.getSolrFieldName(fqfieldname, type); @@ -162,10 +191,21 @@ return result; } + /** + * if you change this, change + * {@link WikittySearchEngineSolr#getSolrFieldName(java.lang.String, org.nuiton.wikitty.entities.FieldType.TYPE)} + * too + */ @Override public String convertToField(WikittyTransaction transaction, String solrName) { - String fieldName = solrName.replaceAll("(_b$)|(_dt$)|(_s$)|(_d$)", ""); - if(SOLR_EXTENSIONS.equals(fieldName)) { + String fieldName = solrName.replaceAll( + "(" + SUFFIX_BINARY + "$)" + + "|(" + SUFFIX_BOOLEAN + "$)" + + "|(" + SUFFIX_DATE + "$)" + + "|(" + SUFFIX_STRING + "$)" + + "|(" + SUFFIX_WIKITTY + "$)" + + "|(" + SUFFIX_NUMERIC + "$)", ""); + if (SOLR_EXTENSIONS.equals(fieldName)) { fieldName = Element.ELT_EXTENSION; } return fieldName; @@ -292,22 +332,11 @@ // Get documents SolrInputDocument doc = solrResource.getAddedDoc(id); if(doc == null) { - doc = new SolrInputDocument(); - // Copy old field value SolrDocument found = findById(solrServer, id); if (found != null) { - Collection<String> fieldNames = found.getFieldNames(); - for (String fieldName : fieldNames) { - Collection<Object> fieldValues = found.getFieldValues(fieldName); - - if(!fieldName.startsWith(TREENODE_PREFIX)) { - for (Object fieldValue : fieldValues) { - doc.addField(fieldName, fieldValue); - } - } - } - + // FIXME poussin 20101209 a finir en ne copiant que le necessaire voir javadoc du package + doc = SolrUtil.copySolrDocument(found, false, fieldNotToCopyPattern); solrResource.addDoc(id, doc); } else { if (log.isWarnEnabled()) { @@ -639,7 +668,8 @@ query.setStart(firstIndex); int nbRows; if (endIndex == -1) { - // WARNING It is necessary 
to substract 'start' otherwise, there is a capacity overlow in solR + // WARNING It is necessary to substract 'start' otherwise, + // there is a capacity overlow in solR nbRows = Integer.MAX_VALUE - firstIndex; } else { nbRows = endIndex - firstIndex + 1; @@ -752,7 +782,7 @@ public Integer findNodeCount(WikittyTransaction transaction, Wikitty w, Criteria filter) { String wikittyId = w.getId(); - String parent = w.getFieldAsWikitty(WikittyTreeNode.EXT_WIKITTYTREENODE, WikittyTreeNode.FIELD_WIKITTYTREENODE_PARENT); + String parent = WikittyTreeNodeHelper.getParent(w); if(parent == null) { parent = TREENODE_ROOT; } else { @@ -773,7 +803,7 @@ public Map<String, Integer> findAllChildrenCount(WikittyTransaction transaction, Wikitty w, Criteria filter) { String wikittyId = w.getId(); - String parent = w.getFieldAsWikitty(WikittyTreeNode.EXT_WIKITTYTREENODE, WikittyTreeNode.FIELD_WIKITTYTREENODE_PARENT); + String parent = WikittyTreeNodeHelper.getParent(w); if(parent == null) { parent = TREENODE_ROOT; } else { @@ -823,11 +853,39 @@ * extensions : extensionNames * fieldName : fieldValue * + * FIXME poussin 20101209 beaucoup trop de champs redondant sauver. + * + * Une champs de type chaine est indexer 7 fois :(. _s et _s_c sont indexe + * exactement de la meme facon sauf qu'avant l'envoi a solr _s_c en mis en + * minuscule (ce qui devrait etre le role de solr en mettant la bonne config) + * _s et _s_c sont indexer en string et _s_t est indexe en text. + * + * string est indexer sans traitement + * text est indexer apres traitement + * (ce qui permet lors de la recherche des ecarts. ex: wifi matchera wi-fi) + * + * _s_c et _s_t ne sont utilise que pour + * {@link Restriction2Solr#like2solr}{@link Restriction2Solr#unlike2solr} + * n'y a-t-il pas moyen d'utiliser un champs deja indexe ? 
+ * + * On a aujourd'hui: + * <li> text (qui rassemble tous les champs et est le champs de recherche par defaut) + * <li> ext.field_s (verbatime) + * <li> ext.field_s_c (verbatime lowercase) + * <li> ext.field_s_t (travaille) + * <li> all.field_s (verbatime) + * <li> all.field_s_c (verbatime lowercase) + * <li> all.field_s_t (travaille) + * + * et surtout ils sont tous stored :( + * * @param w all wikitties object to index * @return solrInputDocument used to modify index */ protected SolrInputDocument createIndexDocument(Wikitty w) { - log.debug("index wikitty " + w.getId()); + if (log.isDebugEnabled()) { + log.debug("index wikitty " + w.getId()); + } SolrInputDocument doc = new SolrInputDocument(); String id = w.getId(); @@ -842,8 +900,9 @@ TYPE type = fieldType.getType(); String solrFqFieldName = getSolrFieldName(fqfieldName, type); - String[] solrFieldName = solrFqFieldName.split("\\."); - String solrAllFieldName = SOLR_ALL_EXTENSIONS + "." + solrFieldName[1]; + String solrAllFieldName = SOLR_ALL_EXTENSIONS + + WikittyUtil.FQ_FIELD_NAME_SEPARATOR + + WikittyUtil.getFieldNameFromFQFieldName(solrFqFieldName); Object objectValue = w.getFqField(fqfieldName); if(objectValue != null) { @@ -856,11 +915,11 @@ // Store string field in differents styles if(type == TYPE.STRING) { - doc.addField(solrFqFieldName + "_t", itemValue); - doc.addField(solrAllFieldName + "_t", itemValue); + doc.addField(solrFqFieldName + SUFFIX_STRING_FULLTEXT, itemValue); + doc.addField(solrAllFieldName + SUFFIX_STRING_FULLTEXT, itemValue); String itemValueLowerCase = itemValue.toString().toLowerCase(); - doc.addField(solrFqFieldName + "_c", itemValueLowerCase); - doc.addField(solrAllFieldName + "_c", itemValueLowerCase); + doc.addField(solrFqFieldName + SUFFIX_STRING_LOWERCASE, itemValueLowerCase); + doc.addField(solrAllFieldName + SUFFIX_STRING_LOWERCASE, itemValueLowerCase); } doc.addField(SOLR_NOT_NULL_FIELDS, fqfieldName); @@ -873,15 +932,17 @@ // Store string field in differents styles 
if(type == TYPE.STRING) { - doc.addField(solrFqFieldName + "_t", objectValue); - doc.addField(solrAllFieldName + "_t", objectValue); + doc.addField(solrFqFieldName + SUFFIX_STRING_FULLTEXT, objectValue); + doc.addField(solrAllFieldName + SUFFIX_STRING_FULLTEXT, objectValue); String objectValueLowerCase = objectValue.toString().toLowerCase(); - doc.addField(solrFqFieldName + "_c", objectValueLowerCase); - doc.addField(solrAllFieldName + "_c", objectValueLowerCase); + doc.addField(solrFqFieldName + SUFFIX_STRING_LOWERCASE, objectValueLowerCase); + doc.addField(solrAllFieldName + SUFFIX_STRING_LOWERCASE, objectValueLowerCase); } doc.addField(SOLR_NOT_NULL_FIELDS, fqfieldName); - log.debug("index field " + solrFqFieldName + " with value '" + objectValue + "'"); + if (log.isDebugEnabled()) { + log.debug("index field " + solrFqFieldName + " with value '" + objectValue + "'"); + } } } } @@ -909,16 +970,28 @@ return null; } + /** + * if you change this method, change + * {@link TypeFieldModifer#convertToField(org.nuiton.wikitty.services.WikittyTransaction, java.lang.String)} + * too + * + * @param fqfieldName + * @param type + * @return + */ public static String getSolrFieldName(String fqfieldName, TYPE type) { switch (type) { + case BINARY: + return fqfieldName + SUFFIX_BINARY; case BOOLEAN: - return fqfieldName + "_b"; + return fqfieldName + SUFFIX_BOOLEAN; case DATE: - return fqfieldName + "_dt"; + return fqfieldName + SUFFIX_DATE; case STRING: - return fqfieldName + "_s"; + return fqfieldName + SUFFIX_STRING; case NUMERIC: - return fqfieldName + "_d"; + return fqfieldName + SUFFIX_NUMERIC; + // FIXME poussin 20101209 pourquoi ne pas mettre explicitement un suffix pour le type WIKITTY ? 
default: return fqfieldName; } Added: trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/package-info.java =================================================================== --- trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/package-info.java (rev 0) +++ trunk/wikitty-solr-impl/src/main/java/org/nuiton/wikitty/solr/package-info.java 2010-12-09 23:30:35 UTC (rev 588) @@ -0,0 +1,130 @@ +/** + * <h1>Indexation</h1> + * + * chaque type de champs est suffixe par un marqueur + * + * <table> + * <tr> + * <th>Type wikitty</th><th>Suffixe</th><th>Type d'indexation</th><th>valeur</th><th>stored</th><th>multiValued</th> + * </tr> + * <tr> + * <td>{@link org.nuiton.wikitty.entities.FieldType.TYPE#BINARY}</td><td>_bi {@link WikittySearchEngineSolr#SUFFIX_BINARY}</td><td>aucun</td><td>vide</td><td>non</td><td>true</td> + * </tr> + * <tr> + * <td>{@link org.nuiton.wikitty.entities.FieldType.TYPE#BOOLEAN}</td><td>_b {@link WikittySearchEngineSolr#SUFFIX_BOOLEAN}</td><td>boolean</td><td>la valeur du champs</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td>{@link org.nuiton.wikitty.entities.FieldType.TYPE#DATE}</td><td>_dt {@link WikittySearchEngineSolr#SUFFIX_DATE}</td><td>date</td><td>la valeur du champs</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td>{@link org.nuiton.wikitty.entities.FieldType.TYPE#NUMBER}</td><td>_d {@link WikittySearchEngineSolr#SUFFIX_NUMERIC}</td><td>sdouble</td><td>la valeur du champs</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td>{@link org.nuiton.wikitty.entities.FieldType.TYPE#WIKITTY}</td><td>_w {@link WikittySearchEngineSolr#SUFFIX_WIKITTY}</td><td>string</td><td>l'id du wikitty</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td rowspan="3">{@link org.nuiton.wikitty.entities.FieldType.TYPE#STRING}</td><td>_s {@link WikittySearchEngineSolr#SUFFIX_STRING}</td><td>string</td><td>la valeur du champs</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td>_s_c {@link 
WikittySearchEngineSolr#SUFFIX_STRING_LOWERCASE}</td><td>string</td><td>la valeur du champs en minuscule</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td>_s_t {@link WikittySearchEngineSolr#SUFFIX_STRING_FULLTEXT}</td><td>text</td><td>la valeur du champs</td><td>true</td><td>true</td> + * </tr> + * </table> + * + * D'autres champs sont indexes + * <table> + * <tr> + * <th>champs</th><th>Type d'indexation</th><th>valeur</th><th>stored</th><th>multiValued</th> + * </tr> + * <tr> + * <td>id</td><td>string</td><td>l'id du wikitty</td><td>true</td><td>false</td> + * </tr> + * <tr> + * <td>extensions</td><td>string</td><td>la liste des extensions</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td>not_null_fields</td><td>string</td><td>la liste des champs qui doivent etre non null</td><td>true</td><td>true</td> + * </tr> + * <tr> + * <td>text</td><td>text</td><td>la valeur de tous les champs ayant un suffix</td><td>true</td><td>true</td> + * </tr> + * </table> + * + * <p> + * Les champs sont tous restockes dans une extension 'all' pour pouvoir faire + * des recherches sur toutes les extensions en meme temps. Par exemple rechercher + * tout ce qui porte le 'nom' 'portable' quelque soit l'extension (*.nom:portable) + * <p> + * Les chaines de caracteres doivent obligatoirement etre indexee en type string + * si l'on veut pouvoir faire des facettes dessus. Il faut donc obligatoirement + * indexer les chaines en 'string' et aussi en 'text' pour pouvoir les utiliser + * dans les facettes mais aussi que la recheche soit plus permissive. + * <p> + * SolR copie tous les champs dans le champs 'text' pour la recherche fulltext + * ce champs est le champs par defaut de recherche. 
+ * <p> + * id est marque comme devant etre un champs unique (et donc lorsqu'on enregistre + * un nouveau document avec le meme id, l'ancien est supprime) + * <p> + * Tous les champs sont marque stored car lors de la reindexation des arbres + * on a besoin de faire une copie de l'ancien document et donc de pouvoir + * recuperer la valeur des champs (voir alternative) + * <p> + * Par exemple si on a un champs <b>product.description: String</b> nous le + * retrouverons dans 7 champs de l'index: + * + * <li> text : text (car est la copie de tous les champs) + * <li> product.description_s : string (necessaire pour la facetisation) + * <li> product.description_s_c : string + * <li> product.description_s_t : text + * <li> all.description_s : string + * <li> all.description_s_c : string + * <li> all.description_s_t : text + * + * il faudrait que les 5 derniers soit autogenere par solr en utilisant un + * <b>copyField</b> dans le schema.xml et qu'il ne soit pas stocke. Mais pour + * cela il faudrait que <b>copyField</b> permette l'utilisation de regexp + * (faire un patch a SolR ?) 
+ * + * <copyField source="*_s" dest="*_s_c"/> + * <copyField source="*_s" dest="*_s_t"/> + * + * <copyField source="*.*_s" dest="all.*_s"/> + * <copyField source="*.*_s" dest="all.*_s_c"/> + * <copyField source="*.*_s" dest="all.*_s_t"/> + * + * et aussi definir les all pour les autres types + * <copyField source="*.*_b" dest="all.*_b"/> + * <copyField source="*.*_dt" dest="all.*_dt"/> + * <copyField source="*.*_d" dest="all.*_d"/> + * <copyField source="*.*_w" dest="all.*_w"/> + * + * copyField ne support que une * et au debut ou a la fin, donc actuellement + * il serait possible de d'avoir + * + * <copyField source="*_s" dest="*_s_c"/> + * <copyField source="*_s" dest="*_s_t"/> + * + * et d'enregistrer les champs deux fois en <b>extName.fieldName</b> et en + * <b>all.fieldName</b> ensuite lorsqu'on a besoin de copier un document + * il ne faut prendre les champs id, extensions, not_null_fields et les champs + * ne commencant pas par 'all.' et se finissant exclusivement par + * _bi, _b, _dt, _d, _s, _w. Cest dernier seront enregistrer aussi en all. + * + * Ainsi on passe de 6 champs stocke + 1, a 1 champ stocker + 6. Pour les chaines + * et de 2 champs stockes a 1 champ stocke + 1. + * + * + * <h2>alternative au stockage de tout les champs</h2> + * <p> + * Une alternative serait de ne reprendre que les champs reels (pas les copies) + * et recreer les copies a partir de ceux la. Les copies pourront ne plus etre + * stored=true. 
+ * <p> + * Une autre alternative serait de récuperer l'objet dans le Storage et de le + * reindexer completement + * + */ +package org.nuiton.wikitty.solr; Modified: trunk/wikitty-solr-impl/src/main/resources/schema.xml =================================================================== --- trunk/wikitty-solr-impl/src/main/resources/schema.xml 2010-12-09 15:14:22 UTC (rev 587) +++ trunk/wikitty-solr-impl/src/main/resources/schema.xml 2010-12-09 23:30:35 UTC (rev 588) @@ -289,10 +289,12 @@ <copyField source="*_d" dest="text"/> <copyField source="*_dt" dest="text"/> + <!-- on indexe pas les binary field --> + <dynamicField name="*_bi" type="string" indexed="false" stored="false" multiValued="true"/> <dynamicField name="*_i" type="sint" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/> - <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/> - <dynamicField name="*_c" type="string" indexed="true" stored="true" multiValued="true"/> + <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/> + <dynamicField name="*_c" type="string" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_l" type="slong" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_b" type="boolean" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_f" type="sfloat" indexed="true" stored="true" multiValued="true"/> Added: trunk/wikitty-solr-impl/src/main/resources/solrconfig-complete.xml =================================================================== --- trunk/wikitty-solr-impl/src/main/resources/solrconfig-complete.xml (rev 0) +++ trunk/wikitty-solr-impl/src/main/resources/solrconfig-complete.xml 2010-12-09 23:30:35 UTC (rev 588) @@ -0,0 +1,733 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + #%L + Wikitty :: wikitty-solr-impl + + $Id$ + $HeadURL$ + %% + Copyright (C) 2009 - 2010 
CodeLutin + %% + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Lesser Public License for more details. + + You should have received a copy of the GNU General Lesser Public + License along with this program. If not, see + <http://www.gnu.org/licenses/lgpl-3.0.html>. + #L% + --> + +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<config> + <!-- Set this to 'false' if you want solr to continue working after it has + encountered an severe configuration error. In a production environment, + you may want solr to keep working even if one handler is mis-configured. 
+ + You may also set this to false by setting the system property: + -Dsolr.abortOnConfigurationError=false + --> + <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError> + + <!-- Used to specify an alternate directory to hold all index data + other than the default ./data under the Solr home. + If replication is in use, this should match the replication configuration. --> + <dataDir>${wikitty.searchengine.solr.directory.data:./solr/data}</dataDir> + + <!-- The DirectoryFactory to use for indexes. + solr.StandardDirectoryFactory, the default, is filesystem based. + solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. + A prefix of "solr." for class names is an alias that + causes solr to search appropriate packages, including + org.apache.solr.(search|update|request|core|analysis) + --> + <directoryFactory name="DirectoryFactory" class="${wikitty.searchengine.solr.directory.factory:solr.StandardDirectoryFactory}"/> + + <indexDefaults> + <!-- Values here affect all index writers and act as a default unless overridden. --> + <useCompoundFile>false</useCompoundFile> + + <mergeFactor>10</mergeFactor> + <!-- + If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first. + + --> + <!--<maxBufferedDocs>1000</maxBufferedDocs>--> + <!-- Tell Lucene when to flush documents to disk. + Giving Lucene more memory for indexing means faster indexing at the cost of more RAM + + If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first. + + --> + <ramBufferSizeMB>32</ramBufferSizeMB> + <maxMergeDocs>2147483647</maxMergeDocs> + <maxFieldLength>10000</maxFieldLength> + <writeLockTimeout>1000</writeLockTimeout> + <commitLockTimeout>10000</commitLockTimeout> + + <!-- + Expert: Turn on Lucene's auto commit capability.
+ This causes intermediate segment flushes to write a new lucene + index descriptor, enabling it to be opened by an external + IndexReader. + NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality + --> + <!--<luceneAutoCommit>false</luceneAutoCommit>--> + <!-- + Expert: + The Merge Policy in Lucene controls how merging is handled by Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous + versions used LogDocMergePolicy. + + LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy chose when + to merge based on number of documents + + Other implementations of MergePolicy must have a no-argument constructor + --> + <!--<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>--> + + <!-- + Expert: + The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default) + can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not. + --> + <!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>--> + + <!-- + This option specifies which Lucene LockFactory implementation to use. + + single = SingleInstanceLockFactory - suggested for a read-only index + or when there is no possibility of another process trying + to modify the index. + native = NativeFSLockFactory + simple = SimpleFSLockFactory + + (For backwards compatibility with Solr 1.2, 'simple' is the default + if not specified.) 
+ --> + <lockType>simple</lockType> + </indexDefaults> + + <mainIndex> + <!-- options specific to the main on-disk lucene index --> + <useCompoundFile>false</useCompoundFile> + <ramBufferSizeMB>32</ramBufferSizeMB> + <mergeFactor>10</mergeFactor> + <!-- Deprecated --> + <!--<maxBufferedDocs>1000</maxBufferedDocs>--> + <maxMergeDocs>2147483647</maxMergeDocs> + <maxFieldLength>10000</maxFieldLength> + + <!-- If true, unlock any held write or commit locks on startup. + This defeats the locking mechanism that allows multiple + processes to safely access a lucene index, and should be + used with care. + This is not needed if lock type is 'none' or 'single' + --> + <unlockOnStartup>true</unlockOnStartup> + </mainIndex> + + <!-- Enables JMX if and only if an existing MBeanServer is found, use + this if you want to configure JMX through JVM parameters. Remove + this to disable exposing Solr configuration and statistics to JMX. + + If you want to connect to a particular server, specify the agentId + e.g. <jmx agentId="myAgent" /> + + If you want to start a new MBeanServer, specify the serviceUrl + e.g <jmx serviceurl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> + + For more details see http://wiki.apache.org/solr/SolrJmx + --> + <jmx /> + + <!-- the default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + <!-- A prefix of "solr." for class names is an alias that + causes solr to search appropriate packages, including + org.apache.solr.(search|update|request|core|analysis) + --> + + <!-- Perform a <commit/> automatically under certain conditions: + maxDocs - number of updates since last commit is greater than this + maxTime - oldest uncommited update (in ms) is this long ago + <autoCommit> + <maxDocs>10000</maxDocs> + <maxTime>1000</maxTime> + </autoCommit> + --> + + <!-- The RunExecutableListener executes an external command. + exe - the name of the executable to run + dir - dir to use as the current working directory. 
default="." + wait - the calling thread waits until the executable returns. default="true" + args - the arguments to pass to the program. default=nothing + env - environment variables to set. default=nothing + --> + <!-- A postCommit event is fired after every commit or optimize command + <listener event="postCommit" class="solr.RunExecutableListener"> + <str name="exe">solr/bin/snapshooter</str> + <str name="dir">.</str> + <bool name="wait">true</bool> + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> + <arr name="env"> <str>MYVAR=val1</str> </arr> + </listener> + --> + <!-- A postOptimize event is fired only after every optimize command, useful + in conjunction with index distribution to only distribute optimized indices + <listener event="postOptimize" class="solr.RunExecutableListener"> + <str name="exe">snapshooter</str> + <str name="dir">solr/bin</str> + <bool name="wait">true</bool> + </listener> + --> + + </updateHandler> + + + <query> + <!-- Maximum number of clauses in a boolean query... can affect + range or prefix queries that expand to big boolean + queries. An exception is thrown if exceeded. --> + <maxBooleanClauses>1024</maxBooleanClauses> + + + <!-- Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. + When a new searcher is opened, its caches may be prepopulated + or "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For LRUCache, + the autowarmed items will be the most recently accessed items. + Parameters: + class - the SolrCache implementation (currently only LRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (see java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + an old cache.
+ --> + <filterCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="128"/> + + <!-- queryResultCache caches results of searches - ordered lists of + document ids (DocList) based on a query, a sort, and the range + of documents requested. --> + <queryResultCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="32"/> + + <!-- documentCache caches Lucene Document objects (the stored fields for each document). + Since Lucene internal document ids are transient, this cache will not be autowarmed. --> + <documentCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="0"/> + + <!-- If true, stored fields that are not requested will be loaded lazily. + + This can result in a significant speed improvement if the usual case is to + not load all stored fields, especially if the skipped fields are large compressed + text fields. + --> + <enableLazyFieldLoading>true</enableLazyFieldLoading> + + <!-- Example of a generic cache. These caches may be accessed by name + through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert(). + The purpose is to enable easy caching of user/application level data. + The regenerator argument should be specified as an implementation + of solr.search.CacheRegenerator if autowarming is desired. --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="org.mycompany.mypackage.MyRegenerator" + /> + --> + + <!-- An optimization that attempts to use a filter to satisfy a search. + If the requested sort does not include score, then the filterCache + will be checked for a filter matching the query. If found, the filter + will be used as the source of document ids, and then the sort will be + applied to that. + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- An optimization for use with the queryResultCache. 
When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. --> + <queryResultWindowSize>50</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- This entry enables an int hash representation for filters (DocSets) + when the number of items in the set is less than maxSize. For smaller + sets, this representation is more memory efficient, more efficient to + iterate over, and faster to take intersections. --> + <HashDocSet maxSize="3000" loadFactor="0.75"/> + + <!-- a newSearcher event is fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka registered). --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. --> + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst> + <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst> + <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst> + </arr> + </listener> + + <!-- a firstSearcher event is fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. 
--> + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst> + <lst><str name="q">static firstSearcher warming query from solrconfig.xml</str></lst> + </arr> + </listener> + + <!-- If a search request comes in and there is no current registered searcher, + then immediately register the still warming searcher and use it. If + "false" then all requests will block until the first searcher is done + warming. --> + <useColdSearcher>false</useColdSearcher> + + <!-- Maximum number of searchers that may be warming in the background + concurrently. An error is returned if this limit is exceeded. Recommend + 1-2 for read-only slaves, higher for masters w/o cache warming. --> + <maxWarmingSearchers>2</maxWarmingSearchers> + + </query> + + <!-- + Let the dispatch filter handle /select?qt=XXX + handleSelect=true will use consistent error handling for /select and /update + handleSelect=false will use solr1.1 style error formatting + --> + <requestDispatcher handleSelect="true" > + <!--Make sure your system has some authentication before enabling remote streaming! --> + <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" /> + + <!-- Set HTTP caching related parameters (for proxy caches and clients). + + To get the behaviour of Solr 1.2 (ie: no caching related headers) + use the never304="true" option and do not specify a value for + <cacheControl> + --> + <!-- <httpCaching never304="true"> --> + <httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"> + <!-- lastModifiedFrom="openTime" is the default, the Last-Modified value + (and validation against If-Modified-Since requests) will all be + relative to when the current Searcher was opened. + You can change it to lastModifiedFrom="dirLastMod" if you want the + value to exactly correspond to when the physical index was last + modified. + + etagSeed="..."
is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + different even if the index has not changed (ie: when making + significant changes to your config file) + + lastModifiedFrom and etagSeed are both ignored if you use the + never304="true" option. + --> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header, as well as an Expires header + if the value contains "max-age=" + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- <cacheControl>max-age=30, public</cacheControl> --> + </httpCaching> + </requestDispatcher> + + + <!-- requestHandler plugins... incoming queries will be dispatched to the + correct handler based on the path or the qt (query type) param. + Names starting with a '/' are accessed with a path equal to the + registered name. Names without a leading '/' are accessed with: + http://host/app/select?qt=name + If no qt is defined, the requestHandler that declares default="true" + will be used. + --> + <requestHandler name="standard" class="solr.SearchHandler" default="true"> + <!-- default values for query parameters --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <!-- + <int name="rows">10</int> + <str name="fl">*</str> + <str name="version">2.1</str> + --> + </lst> + </requestHandler> + + + <!-- DisMaxRequestHandler allows easy searching across multiple fields + for simple user-entered phrases. Its implementation is now + just the standard SearchHandler with a default query type + of "dismax".
+ see http://wiki.apache.org/solr/DisMaxRequestHandler + --> + <requestHandler name="dismax" class="solr.SearchHandler" > + <lst name="defaults"> + <str name="defType">dismax</str> + <str name="echoParams">explicit</str> + <float name="tie">0.01</float> + <str name="qf"> + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + </str> + <str name="pf"> + text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 + </str> + <str name="bf"> + ord(popularity)^0.5 recip(rord(price),1,1000,1000)^0.3 + </str> + <str name="fl"> + id,name,price,score + </str> + <str name="mm"> + 2<-1 5<-2 6<90% + </str> + <int name="ps">100</int> + <str name="q.alt">*:*</str> + <!-- example highlighter config, enable per-query with hl=true --> + <str name="hl.fl">text features name</str> + <!-- for this field, we want no fragmenting, just highlighting --> + <str name="f.name.hl.fragsize">0</str> + <!-- instructs Solr to return the field itself if no query terms are + found --> + <str name="f.name.hl.alternateField">name</str> + <str name="f.text.hl.fragmenter">regex</str> <!-- defined below --> + </lst> + </requestHandler> + + <!-- Note how you can register the same handler multiple times with + different names (and different init parameters) + --> + <requestHandler name="partitioned" class="solr.SearchHandler" > + <lst name="defaults"> + <str name="defType">dismax</str> + <str name="echoParams">explicit</str> + <str name="qf">text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0</str> + <str name="mm">2<-1 5<-2 6<90%</str> + <!-- This is an example of using Date Math to specify a constantly + moving date range in a config... + --> + <str name="bq">incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2</str> + </lst> + <!-- In addition to defaults, "appends" params can be specified + to identify values which should be appended to the list of + multi-val params from the query (or the existing "defaults"). 
+ + In this example, the param "fq=instock:true" will be appended to + any query time fq params the user may specify, as a mechanism for + partitioning the index, independent of any user selected filtering + that may also be desired (perhaps as a result of faceted searching). + + NOTE: there is *absolutely* nothing a client can do to prevent these + "appends" values from being used, so don't use this mechanism + unless you are sure you always want it. + --> + <lst name="appends"> + <str name="fq">inStock:true</str> + </lst> + <!-- "invariants" are a way of letting the Solr maintainer lock down + the options available to Solr clients. Any params values + specified here are used regardless of what values may be specified + in either the query, the "defaults", or the "appends" params. + + In this example, the facet.field and facet.query params are fixed, + limiting the facets clients can use. Faceting is not turned on by + default - but if the client does specify facet=true in the request, + these are the only facets they will be able to see counts for; + regardless of what other facet.field or facet.query params they + may specify. + + NOTE: there is *absolutely* nothing a client can do to prevent these + "invariants" values from being used, so don't use this mechanism + unless you are sure you always want it. 
+ --> + <lst name="invariants"> + <str name="facet.field">cat</str> + <str name="facet.field">manu_exact</str> + <str name="facet.query">price:[* TO 500]</str> + <str name="facet.query">price:[500 TO *]</str> + </lst> + </requestHandler> + + + <!-- + Search components are registered to SolrCore and used by Search Handlers + + By default, the following components are available: + + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" /> + <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" /> + <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" /> + <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" /> + + Default configuration in a requestHandler would look like: + <arr name="components"> + <str>query</str> + <str>facet</str> + <str>mlt</str> + <str>highlight</str> + <str>debug</str> + </arr> + + If you register a searchComponent to one of the standard names, that will be used instead. + To insert components before or after the 'standard' components, use: + + <arr name="first-components"> + <str>myFirstComponentName</str> + </arr> + + <arr name="last-components"> + <str>myLastComponentName</str> + </arr> + --> + + <!-- The spell check component can return a list of alternative spelling + suggestions.
--> + <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> + + <str name="queryAnalyzerFieldType">textSpell</str> + + <lst name="spellchecker"> + <str name="name">default</str> + <str name="field">spell</str> + <str name="spellcheckIndexDir">./spellchecker1</str> + + </lst> + <lst name="spellchecker"> + <str name="name">jarowinkler</str> + <str name="field">spell</str> + <!-- Use a different Distance Measure --> + <str name="distanceMeasure">org.apache.lucene.search.spell.JaroWinklerDistance</str> + <str name="spellcheckIndexDir">./spellchecker2</str> + + </lst> + + <lst name="spellchecker"> + <str name="classname">solr.FileBasedSpellChecker</str> + <str name="name">file</str> + <str name="sourceLocation">spellings.txt</str> + <str name="characterEncoding">UTF-8</str> + <str name="spellcheckIndexDir">./spellcheckerFile</str> + </lst> + </searchComponent> + + <!-- a request handler utilizing the spellcheck component --> + <requestHandler name="/spellCheckCompRH" class="solr.SearchHandler"> + <lst name="defaults"> + <!-- omp = Only More Popular --> + <str name="spellcheck.onlyMorePopular">false</str> + <!-- exr = Extended Results --> + <str name="spellcheck.extendedResults">false</str> + <!-- The number of suggestions to return --> + <str name="spellcheck.count">1</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <!-- a search component that enables you to configure the top results for + a given query regardless of the normal lucene scoring.--> + +<!-- poussin 20090902 remove elevate this file is empty, what need ? 
+ <searchComponent name="elevator" class="solr.QueryElevationComponent" > + <str name="queryFieldType">string</str> + <str name="config-file">elevate.xml</str> + </searchComponent> + --> + <!-- a request handler utilizing the elevator component --> +<!-- + <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <arr name="last-components"> + <str>elevator</str> + </arr> + </requestHandler> + --> + + <!-- Update request handler. + + Note: Since solr1.1 requestHandlers require a valid content type header if posted in + the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8' + The response format differs from solr1.1 formatting and returns a standard error code. + + To enable solr1.1 behavior, remove the /update handler or change its path + --> + <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" /> + + <!-- + Analysis request handler. Since Solr 1.3. Use to return how a document is analyzed. Useful + for debugging and as a token server for other types of applications + --> + <requestHandler name="/analysis" class="solr.AnalysisRequestHandler" /> + + + <!-- CSV update handler, loaded on demand --> + <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" /> + + + <!-- + Admin Handlers - This will register all the standard admin RequestHandlers.
Adding + this single handler is equivalent to registering: + + <requestHandler name="/admin/luke" class="org.apache.solr.handler.admin.LukeRequestHandler" /> + <requestHandler name="/admin/system" class="org.apache.solr.handler.admin.SystemInfoHandler" /> + <requestHandler name="/admin/plugins" class="org.apache.solr.handler.admin.PluginInfoHandler" /> + <requestHandler name="/admin/threads" class="org.apache.solr.handler.admin.ThreadDumpHandler" /> + <requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" /> + <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" > + + If you wish to hide files under ${solr.home}/conf, explicitly register the ShowFileRequestHandler using: + <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" > + <lst name="invariants"> + <str name="hidden">synonyms.txt</str> + <str name="hidden">anotherfile.txt</str> + </lst> + </requestHandler> + --> + <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> + + <!-- ping/healthcheck --> + <requestHandler name="/admin/ping" class="PingRequestHandler"> + <lst name="defaults"> + <str name="qt">standard</str> + <str name="q">solrpingquery</str> + <str name="echoParams">all</str> + </lst> + </requestHandler> + + <!-- Echo the request contents back to the client --> + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > + <lst name="defaults"> + <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' --> + <str name="echoHandler">true</str> + </lst> + </requestHandler> + + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A
regular-expression-based fragmenter (f.i., for sentence extraction) --> + <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + </highlighting> + + + <!-- queryResponseWriter plugins... query responses will be written using the + writer specified by the 'wt' request parameter matching the name of a registered + writer. + The "default" writer is the default and will be used if 'wt' is not specified + in the request. XMLResponseWriter will be used if nothing is specified here. + The json, python, and ruby writers are also available by default. + + <queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/> + <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/> + <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/> + <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/> + <queryResponseWriter name="php" class="org.apache.solr.request.PHPResponseWriter"/> + <queryResponseWriter name="phps" class="org.apache.solr.request.PHPSerializedResponseWriter"/> + + <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/> + --> + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. 
Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + + <queryParser name="wikitty" class="org.nuiton.wikitty.solr.WikittyQueryParser"/> + + <!-- example of registering a query parser + <queryParser name="lucene" class="org.apache.solr.search.LuceneQParserPlugin"/> + --> + + <!-- example of registering a custom function parser + <valueSourceParser name="myfunc" class="com.mycompany.MyValueSourceParser" /> + --> + + <!-- config for the admin interface --> + <admin> + <defaultQuery>solr</defaultQuery> + + <!-- configure a healthcheck file for servers behind a loadbalancer + <healthcheck type="file">server-enabled</healthcheck> + --> + </admin> + +</config>