Author: kmorin Date: 2009-11-19 17:29:55 +0100 (Thu, 19 Nov 2009) New Revision: 331 Modified: wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/log4j.properties wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/schema.xml wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/pom.xml wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/schema.xml wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/solrconfig.xml Log: Config solr for efficient research Modified: wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/log4j.properties =================================================================== --- wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/log4j.properties 2009-11-17 18:48:40 UTC (rev 330) +++ wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/log4j.properties 2009-11-19 16:29:55 UTC (rev 331) @@ -5,5 +5,5 @@ # Configuration by components log4j.rootLogger=ERROR, logConsole -log4j.category.org.sharengo.wikitty=DEBUG -log4j.category.org.apache.solr=DEBUG +log4j.logger.org.sharengo.wikitty=DEBUG +log4j.logger.org.apache.solr=DEBUG Modified: wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/schema.xml =================================================================== --- wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/schema.xml 2009-11-17 18:48:40 UTC (rev 330) +++ wikengo_core-wikitty/wikengo_core-wikitty-jdbc-impl/src/test/resources/schema.xml 2009-11-19 16:29:55 UTC (rev 331) @@ -16,10 +16,10 @@ limitations under the License. --> -<!-- +<!-- This is the Solr schema file. This file should be named "schema.xml" and should be in the conf directory under the solr home - (i.e. ./solr/conf/schema.xml by default) + (i.e. ./solr/conf/schema.xml by default) or located where the classloader for the Solr webapp can find it. This example schema is the recommended starting point for users. @@ -46,7 +46,7 @@ org.apache.solr.analysis package. --> - <!-- The StrField type is not analyzed, but indexed/stored verbatim. + <!-- The StrField type is not analyzed, but indexed/stored verbatim. - StrField and TextField support an optional compressThreshold which limits compression (if enabled in the derived fields) to values which exceed a certain size (in characters). @@ -67,7 +67,7 @@ - If sortMissingLast="false" and sortMissingFirst="false" (the default), then default lucene sorting will be used which places docs without the field first in an ascending sort and last in a descending sort. - --> + --> <!-- numeric field types that store and index the text @@ -91,7 +91,7 @@ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and is a more restricted form of the canonical representation of dateTime - http://www.w3.org/TR/xmlschema-2/#dateTime + http://www.w3.org/TR/xmlschema-2/#dateTime The trailing "Z" designates UTC time and is mandatory. Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z All other components are mandatory. @@ -106,7 +106,7 @@ NOW/DAY+6MONTHS+3DAYS ... 6 months and 3 days in the future from the start of the current day - + Consult the DateField javadocs for more information. --> <fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/> @@ -114,11 +114,11 @@ <!-- The "RandomSortField" is not used to store or search any data. You can declare fields of this type it in your schema - to generate psuedo-random orderings of your docs for sorting - purposes. The ordering is generated based on the field name + to generate psuedo-random orderings of your docs for sorting + purposes. The ordering is generated based on the field name and the version of the index, As long as the index version remains unchanged, and the same field name is reused, - the ordering of the docs will be consistent. + the ordering of the docs will be consistent. If you want differend psuedo-random orderings of documents, for the same version of the index, use a dynamicField and change the name @@ -188,7 +188,7 @@ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> </analyzer> </fieldType> - + <fieldType name="text_core" class="solr.TextField" positionIncrementGap="100"> <analyzer> <tokenizer class="solr.WhitespaceTokenizerFactory" /> @@ -244,13 +244,13 @@ <filter class="solr.TrimFilterFactory" /> <!-- The PatternReplaceFilter gives you the flexibility to use Java Regular expression to replace any sequence of characters - matching a pattern with an arbitrary replacement string, + matching a pattern with an arbitrary replacement string, which may include back refrences to portions of the orriginal string matched by the pattern. - + See the Java Regular Expression documentation for more infomation on pattern and replacement string syntax. - + http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html --> <filter class="solr.PatternReplaceFilterFactory" @@ -259,50 +259,56 @@ </analyzer> </fieldType> - <!-- since fields of this type are by default not stored or indexed, any data added to - them will be ignored outright - --> - <fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" /> + <!-- since fields of this type are by default not stored or indexed, any data added to + them will be ignored outright + --> + <fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" /> </types> <fields> - <field name="id" type="string" indexed="true" stored="true" required="true" /> - <field name="extensions" type="string" indexed="true" stored="false" multiValued="true"/> + <field name="id" type="text" indexed="true" stored="true" required="true" /> + <field name="extensions" type="text" indexed="true" stored="false" multiValued="true"/> <!-- catchall field, containing all other searchable text fields (implemented via copyField further on in this schema --> <field name="text" type="text" indexed="true" stored="false" multiValued="true"/> + <copyField source="*_i" dest="text"/> + <copyField source="*_s" dest="text"/> + <copyField source="*_l" dest="text"/> + <copyField source="*_t" dest="text"/> + <copyField source="*_b" dest="text"/> + <copyField source="*_f" dest="text"/> + <copyField source="*_d" dest="text"/> + <copyField source="*_dt" dest="text"/> - <dynamicField name="Label.label*" type="text" indexed="true" stored="true" multiValued="true"/> - <dynamicField name="*_i" type="sint" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_l" type="slong" indexed="true" stored="true" multiValued="true"/> - <dynamicField name="*_t" type="string" indexed="true" stored="true" multiValued="true"/> + <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_b" type="boolean" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_f" type="sfloat" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_d" type="sdouble" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_dt" type="date" indexed="true" stored="true" multiValued="true"/> <!-- all wikitty field --> - <dynamicField name="*" type="string" indexed="true" stored="true" multiValued="true"/> + <dynamicField name="*" type="text" indexed="true" stored="true" multiValued="true"/> <!-- copy fields for optimisation --> - <dynamicField name="_*" type="string" indexed="true" stored="true" multiValued="true"/> + <dynamicField name="_*" type="text" indexed="true" stored="true" multiValued="true"/> </fields> - <!-- Field to use to determine and enforce document uniqueness. + <!-- Field to use to determine and enforce document uniqueness. Unless this field is marked with required="false", it will be a required field --> <uniqueKey>id</uniqueKey> <!-- field for the QueryParser to use when an explicit fieldname is absent --> - <defaultSearchField>id</defaultSearchField> + <defaultSearchField>text</defaultSearchField> <!-- SolrQueryParser configuration: defaultOperator="AND|OR" --> - <solrQueryParser defaultOperator="OR"/> + <solrQueryParser defaultOperator="AND"/> -</schema> +</schema> \ No newline at end of file Modified: wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/pom.xml =================================================================== --- wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/pom.xml 2009-11-17 18:48:40 UTC (rev 330) +++ wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/pom.xml 2009-11-19 16:29:55 UTC (rev 331) @@ -31,6 +31,7 @@ <artifactId>solr-core</artifactId> <version>1.3.0</version> </dependency> + <dependency> <groupId>javax.servlet</groupId> <artifactId>servlet-api</artifactId> Modified: wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/schema.xml =================================================================== --- wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/schema.xml 2009-11-17 18:48:40 UTC (rev 330) +++ wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/schema.xml 2009-11-19 16:29:55 UTC (rev 331) @@ -268,8 +268,8 @@ <fields> - <field name="id" type="string" indexed="true" stored="true" required="true" /> - <field name="extensions" type="string" indexed="true" stored="false" multiValued="true"/> + <field name="id" type="text" indexed="true" stored="true" required="true" /> + <field name="extensions" type="text" indexed="true" stored="false" multiValued="true"/> <!-- catchall field, containing all other searchable text fields (implemented via copyField further on in this schema --> @@ -284,9 +284,9 @@ <copyField source="*_dt" dest="text"/> <dynamicField name="*_i" type="sint" indexed="true" stored="true" multiValued="true"/> - <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/> + <dynamicField name="*_s" type="text" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_l" type="slong" indexed="true" stored="true" multiValued="true"/> - <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/> + <dynamicField name="*_t" type="text" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_b" type="boolean" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_f" type="sfloat" indexed="true" stored="true" multiValued="true"/> <dynamicField name="*_d" type="sdouble" indexed="true" stored="true" multiValued="true"/> Modified: wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/solrconfig.xml =================================================================== --- wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/solrconfig.xml 2009-11-17 18:48:40 UTC (rev 330) +++ wikengo_core-wikitty/wikengo_core-wikitty-solr-impl/src/main/resources/solrconfig.xml 2009-11-19 16:29:55 UTC (rev 331) @@ -17,8 +17,8 @@ --> <config> - <!-- Set this to 'false' if you want solr to continue working after it has - encountered an severe configuration error. In a production environment, + <!-- Set this to 'false' if you want solr to continue working after it has + encountered an severe configuration error. In a production environment, you may want solr to keep working even if one handler is mis-configured. You may also set this to false using by setting the system property: @@ -83,7 +83,7 @@ <!-- This option specifies which Lucene LockFactory implementation to use. - + single = SingleInstanceLockFactory - suggested for a read-only index or when there is no possibility of another process trying to modify the index. @@ -106,7 +106,7 @@ <maxMergeDocs>2147483647</maxMergeDocs> <maxFieldLength>10000</maxFieldLength> - <!-- If true, unlock any held write or commit locks on startup. + <!-- If true, unlock any held write or commit locks on startup. This defeats the locking mechanism that allows multiple processes to safely access a lucene index, and should be used with care. @@ -114,17 +114,17 @@ --> <unlockOnStartup>false</unlockOnStartup> </mainIndex> - - <!-- Enables JMX if and only if an existing MBeanServer is found, use + + <!-- Enables JMX if and only if an existing MBeanServer is found, use this if you want to configure JMX through JVM parameters. Remove this to disable exposing Solr configuration and statistics to JMX. - + If you want to connect to a particular server, specify the agentId e.g. <jmx agentId="myAgent" /> - + If you want to start a new MBeanServer, specify the serviceUrl e.g <jmx serviceurl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> - + For more details see http://wiki.apache.org/solr/SolrJmx --> <jmx /> @@ -140,9 +140,9 @@ <!-- Perform a <commit/> automatically under certain conditions: maxDocs - number of updates since last commit is greater than this maxTime - oldest uncommited update (in ms) is this long ago - <autoCommit> + <autoCommit> <maxDocs>10000</maxDocs> - <maxTime>1000</maxTime> + <maxTime>1000</maxTime> </autoCommit> --> @@ -163,7 +163,7 @@ </listener> --> <!-- A postOptimize event is fired only after every optimize command, useful - in conjunction with index distribution to only distribute optimized indicies + in conjunction with index distribution to only distribute optimized indicies <listener event="postOptimize" class="solr.RunExecutableListener"> <str name="exe">snapshooter</str> <str name="dir">solr/bin</str> @@ -180,7 +180,7 @@ queries. An exception is thrown if exceeded. --> <maxBooleanClauses>1024</maxBooleanClauses> - + <!-- Cache used by SolrIndexSearcher for filters (DocSets), unordered sets of *all* documents that match a query. When a new searcher is opened, its caches may be prepopulated @@ -256,7 +256,7 @@ then documents 0 through 49 will be collected and cached. Any further requests in that range can be satisfied via the cache. --> <queryResultWindowSize>50</queryResultWindowSize> - + <!-- Maximum number of documents to cache for any entry in the queryResultCache. --> <queryResultMaxDocsCached>200</queryResultMaxDocsCached> @@ -302,7 +302,7 @@ </query> - <!-- + <!-- Let the dispatch filter handler /select?qt=XXX handleSelect=true will use consistent error handling for /select and /update handleSelect=false will use solr1.1 style error formatting @@ -310,9 +310,9 @@ <requestDispatcher handleSelect="true" > <!--Make sure your system has some authentication before enabling remote streaming! --> <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" /> - + <!-- Set HTTP caching related parameters (for proxy caches and clients). - + To get the behaviour of Solr 1.2 (ie: no caching related headers) use the never304="true" option and do not specify a value for <cacheControl> @@ -326,7 +326,7 @@ You can change it to lastModFrom="dirLastMod" if you want the value to exactly corrispond to when the physical index was last modified. - + etagSeed="..." is an option you can change to force the ETag header (and validation against If-None-Match requests) to be differnet even if the index has not changed (ie: when making @@ -338,7 +338,7 @@ <!-- If you include a <cacheControl> directive, it will be used to generate a Cache-Control header, as well as an Expires header if the value contains "max-age=" - + By default, no Cache-Control header is generated. You can use the <cacheControl> option even if you have set @@ -347,11 +347,11 @@ <!-- <cacheControl>max-age=30, public</cacheControl> --> </httpCaching> </requestDispatcher> - - + + <!-- requestHandler plugins... incoming queries will be dispatched to the correct handler based on the path or the qt (query type) param. - Names starting with a '/' are accessed with the a path equal to the + Names starting with a '/' are accessed with the a path equal to the registered name. Names without a leading '/' are accessed with: http://host/app/select?qt=name If no qt is defined, the requestHandler that declares default="true" @@ -361,7 +361,7 @@ <!-- default values for query parameters --> <lst name="defaults"> <str name="echoParams">explicit</str> - <!-- + <!-- <int name="rows">10</int> <str name="fl">*</str> <str name="version">2.1</str> @@ -373,7 +373,7 @@ <!-- DisMaxRequestHandler allows easy searching across multiple fields for simple user-entered phrases. It's implementation is now just the standard SearchHandler with a default query type - of "dismax". + of "dismax". see http://wiki.apache.org/solr/DisMaxRequestHandler --> <requestHandler name="dismax" class="solr.SearchHandler" > @@ -398,7 +398,7 @@ </str> <int name="ps">100</int> <str name="q.alt">*:*</str> - <!-- example highlighter config, enable per-query with hl=true --> + <!-- example highlighter config, enable per-query with hl=true --> <str name="hl.fl">text features name</str> <!-- for this field, we want no fragmenting, just highlighting --> <str name="f.name.hl.fragsize">0</str> @@ -462,13 +462,13 @@ <str name="facet.query">price:[500 TO *]</str> </lst> </requestHandler> - + <!-- Search components are registered to SolrCore and used by Search Handlers - + By default, the following components are avaliable: - + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" /> <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" /> <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" /> @@ -486,11 +486,11 @@ If you register a searchComponent to one of the standard names, that will be used instead. To insert handlers before or after the 'standard' components, use: - + <arr name="first-components"> <str>myFirstComponentName</str> </arr> - + <arr name="last-components"> <str>myLastComponentName</str> </arr> @@ -540,7 +540,7 @@ <str>spellcheck</str> </arr> </requestHandler> - + <!-- a search component that enables you to configure the top results for a given query regardless of the normal lucene scoring.--> @@ -562,12 +562,12 @@ </requestHandler> --> - <!-- Update request handler. - - Note: Since solr1.1 requestHandlers requires a valid content type header if posted in + <!-- Update request handler. + + Note: Since solr1.1 requestHandlers requires a valid content type header if posted in the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8' The response format differs from solr1.1 formatting and returns a standard error code. - + To enable solr1.1 behavior, remove the /update handler or change its path --> <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" /> @@ -577,33 +577,33 @@ for debugging and as a token server for other types of applications --> <requestHandler name="/analysis" class="solr.AnalysisRequestHandler" /> - + <!-- CSV update handler, loaded on demand --> <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" /> - <!-- - Admin Handlers - This will register all the standard admin RequestHandlers. Adding + <!-- + Admin Handlers - This will register all the standard admin RequestHandlers. Adding this single handler is equivolent to registering: - + <requestHandler name="/admin/luke" class="org.apache.solr.handler.admin.LukeRequestHandler" /> <requestHandler name="/admin/system" class="org.apache.solr.handler.admin.SystemInfoHandler" /> <requestHandler name="/admin/plugins" class="org.apache.solr.handler.admin.PluginInfoHandler" /> <requestHandler name="/admin/threads" class="org.apache.solr.handler.admin.ThreadDumpHandler" /> <requestHandler name="/admin/properties" class="org.apache.solr.handler.admin.PropertiesRequestHandler" /> <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" > - + If you wish to hide files under ${solr.home}/conf, explicitly register the ShowFileRequestHandler using: <requestHandler name="/admin/file" class="org.apache.solr.handler.admin.ShowFileRequestHandler" > <lst name="invariants"> - <str name="hidden">synonyms.txt</str> - <str name="hidden">anotherfile.txt</str> + <str name="hidden">synonyms.txt</str> + <str name="hidden">anotherfile.txt</str> </lst> </requestHandler> --> <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" /> - + <!-- ping/healthcheck --> <requestHandler name="/admin/ping" class="PingRequestHandler"> <lst name="defaults"> @@ -612,7 +612,7 @@ <str name="echoParams">all</str> </lst> </requestHandler> - + <!-- Echo the request contents back to the client --> <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > <lst name="defaults"> @@ -620,7 +620,7 @@ <str name="echoHandler">true</str> </lst> </requestHandler> - + <highlighting> <!-- Configure the standard fragmenter --> <!-- This could most likely be commented out in the "default" case --> @@ -636,12 +636,12 @@ <!-- slightly smaller fragsizes work better because of slop --> <int name="hl.fragsize">70</int> <!-- allow 50% slop on fragment sizes --> - <float name="hl.regex.slop">0.5</float> + <float name="hl.regex.slop">0.5</float> <!-- a basic sentence pattern --> <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> </lst> </fragmenter> - + <!-- Configure the standard formatter --> <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> <lst name="defaults"> @@ -650,12 +650,12 @@ </lst> </formatter> </highlighting> - - + + <!-- queryResponseWriter plugins... query responses will be written using the writer specified by the 'wt' request parameter matching the name of a registered writer. - The "default" writer is the default and will be used if 'wt' is not specified + The "default" writer is the default and will be used if 'wt' is not specified in the request. XMLResponseWriter will be used if nothing is specified here. The json, python, and ruby writers are also available by default. @@ -671,11 +671,11 @@ <!-- XSLT response writer transforms the XML output by any xslt file found in Solr's conf/xslt directory. Changes to xslt files are checked for - every xsltCacheLifetimeSeconds. + every xsltCacheLifetimeSeconds. --> <queryResponseWriter name="xslt" class="org.apache.solr.request.XSLTResponseWriter"> <int name="xsltCacheLifetimeSeconds">5</int> - </queryResponseWriter> + </queryResponseWriter> <queryParser name="wikitty" class="org.sharengo.wikitty.solr.WikittyQueryParser"/> @@ -684,17 +684,17 @@ <queryParser name="lucene" class="org.apache.solr.search.LuceneQParserPlugin"/> --> - <!-- example of registering a custom function parser + <!-- example of registering a custom function parser <valueSourceParser name="myfunc" class="com.mycompany.MyValueSourceParser" /> --> - - <!-- config for the admin interface --> + + <!-- config for the admin interface --> <admin> <defaultQuery>solr</defaultQuery> - + <!-- configure a healthcheck file for servers behind a loadbalancer <healthcheck type="file">server-enabled</healthcheck> --> </admin> -</config> +</config> \ No newline at end of file