2015-02-05 2 views
0

혼자 mwikipage을에서 인덱스 하나 이상의 엔티티로, 다른 모든 매핑 된 엔티티는SOLR에 의해 색인되는 방법 ApacheSolr

데이터-config.xml에, 색인을하는 동안 ApacheSolr 무시/인덱싱되지 않습니다

<dataConfig> 
<dataSource batchSize="1" driver="com.mysql.jdbc.Driver" password="123" type="JdbcDataSource" url="jdbc:mysql://localhost:3306/cwiki" user="root"/> 
<document name="mediawiki-doc"> 
    <!-- mwiki page --> 
    <entity name="mwikipage" query="SELECT * FROM mwikipage"> 
     <field column="page_id" name="id"/> 
     <field column="page_title" name="page_title"/> 
     <field column="page_latest" name="page_latest"/> 
     <field column="page_counter" name="page_counter"/> 
     <!-- mwiki page links --> 
     <entity name="mwikipagelinks" 
      deltaQuery="SELECT pl_from FROM mwikipagelinks WHERE last_modified &gt;'${dataimporter.last_index_time}'" 
      parentDeltaQuery="SELECT page_id FROM mwikipage WHERE page_id = '${mwikipagelinks.pl_from}'" 
      query="SELECT * FROM mwikipagelinks WHERE pl_from = '${ mwikipage.page_id }'"> 
      <field column="pl_namespace" name="pl_namespace"/> 
      <field column="pl_title" name="pl_title"/> 
     </entity> 
     <!-- mwiki page redirects --> 
     <entity name="mwikiredirect" query="SELECT * FROM mwikiredirect WHERE rd_from = '${ mwikipage.page_id }'"> 
      <field column="rd_namespace" name="rd_namespace"/> 
      <field column="rd_title" name="rd_title"/> 
      <field column="rd_title" name="rd_title"/> 
     </entity> 
     <!-- mwiki page revisions --> 
     <entity name="mwikirevision" query="SELECT * FROM mwikirevision WHERE rev_id = '${ mwikipage.page_latest }'"> 
      <field column="rev_id" name="rev_id"/> 
      <field column="rev_page" name="rev_page"/> 
      <field column="rev_text_id" name="rev_text_id"/> 
      <!-- mwiki page texts --> 
      <entity name="mwikitext" query="select * from mwikitext WHERE old_id = '${ mwikirevision.rev_text_id }'"> 
       <field column="old_id" name="old_id"/> 
       <field column="old_text" name="old_text"/> 
       <field column="old_flags" name="old_flags"/> 
      </entity> 
     </entity> 
     <!-- mwiki category links --> 
     <entity name="mwikicategorylinks" query="SELECT * FROM mwikicategorylinks WHERE cl_from = '${ mwikipage.page_id }'"> 
      <field column="cl_from" name="cl_from"/> 
      <field column="cl_to" name="cl_to"/> 
     </entity> 
     <!-- mwiki external links --> 
     <entity name="mwikiexternallinks" query="SELECT * FROM mwikiexternallinks where el_from = '${ mwikipage.page_id }'"> 
      <field column="el_from" name="el_from"/> 
      <field column="el_to" name="el_to"/> 
      <field column="el_index" name="el_index"/> 
     </entity> 
    </entity> 
</document> 

shcema.xml

,536,913,632 10
<!-- Cwiki fields --> 
<!-- MWIKI PAGE --> 
<!-- <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false"/> --> 
<field name="page_title" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="page_latest" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="page_counter" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<!-- MWIKI PAGELINKS --> 
<field name="pl_namespace" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="pl_title" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<!-- MWIKI REDIRECT --> 
<field name="rd_namespace" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="rd_title" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<!-- MWIKI REVISION --> 
<field name="rev_id" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="rev_page" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="rev_text_id" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<!-- MWIKI TEXT--> 
<field name="old_id" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="old_text" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="old_flags" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<!-- MWIKI CATEGORY LINKS --> 
<field name="cl_from" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="cl_to" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<!-- MWIKI CATEGORY LINKS --> 
<field name="el_from" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="el_to" type="text_general" indexed="true" stored="true" multiValued="true"/> 
<field name="el_index" type="text_general" indexed="true" stored="true" multiValued="true"/> 

문서 고유성 집행자

<uniqueKey>id</uniqueKey> 

결과

Last Update: 13:24:39 Indexing completed. 
Added/Updated: 530 documents. Deleted 0 documents. (Duration: 1s) 
Requests: 2,651 (2,651/s), Fetched: 530 (530/s), Skipped: 0, Processed: 530 (530/s) 

mwikipage 엔티티 색인 조회 할 때, 그리고 쿼리 결과는 다음과 같다,

"docs": [ 
    { 
    "id": "1", 
    "page_title": [ 
     "Main_Page" 
    ], 
    "page_latest": [ 
     "1247" 
    ], 
    "page_counter": [ 
     "5223" 
    ], 
    "_version_": 1492251847862255600 
    } 

다른 엔티티는 무시됩니다. 개요를 색인/문서화하는 방법? $ {mwikipage.page_id} 에

감사와 안부, Bachan

+0

CirrusSearch의 스키마를 살펴 보셨습니까? https://www.mediawiki.org/wiki/Extension:CirrusSearch – Nemo

답변

1

메이크업의 $ {mwikipage.page_id은} 즉 중괄호에 공백을 제거합니다.

+0

나는 장엄한 텍스트 3의 코드 서식 지정을 위해 xml 자동 들여 쓰기를 사용하고 있습니다. 자동 서식은 자동 들여 쓰기를 할 때마다 공백을 남깁니다 .. thanks @Dheerendra Kulkarni – BalaajiChander