首页 文章

在solr中声明多个实体时出错

提问于
浏览
0

我刚开始学习solr . 我已经安装了apache tomcat server和solr 3.5 . 我已经成功配置了solr,用于从oracle数据库中搜索一个实体的数据 . 当我在data-config.xml中添加两个实体并在solr schema.xml中添加字段时,我遇到了问题 . 我像这样配置了我的data-config.xml ...

<dataConfig>
    <dataSource name="JdbcDataSource" 
        driver="oracle.jdbc.driver.OracleDriver"
        url="jdbc:oracle:thin:@//192.168.1.3:1521/orcl" 
        user="SSOHANI" 
        password="Ssohani123"/>

    <document name="doc">
        <entity name="PROJECTS" 
                query="select PROJECTS.ID, PROJECTS.BATCH_ID, PROJECTS.OPERATION, PROJECTS.NAME,
                PROJECTS.DESCRIPTION, PROJECTS.ESTIMATED_COST, PROJECTS.GRANTOR_AGENCY_ID,
                PROJECTS.GRANTEE_AGENCY_ID, PROJECTS.PROJECT_STATUS_ID,
                PROJECTS.PROJECT_TYPE_ID, PROJECTS.START_DATE, PROJECTS.END_DATE,
                NVL(PROJECTS.TRACS_PARENT_PROJECT_ID,0) TRACS_PARENT_PROJECT_ID,
                NVL(PROJECTS.STATE_PARENT_PROJECT_ID,0) STATE_PARENT_PROJECT_ID,
                NVL(PROJECTS.PLAN_ID,0) PLAN_ID,
                NVL(PROJECTS.PLAN_ID_TYPE,0) PLAN_ID_TYPE,
                NVL(PROJECTS.TRACS_ID,0) TRACS_ID,
                NVL(PROJECTS.STATE_ID,0) STATE_ID,
                PROJECTS.VALID, PROJECTS.APPLIED,
                NVL(PROJECTS.COMMENTS,'NULL') COMMENTS,
                PROJECTS.GENERATED_PLAN_ID, PROJECTS.TRACS_PROJECT_ID,
                PROJECTS.STATE_PLAN_ID from SSOHANI.PROJECTS" >

        <field column="ID" name="projects_id" />
        <field column="BATCH_ID" name="projects_batch_id" />
        <field column="OPERATION" name="projects_operation" />
        <field column="NAME" name="projects_name" />
        <field column="DESCRIPTION" name="projects_description" />
        <field column="ESTIMATED_COST" name="projects_estimated_cost" />
        <field column="GRANTOR_AGENCY_ID" name="projects_grantor_agency_id" />
        <field column="GRANTEE_AGENCY_ID" name="projects_grantee_agency_id" />
        <field column="PROJECT_STATUS_ID" name="projects_project_status_id" />
        <field column="PROJECT_TYPE_ID" name="projects_project_type_id" />
        <field column="START_DATE" name="projects_start_date" />
        <field column="END_DATE" name="projects_end_date" />
        <field column="TRACS_PARENT_PROJECT_ID" name="projects_tracs_parent_project_id" />
        <field column="STATE_PARENT_PROJECT_ID" name="projects_state_parent_project_id" />
        <field column="PLAN_ID" name="projects_plan_id" />
        <field column="PLAN_ID_TYPE" name="projects_plan_id_type" />
        <field column="TRACS_ID" name="projects_tracs_id" />
        <field column="STATE_ID" name="projects_state_id" />
        <field column="VALID" name="projects_valid" />
        <field column="APPLIED" name="projects_applied" />
        <field column="COMMENTS" name="projects_comments" />
        <field column="GENERATED_PLAN_ID" name="projects_generated_plan_id" />
        <field column="TRACS_PROJECT_ID" name="projects_tracs_project_id" />
        <field column="STATE_PLAN_ID" name="projects_state_plan_id" />

        </entity>

    <entity name="PLANS" 
          query="select PLANS.ID, PLANS.BATCH_ID, PLANS.OPERATION, PLANS.NAME, PLANS.DESCRIPTION,
               PLANS.CONTACT_ID, PLANS.PLAN_TYPE_ID, PLANS.AGENCY_ID, PLANS.START_DATE,
               NVL(PLANS.END_DATE,0) END_DATE,
               NVL(PLANS.TRACS_PARENT_PLAN_ID,0) TRACS_PARENT_PLAN_ID,
               NVL(PLANS.STATE_PARENT_PLAN_ID,0) STATE_PARENT_PLAN_ID,
               NVL(PLANS.TRACS_ID,0) TRACS_ID,
               NVL(PLANS.STATE_ID,0) STATE_ID,          
               PLANS.VALID, PLANS.APPLIED,
               NVL(PLANS.COMMENTS,'NULL') COMMENTS from SSOHANI.PLANS" >

        <field column="ID" name="plans_id" />
        <field column="BATCH_ID" name="plans_batch_id" />
        <field column="OPERATION" name="plans_operation" />
        <field column="NAME" name="plans_name" />
        <field column="DESCRIPTION" name="plans_description" />
        <field column="CONTACT_ID" name="plans_contact_id" />
        <field column="PLAN_TYPE_ID" name="plans_plan_type_id" />       
        <field column="AGENCY_ID" name="plans_agency_id" />
        <field column="START_DATE" name="plans_start_date" />
        <field column="END_DATE" name="plans_end_date" />
        <field column="TRACS_PARENT_PLAN_ID" name="plans_tracs_parent_plan_id" />
        <field column="STATE_PARENT_PLAN_ID" name="plans_state_parent_plan_id" />
        <field column="TRACS_ID" name="plans_tracs_id" />
        <field column="STATE_ID" name="plans_state_id" />
        <field column="VALID" name="plans_valid" />
        <field column="APPLIED" name="plans_applied" />
        <field column="COMMENTS" name="plans_comments" />       
    </entity>

</document>   
</dataConfig>

我像这样配置了schema.xml ......

<schema>
<fields>

        <field name="projects_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_batch_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_operation" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_name" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_description" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_estimated_cost" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_grantor_agency_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_grantee_agency_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_project_status_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_project_type_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_start_date" type="date" indexed="true" stored="true" required="true"/>
        <field name="projects_end_date" type="date" indexed="true" stored="true" required="true"/>      
        <field name="projects_tracs_parent_project_id" type="long" indexed="true" stored="true" required="true"/>       
        <field name="projects_state_parent_project_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_plan_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_plan_id_type" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_tracs_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_state_id" type="long" indexed="true" stored="true" required="true"/>      
        <field name="projects_valid" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_applied" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_comments" type="string" indexed="true" stored="true" required="true"/>
        <field name="projects_generated_plan_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_tracs_project_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="projects_state_plan_id" type="long" indexed="true" stored="true" required="true"/>

                <!--         fields for plan enity -->

        <field name="plans_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_batch_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_operation" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_name" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_description" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_contact_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_plan_type_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_agency_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_start_date" type="date" indexed="true" stored="true" required="true"/>
        <field name="plans_end_date" type="date" indexed="true" stored="true" required="true"/>
        <field name="plans_tracs_parent_plan_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_state_parent_plan_id" type="string" indexed="true" stored="true" required="true"/>       
        <field name="plans_tracs_id" type="long" indexed="true" stored="true" required="true"/>     
        <field name="plans_state_id" type="long" indexed="true" stored="true" required="true"/>
        <field name="plans_valid" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_applied" type="string" indexed="true" stored="true" required="true"/>
        <field name="plans_comments" type="string" indexed="true" stored="true" required="true"/>

    </fields>

    <uniqueKey>projects_id</uniqueKey>
    <uniqueKey>plans_id</uniqueKey>
    <defaultSearchField>projects_id</defaultSearchField>
</schema>

我的solrconfig.xml是......

<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
      <lst name="defaults">
          <str name="config">/opt/solr/core0/conf/data-config.xml</str>
      </lst>
  </requestHandler>

现在,当我运行完全导入命令时,我收到以下错误..

Apr 16, 2012 4:11:46 PM org.apache.solr.handler.dataimport.SolrWriter upload
WARNING: Error creating document : SolrInputDocument[{projects_tracs_id=projects_tracs_id(1.0)={0}, projects_name=projects_name(1.0)={Minnesota Firearms Safety Training Program}, projects_description=projects_description(1.0)={To train 17,500 students and 425 new instructors at 45 recruiting workshops. Hold one statwide training academy. Award 2,650 recognition awards for length of service.}, projects_comments=projects_comments(1.0)={NULL}, projects_plan_id=projects_plan_id(1.0)={0}, projects_end_date=projects_end_date(1.0)={2002-12-31 00:00:00.0}, projects_tracs_parent_project_id=projects_tracs_parent_project_id(1.0)={0}, projects_plan_id_type=projects_plan_id_type(1.0)={0}, projects_project_status_id=projects_project_status_id(1.0)={4}, projects_state_plan_id=projects_state_plan_id(1.0)={1126}, projects_estimated_cost=projects_estimated_cost(1.0)={600000}, projects_valid=projects_valid(1.0)={N}, projects_grantor_agency_id=projects_grantor_agency_id(1.0)={1154}, projects_start_date=projects_start_date(1.0)={2001-12-31 00:00:00.0}, projects_applied=projects_applied(1.0)={N}, projects_state_id=projects_state_id(1.0)={0}, projects_batch_id=projects_batch_id(1.0)={1433468017}, projects_generated_plan_id=projects_generated_plan_id(1.0)={2050667163}, projects_id=projects_id(1.0)={2009553709}, projects_operation=projects_operation(1.0)={INSERT}, projects_state_parent_project_id=projects_state_parent_project_id(1.0)={0}, projects_grantee_agency_id=projects_grantee_agency_id(1.0)={1235}, projects_tracs_project_id=projects_tracs_project_id(1.0)={1123}, projects_project_type_id=projects_project_type_id(1.0)={3}}]
org.apache.solr.common.SolrException: [doc=2009553709] missing required field: plans_applied
    at org.apache.solr.update.DocumentBuilder.toDocument(DocumentBuilder.java:346)
    at org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:60)
    at org.apache.solr.update.processor.LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:115)
    at org.apache.solr.handler.dataimport.SolrWriter.upload(SolrWriter.java:73)
    at org.apache.solr.handler.dataimport.DataImportHandler$1.upload(DataImportHandler.java:293)
    at org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:636)
    at org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:268)
    at org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:187)
    at org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:359)
    at org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:427)
    at org.apache.solr.handler.dataimport.DataImporter$1.run(DataImporter.java:408)

solr无法读取第二个实体的任何字段 . 任何人都可以帮我解决这个问题..?请告诉我在配置data-config.xml或schema.xml或两个文件时我犯了什么错误 .

1 回答

  • 0

    虽然从堆栈跟踪看起来您的Plan文档中有一个缺少必需的plans_applied字段中的值,但我认为您需要注意的第一件事是不应该在Solr中对数据进行规范化 . 它应该在进入索引之前被夷为平地 .

    因此,不应将这两个表索引为单独的实体,而应在这两个表之间创建一个连接(直接在data-config.xml中,而不是查询),以便每个结果(join)表行成为Solr文档 .

    这样,当你想获得关于单个项目的所有数据时,它们都将在一个文档中 - 不需要为这样的用例加入连接 .

    在Solr中,你应该拥抱冗余,而不是关系和约束 .

    说得通?

相关问题