2014-01-27 2 views
8

IndexWriter에 대한 코드 언급을 추가했습니다.Lucene.NET이 대용량 파일을 인덱싱 할 때 OutOfMemoryException을 발생시키는 이유는 무엇입니까?

가 나는 OutOfMemoryException(OOMException)을 피하기 위해

writer.SetRAMBufferSizeMB(32); 
writer.MergeFactor = 1000; 
writer.SetMaxFieldLength(Int32.MaxValue); 
writer.UseCompoundFile = false; 

모든 속성을 설정했습니다.

여기서이 코드의 writer.AddDocument(document);은 OOM 예외를 보여줍니다.

왜 내가이 오류 메시지를 표시 할 수 있습니까?
누구나 해결할 수 있습니까?

내 컴퓨터 구성 :
시스템 유형 : 64 비트 운영 체제.
RAM : 4 GB (사용 가능한 3.86 GB)
프로세서 : 인텔의 i5 - 3230M CPU의 @의 2.60GHz 단일 문서로 전체 데이터베이스를 추가하는 것 같습니다

using System; 
using System.Data.SqlClient; 
using Lucene.Net.Documents; 
using System.Data; 
using Lucene.Net.Analysis.Standard; 
using Lucene.Net.Search; 
using Lucene.Net.Store; 
using Lucene.Net.QueryParsers; 

namespace ConsoleApplication1 
{ 
    class Program 
    { 
     static String searchTerm = ""; 
     static void Main(string[] args) { 
      /** 
      * This will create dataset according to 
      * connectingString and query 
      **/ 
      Console.WriteLine("Connecting to Sql database server."); 
      String connectionString = "Data Source=proxy-pc;Initial Catalog=Snomed; User   ID=SA;password=admin"; 
      String query = "SELECT * FROM DESCRIPTION"; 
      String INDEX_DIRECTORY = "c:\\DatabaseIndex"; 

      Console.WriteLine("Creating dataset."); 
      DataSet dataSet = createDataset(connectionString, query); 
      Console.WriteLine("Created dataset successfully."); 

      Console.WriteLine("Creating document."); 
      Document document = createDocument(dataSet); 
      Console.WriteLine("Created document successfully."); 

      var version = Lucene.Net.Util.Version.LUCENE_30; 
      var length = Lucene.Net.Index.IndexWriter.MaxFieldLength.LIMITED; 
      Lucene.Net.Analysis.Standard.StandardAnalyzer analyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer(version); 
      Lucene.Net.Store.Directory directory = Lucene.Net.Store.FSDirectory.Open(new System.IO.DirectoryInfo(@INDEX_DIRECTORY)); 
      Lucene.Net.Index.IndexWriter writer = new Lucene.Net.Index.IndexWriter(directory, analyzer, length); 
      writer.SetMergeScheduler(new Lucene.Net.Index.SerialMergeScheduler()); 
      writer.SetRAMBufferSizeMB(32); 
      writer.MergeFactor = 1000; 
      writer.SetMaxFieldLength(Int32.MaxValue); 
      writer.UseCompoundFile = false; 
      Console.WriteLine("Before Adding document"); 
      **writer.AddDocument(document); ** 
      Console.WriteLine("Indexing..."); 
      writer.Optimize(); 
      writer.Dispose(); 
      Console.WriteLine("Indexing finished"); 

      if (searchTerm == "") 
      { 
       searchTerm = "(keyword)"; 
      } 

      Console.WriteLine("Searching '" + searchTerm + "'..."); 

      var occurance = searchKeyword(INDEX_DIRECTORY, version, searchTerm); 

      if (occurance != -1) 
      { 
       Console.WriteLine("Your search found : " + occurance); 
      } 
      else 
      { 
       Console.WriteLine("Invalid index directory."); 
      } 

      Console.Read(); 
     } 

     /** 
     * Method works as a searcher 
     **/ 
     private static int searchKeyword(String index_Directory_Path, Lucene.Net.Util.Version version, String searchWord) { 
      if (index_Directory_Path != null) 
      { 
       var standAnalyzer = new StandardAnalyzer(version); 
       IndexSearcher searcher = new IndexSearcher(FSDirectory.Open(index_Directory_Path)); 

       // parse the query, "term" is the default field to search 
       var parser = new QueryParser(version, "term", standAnalyzer); 
       Query searchQuery = parser.Parse(searchWord); 

       // search 
       TopDocs hits = searcher.Search(searchQuery, 100); 
       var total = hits.TotalHits; 
       return total; 
      } 

      else 
      { 
       return -1; 
      } 
     } 

     static DataSet createDataset(String connectionString, String query) { 
      DataSet ds = new DataSet(); 

      using (SqlConnection connection = new SqlConnection(connectionString)) 
      using (SqlCommand command = new SqlCommand(query, connection)) 
      using (SqlDataAdapter adapter = new SqlDataAdapter(command)) 
      { 
       adapter.Fill(ds); 
      } 

      return ds; 
     } 

     static Lucene.Net.Documents.Document createDocument(DataSet dataSet) { 
      Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); 
      using (dataSet) 
      { 
       foreach (DataTable table in dataSet.Tables) 
       { 
        foreach (DataRow row in table.Rows) 
        { 
         String id = row["id"].ToString(); 
         String rTime = row["rTime"].ToString(); 
         String active = row["active"].ToString(); 
         String mId = row["mId"].ToString(); 
         String cId = row["cId"].ToString(); 
         String lCode = row["lCode"].ToString(); 
         String tId = row["tId"].ToString(); 
         String detail = row["detail"].ToString(); 
         String sId = row["sId"].ToString(); 

         Field idField = new Field("id", id, Field.Store.YES, Field.Index.ANALYZED); 
         Field rTimeField = new Field("rTime", rTime, Field.Store.YES, Field.Index.ANALYZED); 
         Field activeField = new Field("active", active, Field.Store.YES, Field.Index.ANALYZED); 
         Field mIdField = new Field("mId", mId, Field.Store.YES, Field.Index.ANALYZED); 
         Field cIdField = new Field("cId", cId, Field.Store.YES, Field.Index.ANALYZED); 
         Field lCodeField = new Field("lCode", lCode, Field.Store.YES, Field.Index.ANALYZED); 
         Field tIdField = new Field("tId", tId, Field.Store.YES, Field.Index.ANALYZED); 
         Field detailField = new Field("detail", detail, Field.Store.YES, Field.Index.ANALYZED); 
         Field sIdField = new Field("sId", sId, Field.Store.YES, Field.Index.ANALYZED); 

         doc.Add(idField); 
         doc.Add(rTimeField); 
         doc.Add(activeField); 
         doc.Add(mIdField); 
         doc.Add(cIdField); 
         doc.Add(lCodeField); 
         doc.Add(tIdField); 
         doc.Add(detailField); 
         doc.Add(sIdField); 
        } 
       } 
      } 

      return doc; 
     } 
    } 
} 

답변

1

.

각 행을 별도의 문서로 추가해 보았습니까? 아마도 "createDocument"를 "createDocuments"로 변경하고 행당 하나의 Lucene.Net 문서를 생성 할 수 있습니다. 현재 코드의 대부분을 변경하지 않고 그대로두면 ...

호프가 도움이 되었으면

+0

감사합니다. Adrian Conlon! –