ElasticSearch NEST 집계

구글링을 많이 했고 NEST와 ElasticSearch 문서도 확인했지만, 동작하는 예제를 찾지 못했고 제 문제를 해결할 수 없었습니다.

예제를 만들었습니다. 이 예제에서는 서로 다른 Last_Name의 개수와 가족별 급여 합계(SUM)를 조회하려고 합니다.

/// <summary>
/// Sample employee document: a first name, a last name and a salary.
/// </summary>
class Employee
    {
        /// <summary>Creates an employee with every property left at its default value.</summary>
        public Employee() { }

        /// <summary>Creates an employee from the given names and salary.</summary>
        /// <param name="first_name">Value stored in <see cref="First_Name"/>.</param>
        /// <param name="last_name">Value stored in <see cref="Last_Name"/>.</param>
        /// <param name="salary">Value stored in <see cref="Salary"/>.</param>
        public Employee(string first_name, string last_name, int salary)
        {
            First_Name = first_name;
            Last_Name = last_name;
            Salary = salary;
        }

        public string First_Name { get; set; }
        public string Last_Name { get; set; }
        public int Salary { get; set; }
    }
    /// <summary>
    /// Click handler that indexes four sample employees and then runs a terms
    /// aggregation on <c>Last_Name</c> with a nested sum of <c>Salary</c>.
    /// </summary>
    private void button4_Click(object sender, EventArgs e)
    {
        // The four sample employees: two Bundys and two Darcys.
        var staff = new[]
        {
            new Employee("Al", "Bundy", 1500),
            new Employee("Bud", "Bundy", 975),
            new Employee("Marcy", "Darcy", 4500),
            new Employee("Jefferson", "Darcy", 0)
        };

        // Index them one request at a time, in the order listed above.
        foreach (Employee member in staff)
        {
            client.Index<Employee>(member);
        }

        // One "Families" bucket per last name, each carrying a "FamilySalary" sum.
        var result = client.Search<Employee>(search => search
            .Aggregations(aggs => aggs
                .Terms("Families", families => families
                    .Field(emp => emp.Last_Name)
                    .Size(10)
                    .Aggregations(perFamily => perFamily
                        .Sum("FamilySalary", total => total
                            .Field(emp => emp.Salary))))));

        // Wanted: the number of different families (should be 2: Bundy and Darcy)
        // and the salary total of each family.
        var names = result.Aggs.Terms("Families");
        // Open question: how to read a family's sum from here?
        // ?? var x = names.Sum("Bundy");
    }

Elasticsearch에서 다음과 같은 정보를 얻고 싶습니다.
* 인덱스에는 서로 다른 두 가족이 있습니다.
* 번디(Bundy) 가족은 2475를 법니다.
* 다아시(Darcy) 가족은 4500을 법니다.

도와주세요.

출처

2017-12-22 Markus1980Wien

다음은 완전한 예입니다.

/// <summary>
/// Complete example: recreates the "employees" index, bulk-indexes four sample
/// employees and prints the salary total of each family, computed with a terms
/// aggregation that carries a nested sum aggregation.
/// </summary>
/// <remarks>
/// Every response that later steps depend on is checked with <c>IsValid</c>;
/// on failure the debug information is written to standard error and the
/// method returns instead of continuing with missing data.
/// </remarks>
private static void Main()
{
    var defaultIndex = "employees";

    var settings = new ConnectionSettings(new Uri("http://localhost:9200"))
        .InferMappingFor<Employee>(i => i
            .IndexName(defaultIndex)
        )
        .DefaultIndex(defaultIndex)
        // following settings are useful while developing
        // but probably don't want to use them in production
        .DisableDirectStreaming()
        .PrettyJson()
        .OnRequestCompleted(callDetails =>
        {
            // Echo the request (method, URI and body when one was sent).
            if (callDetails.RequestBodyInBytes != null)
            {
                Console.WriteLine(
                    $"{callDetails.HttpMethod} {callDetails.Uri} \n" +
                    $"{Encoding.UTF8.GetString(callDetails.RequestBodyInBytes)}");
            }
            else
            {
                Console.WriteLine($"{callDetails.HttpMethod} {callDetails.Uri}");
            }

            Console.WriteLine();

            // Echo the response status (and body when one was received).
            if (callDetails.ResponseBodyInBytes != null)
            {
                Console.WriteLine($"Status: {callDetails.HttpStatusCode}\n" +
                    $"{Encoding.UTF8.GetString(callDetails.ResponseBodyInBytes)}\n" +
                    $"{new string('-', 30)}\n");
            }
            else
            {
                Console.WriteLine($"Status: {callDetails.HttpStatusCode}\n" +
                    $"{new string('-', 30)}\n");
            }
        });

    var client = new ElasticClient(settings);

    // Drop any existing index so the example starts from an empty one.
    if (client.IndexExists(defaultIndex).Exists)
    {
        client.DeleteIndex(defaultIndex);
    }

    var createIndexResponse = client.CreateIndex(defaultIndex, c => c
        .Settings(s => s
            .NumberOfShards(1)
        )
        .Mappings(m => m
            .Map<Employee>(mm => mm
                .AutoMap()
            )
        )
    );

    if (!createIndexResponse.IsValid)
    {
        Console.Error.WriteLine(
            $"Creating index '{defaultIndex}' failed:\n{createIndexResponse.DebugInformation}");
        return;
    }

    // Create 4 employees
    var al = new Employee("Al", "Bundy", 1500);
    var bud = new Employee("Bud", "Bundy", 975);
    var marcy = new Employee("Marcy", "Darcy", 4500);
    var jefferson = new Employee("Jefferson", "Darcy", 0);

    // Index all four documents in a single bulk request.
    var bulkResponse = client.IndexMany(new[] { al, bud, marcy, jefferson });

    if (!bulkResponse.IsValid)
    {
        Console.Error.WriteLine(
            $"Bulk indexing into '{defaultIndex}' failed:\n{bulkResponse.DebugInformation}");
        return;
    }

    // refresh the index after indexing. We do this here for example purposes,
    // but in a production system, it's preferable to use the refresh interval
    // see https://www.elastic.co/blog/refreshing_news
    client.Refresh(defaultIndex);

    // query the index
    var result = client.Search<Employee>(s => s
        .Aggregations(a => a
            .Terms("Families", ts => ts
                .Field(o => o.Last_Name.Suffix("keyword")) // use the keyword sub-field for terms aggregation
                .Size(10)
                .Aggregations(aa => aa
                    .Sum("FamilySalary", sa => sa
                        .Field(o => o.Salary)
                    )
                )
            )
        )
    );

    if (!result.IsValid)
    {
        Console.Error.WriteLine($"Search failed:\n{result.DebugInformation}");
        return;
    }

    // Get the number of different families (Result should be 2: Bundy and Darcy)
    // and get the family-salary of family Bundy and the family-salary for the Darcys
    var names = result.Aggs.Terms("Families");

    if (names == null)
    {
        Console.Error.WriteLine("The response does not contain the \"Families\" aggregation.");
        return;
    }

    foreach (var name in names.Buckets)
    {
        var sum = name.Sum("FamilySalary");
        Console.WriteLine($"* family {name.Key} earns {sum.Value}");
    }
}

/// <summary>
/// Sample employee document: a first name, a last name and a salary.
/// </summary>
public class Employee
{
    /// <summary>Creates an employee with every property left at its default value.</summary>
    public Employee() { }

    /// <summary>Creates an employee from the given names and salary.</summary>
    /// <param name="first_name">Value stored in <see cref="First_Name"/>.</param>
    /// <param name="last_name">Value stored in <see cref="Last_Name"/>.</param>
    /// <param name="salary">Value stored in <see cref="Salary"/>.</param>
    public Employee(string first_name, string last_name, int salary)
    {
        First_Name = first_name;
        Last_Name = last_name;
        Salary = salary;
    }

    public string First_Name { get; set; }
    public string Last_Name { get; set; }
    public int Salary { get; set; }
}

출력은 다음과 같습니다.

* family Bundy earns 2475

* family Darcy earns 4500

몇 가지 참고 사항:

* 자동 매핑(automapping)을 사용한 Employee 매핑으로 인덱스를 명시적으로 생성했습니다. 이 예제는 인덱스를 명시적으로 생성하거나 매핑을 지정하지 않아도 동작하지만, 무엇이 생성되는지 콘솔 출력에서 확인할 수 있도록 명확성을 위해 추가했습니다. Employee가 매핑되는 방식은 필요에 맞게 변경할 수 있습니다.
* 모든 문서를 하나의 벌크(bulk) 요청으로 색인합니다.
* 벌크 색인 후 인덱스를 새로 고칩니다(refresh). 프로덕션 시스템에서는 일반적으로 색인 작업 후에 refresh를 호출하지 않습니다. 호출할 때마다 Lucene 세그먼트가 기본 역색인에 기록되기 때문입니다. 세그먼트를 병합하는 백그라운드 프로세스가 있기는 하지만, 작은 세그먼트가 많으면 문제가 될 수 있습니다. 새로 고침 간격(refresh interval)을 설정해 두는 것이 가장 좋습니다. 여기서는 색인 직후의 검색에서 문서를 조회할 수 있도록 하기 위해서만 refresh를 호출합니다.
* terms 집계는 자동 매핑으로 string 속성을 매핑할 때 생성되는 keyword 하위 필드(sub field)에서 실행해야 합니다. keyword 필드 데이터 타입은 값을 분석하지 않고 그대로 색인하며, 집계와 정렬에 적합한 컬럼형(columnar) 자료 구조인 doc values를 활용합니다.
* terms 집계 결과에는 버킷 집합이 들어 있으며, 각 버킷의 키는 용어(term)입니다. 각 버킷은 하위 집계를 가질 수 있으며, 이 경우에는 sum 집계입니다.

출처

2017-12-22 12:47:27

답변

관련 문제