2015-01-22 6 views
0

HTML 컨텐츠의 모든 이미지를 검색하기위한 작은 프로그램을 작성했습니다.주어진 HTML 파일 소스에서 모든 이미지 src 세부 정보를 다시 얻는 방법

IMG PATH : http://www.Newsletterservices.in/Templates/HNY_003/images/greet_header.jpg 
IMG OLD NAME : greet_header.jpg 
IMG EXT : .jpg 
IMG PATH : http://www.Newsletterservices.in/Templates/HNY_003/images/left_happy_banner.jpg 
IMG OLD NAME : left_happy_banner.jpg 
IMG EXT : .jpg 
IMG PATH : http://www.Newsletterservices.in/Templates/HNY_003/images/greet_footer.jpg 
IMG OLD NAME : greet_footer.jpg 
IMG EXT : .jpg 

는 이제 복잡한 작업을 수행합니다 여기

void Main() 
{ 
    string body = @"<p></p><p></p><p><title></title></p><tablecellpadding='0'cellspacing='0'style='width:100%;'width='100%'><tbody><tr><tdalign='center'style='vertical-align:top;text-align:center;'valign='top'><tablecellpadding='0'cellspacing='0'style='width:600px;'width='600px'><tbody><tr><tdalign='left'background='#CBE8F8'colspan='2'height='143px'style='background-color:#cbe8f8;vertical-align:top;text-align:left;border-width:1px1px0px;border-style:solidsolidnone;border-color:#a7a7a7;'valign='top'><imgalt=''height='143'src='http://www.Newsletterservices.in/Templates/HNY_003/images/greet_header.jpg'width='600'/></td></tr><tr><tdalign='left'background='#CBE8F8'style='background-color:#cbe8f8;width:50%;text-align:left;vertical-align:top;border-left:1pxsolid#a7a7a7;'valign='middle'><imgalt=''height='146'src='http://www.Newsletterservices.in/Templates/HNY_003/images/left_happy_banner.jpg'width='289'/></td><tdalign='left'background='#CBE8F8'style='background-color:#cbe8f8;width:50%;border-right:1pxsolid#a7a7a7;text-align:left;vertical-align:top;'valign='top'><spanstyle='font-family:verdana,'mssansserif';color:#024e9b;font-size:11px;'>DearAllUsers,<br/><br/>Asweallwelcomethenewyearaheadandprayforpeaceandhappiness,wesendyouourwarmestwishes.<br/><br/>Maythenewyearbethebeginningofabettertomorrow,thejourneyduringtheyearfilledwithjoyandmayeachdayintheyearaheadbefilledwithreasonstocelebrate.<br/><br/>WewishyouandyourfamilyaverywonderfulNewYear.<br/><br/>Love,<br/>ElectrocomSoftwarePVT.LTD</span></td></tr><tr><tdalign='left'background='#CBE8F8'colspan='2'height='161px'style='background-color:#cbe8f8;vertical-align:top;text-align:left;height:161px;border-width:0px1px1px;border-style:nonesolidsolid;'valign='top'><imgalt=''height='161'src='http://www.Newsletterservices.in/Templates/HNY_003/images/greet_footer.jpg'width='600'/></td></tr><tr><tdalign='left'colspan='2'height='5px'style='vertical-align:top;text-align:left;height:5px;'valign='top'><imgalt=''height='5'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer.gif'width='600'/></td></tr><tr><tdalign='left'colspan='2'height='30px'style='vertical-align:top;text-align:left;height:30px;background-color:#ffffff;border:1pxsolid#a7a7a7;padding:5px;'valign='top'><tablecellpadding='0'cellspacing='0'style='width:100%;'width='100%'><tbody><tr><tdstyle='border-right:1pxsolid#a7a7a7;width:50%;'width='50%'><divstyle='margin:5px5px5px8px;'><spanstyle='font-size:22px;'><spanstyle='color:rgb(0,88,132);font-family:trebuchetms,verdana,'mssansserif';font-weight:bold;'>ELECTROCOMSOFTWAREPVT.LTD</span></span></div></td><tdstyle='width:50%;'width='50%'><divstyle='margin:5px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Address:</b>505,EASYOFFICE,SUKHSAGARCOMPLEX,NEAR.FORTUNELANFMARKHOTEL</span><br/>ASHRAMROAD</div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Phone:</b><imgalt=''height='1'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer12px.gif'style='width:12px;height:1px;'width='12'/></span></div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>FAX:</b><imgalt=''height='1'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer26px.gif'style='width:26px;height:1px;'width='26'/>##UserFax##</span></div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Email:</b><imgalt=''height='1'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer18px.gif'style='width:18px;height:1px;'width='18'/>[email protected]</span></div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Visitus:</b>www.electrocom.in</span></div></td></tr></tbody></table></td></tr></tbody></table></td></tr></tbody></table><p></p>"; 
    string fbody = Regex.Replace(body, @"\s+", string.Empty); 
    do 
    { 
    int src = fbody.IndexOf("src"); 
      if (src != -1) 
      { 
       fbody = fbody.Remove(0, src + 5); 
       //var dot = Regex.Match(fbody,@"\.(jpg|gif|doc|pdf)$"); 
       int dot = fbody.IndexOf(".jpg"); 
       if (dot != -1) 
       { 
        Console.WriteLine("IMG PATH : {0} \nIMG OLD NAME : {1} \nIMG EXT : {2}",fbody.Substring(0, dot + 4),Path.GetFileName(fbody.Substring(0, dot + 4)), Path.GetExtension(fbody.Substring(0, dot + 4))); 
       } 
      fbody = fbody.Remove(0,dot + 4); 
     } 
    } 
    while(fbody.Contains("src")); 
} 

성공 내 결과입니다 : 여기

특정 요구 사항에 대한 내 첫 번째 프로그램이다 : 나는 일반적인 테스트 LINQPad이 같은이 프로그램을 작성 더 많은 파일 확장자를 사용하여 작업을 검색하십시오.

void Main() 
{ 
    string body = @"<p></p><p></p><p><title></title></p><tablecellpadding='0'cellspacing='0'style='width:100%;'width='100%'><tbody><tr><tdalign='center'style='vertical-align:top;text-align:center;'valign='top'><tablecellpadding='0'cellspacing='0'style='width:600px;'width='600px'><tbody><tr><tdalign='left'background='#CBE8F8'colspan='2'height='143px'style='background-color:#cbe8f8;vertical-align:top;text-align:left;border-width:1px1px0px;border-style:solidsolidnone;border-color:#a7a7a7;'valign='top'><imgalt=''height='143'src='http://www.Newsletterservices.in/Templates/HNY_003/images/greet_header.jpg'width='600'/></td></tr><tr><tdalign='left'background='#CBE8F8'style='background-color:#cbe8f8;width:50%;text-align:left;vertical-align:top;border-left:1pxsolid#a7a7a7;'valign='middle'><imgalt=''height='146'src='http://www.Newsletterservices.in/Templates/HNY_003/images/left_happy_banner.jpg'width='289'/></td><tdalign='left'background='#CBE8F8'style='background-color:#cbe8f8;width:50%;border-right:1pxsolid#a7a7a7;text-align:left;vertical-align:top;'valign='top'><spanstyle='font-family:verdana,'mssansserif';color:#024e9b;font-size:11px;'>DearAllUsers,<br/><br/>Asweallwelcomethenewyearaheadandprayforpeaceandhappiness,wesendyouourwarmestwishes.<br/><br/>Maythenewyearbethebeginningofabettertomorrow,thejourneyduringtheyearfilledwithjoyandmayeachdayintheyearaheadbefilledwithreasonstocelebrate.<br/><br/>WewishyouandyourfamilyaverywonderfulNewYear.<br/><br/>Love,<br/>ElectrocomSoftwarePVT.LTD</span></td></tr><tr><tdalign='left'background='#CBE8F8'colspan='2'height='161px'style='background-color:#cbe8f8;vertical-align:top;text-align:left;height:161px;border-width:0px1px1px;border-style:nonesolidsolid;'valign='top'><imgalt=''height='161'src='http://www.Newsletterservices.in/Templates/HNY_003/images/greet_footer.jpg'width='600'/></td></tr><tr><tdalign='left'colspan='2'height='5px'style='vertical-align:top;text-align:left;height:5px;'valign='top'><imgalt=''height='5'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer.gif'width='600'/></td></tr><tr><tdalign='left'colspan='2'height='30px'style='vertical-align:top;text-align:left;height:30px;background-color:#ffffff;border:1pxsolid#a7a7a7;padding:5px;'valign='top'><tablecellpadding='0'cellspacing='0'style='width:100%;'width='100%'><tbody><tr><tdstyle='border-right:1pxsolid#a7a7a7;width:50%;'width='50%'><divstyle='margin:5px5px5px8px;'><spanstyle='font-size:22px;'><spanstyle='color:rgb(0,88,132);font-family:trebuchetms,verdana,'mssansserif';font-weight:bold;'>ELECTROCOMSOFTWAREPVT.LTD</span></span></div></td><tdstyle='width:50%;'width='50%'><divstyle='margin:5px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Address:</b>505,EASYOFFICE,SUKHSAGARCOMPLEX,NEAR.FORTUNELANFMARKHOTEL</span><br/>ASHRAMROAD</div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Phone:</b><imgalt=''height='1'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer12px.gif'style='width:12px;height:1px;'width='12'/></span></div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>FAX:</b><imgalt=''height='1'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer26px.gif'style='width:26px;height:1px;'width='26'/>##UserFax##</span></div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Email:</b><imgalt=''height='1'src='http://www.Newsletterservices.in/Templates/HNY_003/images/spacer18px.gif'style='width:18px;height:1px;'width='18'/>[email protected]</span></div><divstyle='margin:6px5px5px8px;'><spanstyle='font-family:verdana,'mssansserif';color:#333333;font-size:11px;white-space:normal;'><b>Visitus:</b>www.electrocom.in</span></div></td></tr></tbody></table></td></tr></tbody></table></td></tr></tbody></table><p></p>"; 
    string fbody = Regex.Replace(body, @"\s+", string.Empty); 
    do 
    { 
    int src = fbody.IndexOf("src"); 
      if (src != -1) 
      { 
       fbody = fbody.Remove(0, src + 5); 
       var dot = Regex.Match(fbody,@"\.(jpg|gif|doc|pdf)$"); 
       //int dot = fbody.IndexOf(".jpg"); 
       if (dot.Success) 
       { 
        Console.WriteLine("IMG PATH : {0} \nIMG OLD NAME : {1} \nIMG EXT : {2}",fbody.Substring(0, dot.Index + 4),Path.GetFileName(fbody.Substring(0, dot.Index + 4)), Path.GetExtension(fbody.Substring(0, dot.Index + 4))); 
       } 
      fbody = fbody.Remove(0,dot.Index + 4); 
     } 
    } 
    while(fbody.Contains("src")); 
} 

그녀 e LINQPad 결과에는 아무 것도 없습니다. 아무도 정규식 검증을 통해 여러 파일 확장자 확인을 위해이 두 번째 프로그램 결과로 첫 번째 프로그램 결과를 얻은 방법에 대한 아이디어가 있습니까? 이 몸 .JPG 또는 .gif 참고 또는 .DOC 또는 .PDF 유하지 않고 내 시간을 저장

답변

2

은 정규식의 끝에서 $ 기호를 제거하십시오. 감사..
+0

위대한로 끝나는 경우에만 일치하는 의미로 –