2013-07-14 2 views

답변

1

Example.com에는 링크가 단 하나뿐이므로 그다지 좋은 예는 아닙니다!

<?xml version="1.0" encoding="UTF-8"?>
<config>

  <!-- Step 1: declare the inputs (the page to scan and where the result is written). -->
  <script><![CDATA[
    url = "http://stackoverflow.com/questions/17635763/trying-to-extract-urls-from-a-website-using-web-harvest";

    output_path = "C:/webharvest/";
    file_name = "urllist.txt";
    output_file = output_path + file_name;
  ]]></script>

  <!--
    Steps 2 to 5, read from the innermost element outwards:
      2. http        : load the page found at ${url}
      3. html-to-xml : convert the page to XML so that it can be queried
      4. xpath       : select only the links (href of every anchor), yielding a list
      5. var-def     : keep that list in the variable "urls"
  -->
  <var-def name="urls">
    <xpath expression="//a/@href">
      <html-to-xml>
        <http url="${url}"/>
      </html-to-xml>
    </xpath>
  </var-def>

  <!--
    Steps 6 and 7:
      6. text : turn the "urls" list into one string, each link on its own line
                (entries are separated by ${sys.cr}${sys.lf})
      7. file : write that string to ${output_file}
  -->
  <file action="write" path="${output_file}">
    <text delimiter="${sys.cr}${sys.lf}">
      <var name="urls"/>
    </text>
  </file>

</config>
위 코드는 제가 몇 가지 주석을 달아 둔 코드입니다. :)