노드 js azure SDK getBlobToStream은 많은 양의 메모리를 사용합니다.

특정 Azure 계정의 모든 blob 컨테이너에 있는 모든 blob을 다운로드하는 백업 스크립트를 작성하고 있습니다. 그런데 Node.js Azure SDK의 getBlobToStream이 메모리를 많이 사용합니다.

스크립트는 async.js를 사용하여 동시에 실행되는 작업 수를 제한함으로써 서버에 과부하가 걸리지 않도록 합니다. 이 스크립트를 실행하면 잘 작동하지만, 큰 파일을 만나면 메모리가 부족해집니다. 디스크에 쓰는 속도보다 다운로드가 더 빨라서 메모리 내 버퍼가 계속 쌓이다가 결국 메모리를 완전히 소진하는 것으로 추측하고 있지만, 정확한 원인을 디버깅하는 것은 지금까지 불가능했습니다.

메모리를 많이 사용하는 것으로 보이는 특정 함수는 다음과 같이 호출됩니다:

// Download one blob into the file at fullPath. On success, record the blob's
// last-modified timestamp in "<fullPath>.date"; callback() marks the queue
// item as finished in every outcome.
blobService.getBlobToStream(
    containerName,
    blob.name,
    fs.createWriteStream(fullPath),
    function(error) {
        if (error) { //Something went wrong, write it to the console but finish the queue item and continue.
            console.log("Failed writing " + blob.name + " (" + error + ")");
            callback();
            return;
        }
        //Write the last modified date as text (fs.writeFile does not accept a
        //Date object on current Node versions) and finish the queue item only
        //after the marker file has been written.
        fs.writeFile(fullPath + ".date", String(blobLastModified), function(err) {
            if (err) console.log("Couldn't write .date file: " + err);
            callback();
        });
    });

700MB짜리 파일 하나만 다운로드해도 제 쪽의 1GB 메모리가 쉽게 가득 찹니다.

이 문제를 피할 방법이 있습니까? Azure SDK가 모든 것을 닥치는 대로 버퍼링하지 않도록 막아 주는 매개 변수를 제가 놓치고 있는 것입니까?

전체 코드: (참고: blob 데이터 전체를 한꺼번에 읽는 대신 데이터의 일부(청크)만 스트림으로 읽는 방법도 가능합니다.)

#!/usr/bin/env node 

//Requires 
var azure = require('azure'); 
var fs = require('fs'); 
var mkdirp = require('mkdirp'); 
var path = require('path'); 
var async = require('async'); 

//Upper bound on simultaneous blob downloads; passed to async.queue as the worker count of the per-container blob queue.
var maxconcurrency = 1;

//No credentials are passed here -- presumably the SDK picks them up from its environment; confirm against the SDK documentation.
var blobService = azure.createBlobService();

//Root directory the blobs are downloaded into. Always end with a '/'!!
//Declared with var so it is no longer an implicit global (which throws under strict mode).
var backupPrefix = '/backups/azurebackup/';

//Main flow of the script is near the bottom of the file. 
/**
 * Queue of container names to back up; containers are processed one at a time.
 *
 * The worker lists the blobs of a container and downloads them through an
 * inner queue limited to `maxconcurrency` simultaneous downloads. A blob is
 * skipped when the "<file>.date" marker beside it already holds the blob's
 * last-modified timestamp. The container's queue slot is released only after
 * every blob task of that container has finished (or listing failed), so the
 * next queued container can start.
 */
var containerProcessingQueue = async.queue(
function getBlobsAndSaveThem(containerName, containerDone) {
    console.log(containerName); //DEBUG
    blobService.listBlobs(containerName, function(error, blobs) {
        if (error) {
            console.log("An error occurred listing the blobs: " + error);
            containerDone(); //Release the slot so the remaining containers are still processed.
            return;
        }
        if (!blobs || blobs.length === 0) {
            containerDone(); //Empty container: nothing to download.
            return;
        }

        //Count finished blob tasks; the container is done when none remain.
        var remaining = blobs.length;
        function blobFinished() {
            remaining -= 1;
            if (remaining === 0) {
                containerDone();
            }
        }

        var blobProcessingQueue = async.queue(function(index, callback) {
            var blob = blobs[index];
            console.log(blob); //DEBUG
            var fullPath = backupPrefix + containerName + '/' + blob.name;
            var dateFile = fullPath + ".date";
            var targetDir = path.dirname(fullPath);
            var blobLastModified = new Date(blob.properties['last-modified']);
            //Text form used both for the comparison below and for the marker file.
            var lastModifiedText = String(blobLastModified);

            //Guard so the queue callback fires exactly once even if both the
            //stream and the SDK report a failure for the same blob.
            var finished = false;
            function finish() {
                if (!finished) {
                    finished = true;
                    callback();
                }
            }

            //Create the directory synchronously so concurrent workers do not race on it.
            //mkdirp.sync reports failure by throwing; it does not take a callback.
            if (!fs.existsSync(targetDir)) {
                try {
                    mkdirp.sync(targetDir);
                } catch (err) {
                    console.log('Failed to create directory ' + targetDir + " ("+ err + ")");
                    finish();
                    return;
                }
            }

            //If the stored timestamp matches, the file is unmodified: skip the download.
            if (fs.existsSync(dateFile) && fs.readFileSync(dateFile).toString() === lastModifiedText) {
                finish();
                return;
            }

            var target = fs.createWriteStream(fullPath);
            //Without a listener a stream 'error' event would be thrown as an uncaught exception.
            target.on('error', function(err) {
                console.log("Failed writing " + blob.name + " (" + err + ")");
                finish();
            });

            //NOTE(review): this call is reported to hold large blobs in memory; ranged
            //(chunked) downloads may be needed for big files -- confirm against the SDK.
            blobService.getBlobToStream(containerName, blob.name, target, function(error) {
                if (error) { //Something went wrong, write it to the console but finish the queue item and continue.
                    console.log("Failed writing " + blob.name + " (" + error + ")");
                    finish();
                    return;
                }
                //Write the last modified date, then finish the queue item.
                fs.writeFile(dateFile, lastModifiedText, function(err) {
                    if (err) console.log("Couldn't write .date file: " + err);
                    finish();
                });
            });
        }, maxconcurrency);

        //Push one task per blob; an index loop avoids for...in picking up non-index properties.
        for (var blobindex = 0; blobindex < blobs.length; blobindex++) {
            blobProcessingQueue.push(blobindex, blobFinished);
        }
    });
}, 1);

//Entry point: list the containers of the account and queue each one for backup.
blobService.listContainers(function(err, result){
     if (err) {
       //Without this check a failed listing would crash on result.length below.
       console.log("An error occurred listing the containers: " + err);
       process.exitCode = 1;
       return;
     }
     for(var i=0;i<result.length;i++) {
       containerProcessingQueue.push(result[i].name);
     }
});

출처

2013-10-07 AlexanderYpema_Infi

한 가지 가능한 방법은 blob 데이터 전체 대신 데이터의 일부(청크)만 스트림으로 읽어 파일에 덧붙인 다음, 그다음 청크를 읽는 것입니다. Blob 스토리지 서비스는 이를 지원합니다. getBlobToStream의 소스 코드(https://github.com/WindowsAzure/azure-sdk-for-node/blob/master/lib/services/blob/blobservice.js)를 보면 rangeStartHeader와 rangeEndHeader 옵션으로 시작/끝 바이트를 지정할 수 있습니다. 도움이 되는지 확인해 보십시오.

방금 그렇게 동작하는 코드를 간단히 작성해 보았습니다. (코드에서 알 수 있듯이 node.js에 대한 제 지식은 매우 초보적입니다. :)) 참고로, 궁금해하시는 분들을 위해 덧붙이면 시작과 끝을 지정하는 변수 이름은 이후에 변경되었습니다.

var azure = require('azure'); 
var fs = require('fs'); 

// Connection and target settings for the chunked-download example.
var blobService = azure.createBlobService("account", "accountkey");
var containerName = "container name";
var blobName = "blob name";
var blobSize; // filled in from the blob's properties before the download starts
var chunkSize = 512 * 1024; // bytes fetched per request -- 512 KB at a time
var startPos = 0; // offset of the next byte to download
var fullPath = "D:\\node\\"; // target directory; the blob name is appended below

// Look up the blob's size first, then start the chunked download.
var blobProperties = blobService.getBlobProperties(containerName, blobName, null, function (error, blob) {
    if (error) {
        throw error;
    }
    blobSize = blob.contentLength;
    fullPath += blobName;
    console.log(fullPath);
    doDownload();
});

/**
 * Downloads the blob in consecutive byte ranges of at most `chunkSize` bytes,
 * writing each range to the file at `fullPath`, and calls itself again from
 * the SDK callback until `startPos` reaches `blobSize`.
 *
 * Reads the surrounding containerName, blobName, blobSize, chunkSize and
 * fullPath variables and advances startPos after every completed chunk.
 *
 * @throws rethrows any error reported by getBlobToStream.
 */
function doDownload() {
    if (startPos >= blobSize) {
        // Nothing (left) to fetch. For a zero-length blob still leave an empty
        // file behind instead of requesting the invalid range 0..-1.
        if (startPos === 0) {
            fs.createWriteStream(fullPath, {flags: 'w'}).end();
        }
        return;
    }
    // The first chunk truncates, so a file left over from an earlier run is
    // replaced rather than appended to; every later chunk appends.
    var stream = fs.createWriteStream(fullPath, {flags: startPos === 0 ? 'w' : 'a'});
    var endPos = startPos + chunkSize;
    if (endPos > blobSize) {
        endPos = blobSize; // last chunk may be shorter than chunkSize
    }
    console.log("Downloading " + (endPos - startPos) + " bytes starting from " + startPos + " marker.");
    // The end of the range is inclusive, hence endPos - 1.
    // NOTE(review): later SDK releases reportedly name these options
    // rangeStart / rangeEnd -- confirm against the installed version.
    blobService.getBlobToStream(containerName, blobName, stream,
        { "rangeStartHeader": startPos, "rangeEndHeader": endPos - 1 }, function (error) {
        if (error) {
            throw error;
        }
        startPos = endPos;
        if (startPos <= blobSize - 1) {
            doDownload();
        }
    });
}

출처

2013-10-08 13:29:54

마침내 이 방법을 시도해 보았습니다. 그리고 잘 작동합니다! 메모리 사용량은 30~40MB 정도로 훨씬 낮아졌습니다. 실제로 모든 것을 메모리에 올리려고 했던 것 같습니다. 예제 코드를 올려 주셔서 감사합니다! 거의 그대로 잘 동작했습니다 :) –

[이 코드에는 아직 몇 가지 결함이 있을 수 있으므로, 청크 단위 분할 다운로드를 어떻게 구현할 수 있는지 아이디어를 얻는 용도로 사용하십시오.] 옵션 이름은 이제 rangeStart와 rangeEnd로 바뀌었습니다. 자세한 도움말은 Azure Node 문서를 참조하십시오. http://dl.windowsazure.com/nodestoragedocs/BlobService.html

출처

2015-06-22 20:26:59

링크에 있는 관련 정보를 답변에 직접 포함해야 합니다. 그래야 링크에 문제가 생겨도 답변이 유효합니다. –

노드 js azure SDK getBlobToStream은 많은 양의 메모리를 사용합니다.

답변

관련 문제