이 문제는 비교적 쉽게 해결할 수 있을 것 같지만, 원인을 찾지 못해 애쓰고 있습니다. 제 코드는 파일에서 모든 단어를 읽은 다음 각 단어, 단어 위치, 문장의 시작과 끝을 배열에 저장합니다. 배열은 다른 텍스트 파일로 출력됩니다. 파일을 읽는 과정에서 문제가 발생합니다. EOF에 조기에 도달하는 것처럼 보입니다.
마지막 문장 직전까지는 모든 정보를 읽을 수 있지만, 마지막 문장에서 버그가 발생합니다. 좋은 의견 있으신 분 계신가요?
/**
* Programmer: fryeguy
* Course:
* Program: TxtCrawl for MicroSearch
*
* Algorithm:
* TxtCrawl is the component of MicroSearch that reads text
* documents for search terms and stores them for
* indexing
*
* 1. Count words in doc, then initialize
* wordsFromDoc array to wordCount
* 2. Initiate output file for writing.
* 3. Open input file for reading words.
* 4. Until reaching EOF:
* 4.a. Set value for start "get pointer" in startSentence (.tellg()).
* 4.b. Store value for end "get pointer" in endSentence (.tellg()).
* 4.c. Reset "get pointer" to startSentence location.
* 4.d. Until reaching endSentence, Read into the
* array theWord, wordPos, startSent, and endSent
* 5. Write wordsFromDoc array to file
* 6. When EOF is reached close the files.
*/
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
/**
 * Record describing a single word taken from the document, kept as one
 * element of the word collection that main() fills and writes out.
 */
struct wordProps
{
    /** The word text exactly as extracted from the input stream. */
    std::string theWord;

    /** Stream position recorded for the word. */
    int wordPos;

    /** Stream position where the enclosing sentence starts. */
    int startSent;

    /** Stream position where the enclosing sentence ends. */
    int endSent;
};
// Adds the number of characters and words found in the named file to the two reference arguments (defined after main).
void countWords(string, int&, int&);
int main()
{
ifstream iFile; // file stream for reading in data
ofstream oFile; // file stream for writing data
string iFileName = "TextFile2.txt"; // name of test file to read from
string oFileName = "OutputFile.txt"; // name of test file to write to
string aLine = ""; // stores a line preceeding a newline character (\n)
string aWord = ""; // stores words from doc for indexing
int charCount = 0; // count of characters in doc
int wordCount = 0; // count of words in doc
int aLineWordCount = 0; // count of words in a single line being processed
int wordBegin = 0; // stores location of word in doc
int startSentence = 0; // stores pointer value for start of sentence
int endSentence = 0; // stores pointer value for end of sentence
/**
* 1. Count words in doc, then initialize
* wordsFromDoc array to wordCount
*/
countWords(iFileName, charCount, wordCount);
cout << "charCount: " << charCount << endl; // DEBUG CODE
cout << "wordCount: " << wordCount << endl; // DEBUG CODE
wordProps wordsFromDoc[wordCount];
cout<< "length of array: " << (sizeof(wordsFromDoc)/sizeof(*wordsFromDoc)) << endl; // DEBUG CODE
/**
* 2. Initiate output file for writing
*/
oFile.open (oFileName.c_str()); // setup output file and write header
oFile << setw(20) << left << "File Name: " << iFileName << endl;
oFile << setw(20) << "---------------------------------------" << endl << endl;
/**
* 3. Open input file for reading words
*/
iFile.open (iFileName.c_str());
if (!iFile.is_open())
cout << "No such file exists!" << endl;
else
{
/**
* 4. Until reaching EOF:
*/
// I have been attempting different counting methods assuming the eof was being reached prematurely
// The results really have not varied with this code
// while (iFile.tellg() != charCount)
while (!iFile.eof())
{
//cout << "count: " << count << endl;
/**
* 4.a. Set value for start "get pointer" in startSentence (.tellg()).
*/
startSentence = iFile.tellg();
cout << "startSentence: " << startSentence << endl; // DEBUG CODE
/**
* 4.b. Store value for end "get pointer" in endSentence (.tellg()).
*/
getline(iFile, aLine, '.');
cout << aLine << endl; // DEBUG CODE
endSentence = iFile.tellg();
aLine.clear();
cout << "endSentence: " << endSentence << endl; // DEBUG CODE
if (!iFile.is_open())
{
cout << "The if, iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE
iFile.close();
iFile.open (iFileName.c_str());
}
/**
* 4.c. Reset "get pointer" to startSentence location.
*/
iFile.seekg(startSentence);
cout << "iFile.tellg(): " << iFile.tellg() << endl; // DEBUG CODE
/**
* 4.d. Until reaching endSentence, Read into the
* array theWord, wordPos, startSent, and endSent
*/
// As the last line is about to be read there is an error of some sort.
// My guess is that somehow I exceed the end of the file but my startSentence
// and endSentence variables are pointing where I think they should.
for (; iFile.tellg() < endSentence; aLineWordCount++)
{
wordsFromDoc[aLineWordCount].wordPos = iFile.tellg();
cout << "wordPos: " << wordsFromDoc[aLineWordCount].wordPos << endl; // DEBUG CODE
iFile >> wordsFromDoc[aLineWordCount].theWord;
cout << "theWord: " << wordsFromDoc[aLineWordCount].theWord << endl; // DEBUG CODE
wordsFromDoc[aLineWordCount].startSent = startSentence;
cout << "startSent: " << wordsFromDoc[aLineWordCount].startSent << endl; // DEBUG CODE
wordsFromDoc[aLineWordCount].endSent = endSentence;
cout << "endSent: " << wordsFromDoc[aLineWordCount].endSent << endl << endl; // DEBUG CODE
cout << "aLineWordCount: " << aLineWordCount << endl;
} // end for
} // end while !=iFile.eof
// THIS section of code is never reached because of the hang up above.
/**
* 5. Write wordsFromDoc array to file
*/
for (int count = 0; count < aLineWordCount; count++)
{
oFile << setw(20) << left
<< wordsFromDoc[count].theWord << " "
<< wordsFromDoc[count].wordPos << " "
<< wordsFromDoc[count].startSent << " "
<< wordsFromDoc[count].endSent << endl;
}
} // end else
/**
* 6. When EOF is reached close the files.
*/
iFile.close();
oFile.close();
// DEBUG CDODE for verifying results
// for (int count = 0; count < wordCount; count++) {
// cout << "theWord: " << wordsFromDoc[count].theWord << endl;
// cout << "wordPos: " << wordsFromDoc[count].wordPos << endl;
// cout << "startSent: " << wordsFromDoc[count].startSent << endl;
// cout << "endSent: " << wordsFromDoc[count].endSent << endl << endl;
// }
}
/**
 * Counts the characters and the whitespace-separated words of a text file.
 *
 * The counts are added to whatever charCount and wordCount already hold, so
 * callers normally pass variables initialised to zero.
 *
 * @param theFileName name of the file to examine
 * @param charCount   incremented once per character read from the file
 * @param wordCount   incremented once per successfully extracted word
 *
 * If the file cannot be opened, "No such file exists!" is printed once and
 * both counters are left untouched.
 */
void countWords(std::string theFileName, int &charCount, int &wordCount)
{
    // Read-only stream: the file is never written here, and opening it
    // read/write would fail on files without write permission.
    std::ifstream inFile(theFileName.c_str());
    if (!inFile.is_open())
    {
        std::cout << "No such file exists!" << std::endl;
        return;
    }

    // count the chars
    char theChar = ' ';
    while (inFile.get(theChar))
        ++charCount;

    // Reaching end-of-file leaves eofbit/failbit set; reset the stream and
    // rewind so the same file can be scanned a second time.
    inFile.clear();
    inFile.seekg(0, std::ios::beg);

    // count the words
    // Testing the extraction itself (rather than eof() beforehand) avoids
    // counting a phantom word when the file ends in whitespace or is empty.
    std::string theWord;
    while (inFile >> theWord)
        ++wordCount;
}
실패를 일으키는 입력을 볼 필요가 있습니다. 프로그램은 임의의 입력에 대해서는 괜찮아 보입니다. – frayser
응답해 주셔서 감사합니다. 몇몇 다른 파일을 테스트한 결과, 이 텍스트는 원하는 (원하지 않는) 오류를 일으킵니다. "검색 엔진 크롤러로 읽을 텍스트의 샘플입니다. 몇 분의 휴식을 제공하는 마침표를 포함하여 여러 문장을 입력합니다. 이 줄 두 개의 개행 문자 뒤에옵니다. 마지막으로 한 개의 텍스트가 있어야합니다! " 코드가 마지막 문장에 도달하면 wordPos (iFile.tellg())뿐만 아니라 startSent 및 endSent 값을 읽은 다음 iFile이 해제된 것처럼 보입니다. – fryeguy
마침표 대신 느낌표 (!)로 끝나는 문장 때문에 실패한다는 말씀입니까? 코드는 마침표로 끝나는 문장만 처리하도록 엄격하게 작성되어 있습니다. – frayser