summaryrefslogtreecommitdiff
path: root/zen/read_txt.cpp
blob: 7566ff1466a3bd94dfac86814d16259d92a03d28 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include "read_txt.h"

using namespace zen;


namespace
{
warn_static("superfluous method")
//Guess the line-break convention of a text file by inspecting (at most) its first 64 KiB.
//Returns "\r\n", "\r" or "\n", decided by the first line-break character found;
//falls back to "\n" if the inspected data contains neither '\r' nor '\n'.
std::string detectLineBreak(const Zstring& filename) //throw FileError
{
    //read a (hopefully) significant portion of data
    zen::FileInput input(filename);

    std::vector<char> buffer(64 * 1024);
    const size_t bytesRead = input.read(&buffer[0], buffer.size()); //throw FileError
    buffer.resize(bytesRead);

    //locate the first character that can be part of a line break
    const std::string linebreakChars = "\r\n";
    std::vector<char>::iterator iter = std::find_first_of(buffer.begin(), buffer.end(),
                                                          linebreakChars.begin(), linebreakChars.end());
    if (iter == buffer.end())
        return "\n"; //fallback: no line break in the inspected data

    if (*iter == '\n')
        return "\n"; //Linux

    //first line-break character is '\r': distinguish "\r\n" from a lone '\r'
    ++iter;
    if (iter != buffer.end() && *iter == '\n')
        return "\r\n"; //Windows

    //a '\r' that is the very last inspected byte is also treated as a lone '\r'
    //rather than silently dropping to the "\n" fallback
    return "\r"; //Mac
}
}


//Set up line-wise reading of "filename".
//A non-empty "lineBreak" is used as the line separator as-is; an empty one requests
//auto-detection through detectLineBreak().
ExtractLines::ExtractLines(const Zstring& filename, const std::string& lineBreak) :  //throw FileError
    inputStream(filename),
    bufferLogBegin(buffer.begin()),
    lineBreak_(lineBreak)
{
    if (!lineBreak.empty())
        return; //caller supplied the separator explicitly

    lineBreak_ = detectLineBreak(filename); //throw FileError
}


//Extract the next line from the input into "output" (the line break itself is not included).
//Returns true if a line was written to "output", false once the stream is at EOF and no buffered data is left.
//Data remaining after the last line break is returned as a final line.
bool ExtractLines::getLine(std::string& output) //throw FileError
{
    warn_static("don't use lineBreak, but support any of r, n, rn!!!")
    for (;;)
    {
        //check if full line is in buffer
        std::vector<char>::iterator iter = std::search(bufferLogBegin, buffer.end(), lineBreak_.begin(), lineBreak_.end());
        if (iter != buffer.end())
        {
            output.assign(bufferLogBegin, iter);
            bufferLogBegin = iter + lineBreak_.size(); //continue behind the line break on the next call
            return true;
        }

        //no complete line available: drop the part that was already handed out
        buffer.erase(buffer.begin(), bufferLogBegin);
        bufferLogBegin = buffer.begin();

        //if done: cleanup
        if (inputStream.eof())
        {
            if (buffer.empty())
                return false;

            output.assign(buffer.begin(), buffer.end());
            buffer.clear();
            bufferLogBegin = buffer.begin(); //clear() invalidates iterators: re-anchor so the next call does not search from a stale one
            return true;
        }

        //read next block: append up to BLOCK_SIZE bytes behind the data still buffered
        const size_t BLOCK_SIZE = 512 * 1024;
        const size_t oldSize = buffer.size();
        buffer.resize(oldSize + BLOCK_SIZE);

        const size_t bytesRead = inputStream.read(&buffer[0] + oldSize, BLOCK_SIZE); //throw FileError
        assert(bytesRead <= BLOCK_SIZE); //promised by FileInput()

        if (bytesRead < BLOCK_SIZE)
            buffer.resize(oldSize + bytesRead); //trim the unused tail of the block

        bufferLogBegin = buffer.begin(); //resize() may have reallocated
    }
}
bgstack15