mutable
A Database System for Research and Fast Prototyping
Loading...
Searching...
No Matches
Reader.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <iostream>
9
10
11namespace m {
12
// An interface for all readers.  A reader is a callable object that is handed an input stream; concrete readers
// implement the pure virtual call operator below.
15struct M_EXPORT Reader
16{
// The table to insert the data into.
17 const Table &table;
// NOTE(review): the constructor below also initializes `diag` and `transaction`; their declarations (lines 18-19
// of the header) are not part of this listing.
20
21 public:
// Stores the target table, the diagnostic object, and an optional transaction pointer (defaults to `nullptr`).
22 Reader(const Table &table, Diagnostic &diag, Scheduler::Transaction *transaction = nullptr) : table(table), diag(diag), transaction(transaction) { }
// Virtual destructor with an empty body.
23 virtual ~Reader() { }
24
// Pure virtual entry point taking the input stream `in`.  `name` defaults to "-"; presumably it is a display name
// for the stream -- TODO confirm against the implementations.
25 virtual void operator()(std::istream &in, const char *name = "-") = 0;
26};
27
30{
// Configuration parameters for importing a DSV file.
44 struct M_EXPORT Config
45 {
// The delimiter separating cells.
47 char delimiter = ',';
// The quotation mark for strings.
49 char quote = '"';
// The character to escape special characters within strings; defaults to a backslash.
51 char escape = '\\';
// Whether the first line of the file is a headline describing the columns.
53 bool has_header = false;
// Whether to ignore the headline (requires has_header = true).
55 bool skip_header = false;
// The maximum number of rows to read from the file; the default is the largest value of the field's own type,
// obtained via `decltype(num_rows)`.
57 std::size_t num_rows = std::numeric_limits<decltype(num_rows)>::max();
58
// Creates a Config for CSV files according to RFC 4180.  Starts from a default-constructed Config; `delimiter`
// and `quote` are re-assigned the same values as their defaults, so the only field that ends up different from
// a default Config is `escape`, which becomes the quote character.
61 static Config CSV() {
62 Config cfg;
63 cfg.delimiter = ',';
64 cfg.quote = '"';
65 cfg.escape = '"'; // in RFC 4180, the escape character *is* the quote character
66 return cfg;
67 }
68 };
69
70 private:
72
// The current character; `step()` overwrites it with the next character read from `in`.  It has no default
// initializer.
// NOTE(review): `c` is a `char` but is compared against `EOF` and assigned the `int` result of `in->get()`.
// Where `char` is unsigned the comparison can never be true; where it is signed, the byte 0xFF is
// indistinguishable from EOF.  Consider storing the character as `int`.
74 char c;
// The stream `step()` reads from; null until set (presumably by `operator()` -- confirm in the .cpp).
75 std::istream *in = nullptr;
// Characters collected by `push()`.
76 std::vector<char> buf;
// No default initializer.  Presumably the index of the column currently being processed -- TODO confirm.
78 std::size_t col_idx;
79
80 public:
// Constructor taking the target table, the DSV configuration (by value), the diagnostic object, and an optional
// transaction pointer (defaults to `nullptr`).  Declared here only; the definition is not part of this listing.
81 DSVReader(const Table &table, Config cfg, Diagnostic &diag, Scheduler::Transaction *transaction = nullptr);
82
// Overrides `Reader::operator()`.
// NOTE(review): unlike the base declaration, `name` has no default argument here.  Default arguments are bound
// to the static type of the callee, so a call through a `DSVReader` must pass `name` explicitly, while a call
// through a `Reader&` may omit it.
83 void operator()(std::istream &in, const char *name) override;
84
// Read-only accessors for the stored configuration `cfg_`.
// NOTE(review): all per-field accessors return `size_t`, although `delimiter`, `escape` and `quote` are `char`
// and `has_header` and `skip_header` are `bool` in `Config`; the values are implicitly converted.  A negative
// `char` (on platforms where `char` is signed) would convert to a very large `size_t`.
85 const Config & config() const { return cfg_; }
86 size_t num_rows() const { return cfg_.num_rows; }
87 size_t delimiter() const { return cfg_.delimiter; }
88 size_t escape() const { return cfg_.escape; }
89 size_t quote() const { return cfg_.quote; }
90 size_t has_header() const { return cfg_.has_header; }
91 size_t skip_header() const { return cfg_.skip_header; }
92
93 private:
// Private overrides of the `ConstTypeVisitor` call operators, one per type class.  The using-declaration keeps
// the base class's `operator()` overloads visible next to the ones declared in this class.
// Presumably each override reads one cell of the corresponding type -- the definitions are not part of this
// listing, TODO confirm.
94 using ConstTypeVisitor::operator();
95 void operator()(Const<ErrorType> &ty) override;
96 void operator()(Const<NoneType> &ty) override;
97 void operator()(Const<Boolean> &ty) override;
98 void operator()(Const<Bitmap> &ty) override;
99 void operator()(Const<CharacterSequence> &ty) override;
100 void operator()(Const<Date> &ty) override;
101 void operator()(Const<DateTime> &ty) override;
102 void operator()(Const<Numeric> &ty) override;
103 void operator()(Const<FnType> &ty) override;
104
// Advances the reader by one character.  First updates `pos` according to the character currently held in `c`,
// then reads the next character from `in` into `c` and returns it.
// NOTE(review): the returned value is the result of the assignment to the `char` member `c`, i.e. the stream's
// `int` result after narrowing to `char`, not the raw value of `in->get()`.
105 int step() {
106 switch (c) {
// A newline moves the position to column 1 of the next line ...
107 case '\n':
108 pos.column = 1;
109 pos.line++;
// ... and then falls through (no `break` above) into the EOF case, which leaves the position unchanged.
110 case EOF:
111 break;
112
// Any other character advances the column by one.
113 default:
114 pos.column++;
115 }
116 return c = in->get();
// The `;` after the closing brace is a redundant empty declaration.
117 };
118
// Appends the current character `c` to `buf`, then advances with `step()`.  There is no EOF check here: whatever
// `c` holds is appended.
119 void push() { buf.push_back(c); step(); }
120
// If the current character equals `chr`, consumes it via `step()` and returns true; otherwise returns false
// without advancing.
121 bool accept(char chr) { if (c == chr) { step(); return true; } return false; }
122
124 if (c == config().quote) {
125 step();
126 while (c != EOF and c != '\n' and c != config().quote) { step(); }
127 accept(config().quote);
128 } else
129 while (c != EOF and c != '\n' and c != config().delimiter) { step(); }
130 }
// Skips the remainder of the current row: steps until `c` is a newline or EOF.  The terminating newline itself
// is not consumed.
131 void discard_row() { while (c != EOF and c != '\n') { step(); } }
132
// Declaration only; the definition is not part of this listing.
// NOTE(review): the name says "unsigned" but the return type is the signed `int64_t` -- confirm how errors and
// out-of-range values are reported.
133 int64_t read_unsigned_int();
134};
135
136}
and(sizeof(T)==4) U64x1 reinterpret_to_U64(m
Definition: WasmAlgo.cpp:266
‍mutable namespace
Definition: Backend.hpp:10
std::string escape(char c)
Definition: fn.hpp:292
std::string quote(const std::string &str)
Definition: fn.hpp:306
Configuration parameters for importing a DSV file.
Definition: Reader.hpp:45
static Config CSV()
Creates a Config for CSV files, with delimiter, escape, and quote set according to RFC 4180 (see https://www.rfc-editor.org/rfc/rfc4180).
Definition: Reader.hpp:61
char delimiter
‍the delimiter separating cells
Definition: Reader.hpp:47
char quote
‍the quotation mark for strings
Definition: Reader.hpp:49
bool skip_header
‍whether to ignore the headline (requires has_header = true)
Definition: Reader.hpp:55
char escape
‍the character to escape special characters within strings, e.g. \n
Definition: Reader.hpp:51
bool has_header
‍whether the first line of the file is a headline describing the columns
Definition: Reader.hpp:53
std::size_t num_rows
‍the maximum number of rows to read from the file (may exceed actual number of rows)
Definition: Reader.hpp:57
A reader for delimiter separated value (DSV) files.
Definition: Reader.hpp:30
std::size_t col_idx
Definition: Reader.hpp:78
size_t skip_header() const
Definition: Reader.hpp:91
void discard_cell()
Definition: Reader.hpp:123
size_t delimiter() const
Definition: Reader.hpp:87
Config cfg_
Definition: Reader.hpp:71
bool accept(char chr)
Definition: Reader.hpp:121
size_t num_rows() const
Definition: Reader.hpp:86
Tuple tup
intermediate tuple to store values of a row
Definition: Reader.hpp:77
size_t escape() const
Definition: Reader.hpp:88
size_t quote() const
Definition: Reader.hpp:89
const Config & config() const
Definition: Reader.hpp:85
int step()
Definition: Reader.hpp:105
size_t has_header() const
Definition: Reader.hpp:90
void discard_row()
Definition: Reader.hpp:131
void push()
Definition: Reader.hpp:119
Position pos
Definition: Reader.hpp:73
std::vector< char > buf
Definition: Reader.hpp:76
unsigned line
Definition: Position.hpp:14
unsigned column
Definition: Position.hpp:15
An interface for all readers.
Definition: Reader.hpp:16
const Table & table
the table to insert the data into
Definition: Reader.hpp:17
Scheduler::Transaction * transaction
Definition: Reader.hpp:19
Diagnostic & diag
Definition: Reader.hpp:18
Reader(const Table &table, Diagnostic &diag, Scheduler::Transaction *transaction=nullptr)
Definition: Reader.hpp:22
virtual ~Reader()
Definition: Reader.hpp:23
virtual void operator()(std::istream &in, const char *name="-")=0
A table is a sorted set of attributes.
Definition: Schema.hpp:388