diff --git a/engine/CSVParser.cc b/engine/CSVParser.cc index 93026d34e..a33384264 100644 --- a/engine/CSVParser.cc +++ b/engine/CSVParser.cc @@ -307,15 +307,12 @@ namespace { const char* what() const noexcept override {return "No data columns specified\nIf dataset has no data, try selecting counter";} }; + struct DuplicateKey: public std::exception { std::string msg="Duplicate key"; - DuplicateKey(const vector& x) { - for (auto& i: x) - msg+=":"+str(i); - msg+="\nTry selecting a different duplicate key action"; - } - DuplicateKey(const Key& x, const Tokens& tokens) { + Key key; + DuplicateKey(const Key& x, const Tokens& tokens): key(x) { for (auto& i: x) msg+=":"+tokens[i]; msg+="\nTry selecting a different duplicate key action"; @@ -323,6 +320,18 @@ namespace const char* what() const noexcept override {return msg.c_str();} }; + struct InvalidData: public std::exception + { + string data; ///< data received in field + string type; ///< type of data + string colName; ///< column name + string msg; + InvalidData(const string& data, const string& type,const string& colName): + data(data), type(type), colName(colName) + {msg="Invalid data: "+data+" for "+type+" dimensioned column: "+colName;} + const char* what() const noexcept override {return msg.c_str();} + }; + double quotedStoD(const string& s,size_t& charsProcd) { //strip possible quote characters @@ -703,8 +712,17 @@ namespace minsky line[i-1]=spec.escape; } - template - void loadValueFromCSVFileT(VariableValue& vv, istream& input, const DataSpec& spec, uintmax_t fileSize) + /// handle reporting errors in loadValueFromCSVFileT when loading files + struct OnError + { + /// called on error - \a ex message to pass on, \a row - current row + template <class Ex> void operator()(const Ex& ex, size_t row) {throw ex;} /* templated: throws the concrete exception type rather than a sliced std::exception, so what() keeps its message */ + /// update a map of keys to first rows for duplicate key processing + void rowKeyInsert(const Key&, size_t) {} + }; + + template + void loadValueFromCSVFileT(VariableValue& vv, istream& input, const DataSpec& 
spec, uintmax_t fileSize, E& onError) { const BusyCursor busy(minsky()); const ProgressUpdater pu(minsky().progressState, "Importing CSV",6); @@ -843,9 +861,7 @@ namespace minsky if (spec.dontFail) goto invalidKeyGotoNextLine; else - throw std::runtime_error("Invalid data: "+*field+" for "+ - to_string(spec.dimensions[dim].type)+ - " dimensioned column: "+spec.dimensionNames[dim]); + onError(InvalidData(*field,to_string(spec.dimensions[dim].type),spec.dimensionNames[dim]),row); } dim++; } @@ -885,7 +901,10 @@ namespace minsky { v=stod(s); if (i==tmpData.end()) - tmpData.emplace(key,v); + { + tmpData.emplace(key,v); + onError.rowKeyInsert(key,row); /* record the row of first occurrence, not the data value */ + } } catch (const std::bad_alloc&) {throw;} @@ -898,7 +917,7 @@ namespace minsky switch (spec.duplicateKeyAction) { case DataSpec::throwException: - throw DuplicateKey(key,sliceLabelTokens); + onError(DuplicateKey(key,sliceLabelTokens),row); break; /* onError may return (error reporting), so do not fall through into sum */ case DataSpec::sum: i->second+=v; break; @@ -1066,11 +1085,78 @@ namespace minsky void loadValueFromCSVFile(VariableValue& v, istream& input, const DataSpec& spec, uintmax_t fileSize) { + OnError onError; if (spec.separator==' ') - loadValueFromCSVFileT(v,input,spec,fileSize); + loadValueFromCSVFileT(v,input,spec,fileSize,onError); else - loadValueFromCSVFileT(v,input,spec,fileSize); + loadValueFromCSVFileT(v,input,spec,fileSize,onError); } + + template + void reportFromCSVFileT(istream& input, ostream& output, const DataSpec& spec, uintmax_t fileSize ) + { + struct ErrorReporter //: public OnError // using duck typing, not dynamic polymorphism + { + Map firstRow; + map duplicates; + map invalidData; + void operator()(const DuplicateKey& ex, size_t row) { + duplicates.emplace(firstRow[ex.key],ex.key); + duplicates.emplace(row,ex.key); + } + void operator()(const InvalidData& ex, size_t row) {invalidData.emplace(row, ex.msg);} + /// update a map of keys to first rows for duplicate key processing + void rowKeyInsert(const Key& key, size_t row) {firstRow.emplace(key,row);} + } onError; + 
+ VariableValue vv(VariableType::parameter); + + // parse file to extract error locations + loadValueFromCSVFileT

(vv, input, spec, fileSize, onError); + + input.clear(); /* NOTE(review): stream is presumably at EOF with failbit set after the parse pass; seekg is a no-op while failbit is set */ input.seekg(0); + string buf; + size_t row=0; + + // extract all error lines + multimap duplicateLines; + vector invalidDataLines; + string sep{spec.separator}; + for (; getWholeLine(input, buf, spec); ++row) + { + if (onError.duplicates.contains(row)) + duplicateLines.emplace(onError.duplicates[row],"duplicate key"+sep+buf); + if (onError.invalidData.contains(row)) + invalidDataLines.push_back(onError.invalidData[row]+sep+buf); + } + + // now output report + input.clear(); /* reset stream state left by the exhausted read loop before rewinding */ input.seekg(0); + // process header + for (row=0; row(input,output,spec,fileSize); + else + reportFromCSVFileT(input,output,spec,fileSize); + } + + } CLASSDESC_ACCESS_EXPLICIT_INSTANTIATION(minsky::DataSpec); diff --git a/engine/CSVParser.h b/engine/CSVParser.h index 2097533bf..ef324e269 100644 --- a/engine/CSVParser.h +++ b/engine/CSVParser.h @@ -107,7 +107,7 @@ namespace minsky /// creates a report CSV file from input, with errors sorted at /// begining of file, with a column for error messages - void reportFromCSVFile(std::istream& input, std::ostream& output, const DataSpec& spec); + void reportFromCSVFile(std::istream& input, std::ostream& output, const DataSpec& spec, uintmax_t fileSize); /// load a variableValue from a stream according to data spec /// @param fileSize size of file to read (for progress bar) diff --git a/model/CSVDialog.cc b/model/CSVDialog.cc index 18aa223f8..a3da5aa71 100644 --- a/model/CSVDialog.cc +++ b/model/CSVDialog.cc @@ -52,6 +52,7 @@ using ecolab::cairo::CairoSave; using tcp = boost::asio::ip::tcp; namespace ssl = boost::asio::ssl; namespace http = boost::beast::http; +using boost::filesystem::file_size; const unsigned CSVDialog::numInitialLines; @@ -60,7 +61,7 @@ void CSVDialog::reportFromFile(const std::string& input, const std::string& outp ifstream is(input); stripByteOrderingMarker(is); ofstream of(output); - reportFromCSVFile(is,of,spec); + reportFromCSVFile(is,of,spec,file_size(input)); } namespace