diff --git a/README.md b/README.md
index 5bac55cd..ce71793c 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,7 @@
+The website can be accessed using this link.
This program uses the fast flow-based method developed by Michael T. Gastner, Vivien Seguy, and Pratyush More. For more information, you may refer to the following [paper](https://www.pnas.org/content/115/10/E2156):
diff --git a/include/string_to_decimal_converter.hpp b/include/string_to_decimal_converter.hpp
index 8d4a741f..fc6ff2b1 100644
--- a/include/string_to_decimal_converter.hpp
+++ b/include/string_to_decimal_converter.hpp
@@ -4,6 +4,20 @@
#include
#include
+/**
+ * @brief A utility class for converting string representations of numbers to decimal format.
+ *
+ * This class handles various number formats including:
+ * - Regular decimal numbers (e.g. "123.456", "123,456")
+ * - Scientific notation (e.g. "1.23e-4", "1.23E4")
+ * - Special value "NA"
+ *
+ * For scientific notation:
+ * - Both 'e' and 'E' are supported as exponent markers
+ * - The mantissa can use either '.' or ',' as decimal separator
+ * - The exponent must be an integer; a leading '-' is accepted, a leading '+' is not
+ * - The mantissa must be a valid decimal number
+ */
class StringToDecimalConverter
{
private:
diff --git a/requirements.txt b/requirements.txt
index 8ef9808b..b2b3a188 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
pip
wheel
conan==2.16.1
-cmake==3.30.0
\ No newline at end of file
+cmake==3.30.0
diff --git a/src/misc/string_to_decimal_converter.cpp b/src/misc/string_to_decimal_converter.cpp
index 29e6b322..2967af43 100644
--- a/src/misc/string_to_decimal_converter.cpp
+++ b/src/misc/string_to_decimal_converter.cpp
@@ -7,7 +7,7 @@ const std::string StringToDecimalConverter::NA_ = "NA";
bool StringToDecimalConverter::is_valid_char(char ch)
{
- return (std::isdigit(ch)) || ch == point_ || ch == comma_ || ch == minus_;
+  // A character is acceptable when it is a decimal digit, one of the two
+  // separators (point_ / comma_), the minus sign, or an exponent marker
+  // ('e' / 'E') used by scientific notation.
+  //
+  // std::isdigit has undefined behaviour for negative arguments other than
+  // EOF, and plain char may be signed, so convert to unsigned char first.
+  const auto uch = static_cast<unsigned char>(ch);
+  return std::isdigit(uch) || ch == point_ || ch == comma_ || ch == minus_ ||
+         ch == 'e' || ch == 'E';
}
std::string StringToDecimalConverter::remove_char(std::string str, char ch)
@@ -87,22 +87,66 @@ bool StringToDecimalConverter::is_str_valid_characters(const std::string &str)
return true;
}
- // Only 0 to 9, '.', '-', and ',' are allowed
+ // Only 0 to 9, '.', '-', ',', 'e', and 'E' are allowed
for (const auto &c : str) {
if (!is_valid_char(c)) {
return false;
}
}
- // '-' can only be used once
- if (count_char(str, minus_) > 1) {
- return false;
+ // '-' can only be used once in the mantissa and once in the exponent
+ size_t first_e = str.find_first_of("eE");
+ if (first_e != std::string::npos) {
+ // Check mantissa part
+ std::string mantissa = str.substr(0, first_e);
+ if (count_char(mantissa, minus_) > 1) {
+ return false;
+ }
+ if (count_char(mantissa, minus_) == 1 && mantissa[0] != minus_) {
+ return false;
+ }
+ // Check exponent part
+ std::string exponent = str.substr(first_e + 1);
+ if (count_char(exponent, minus_) > 1) {
+ return false;
+ }
+ if (count_char(exponent, minus_) == 1 && exponent[0] != minus_) {
+ return false;
+ }
+ } else {
+ // No scientific notation, check as before
+ if (count_char(str, minus_) > 1) {
+ return false;
+ }
+ if (count_char(str, minus_) == 1 && str[0] != minus_) {
+ return false;
+ }
}
- // '-' can only be used at the beginning
- if (count_char(str, minus_) == 1 and str[0] != minus_) {
+ // Check for valid scientific notation format
+ size_t e_count = count_char(str, 'e') + count_char(str, 'E');
+ if (e_count > 1) {
return false;
}
+ if (e_count == 1) {
+ size_t e_pos = str.find_first_of("eE");
+ // Must have digits before and after 'e'/'E'
+ if (e_pos == 0 || e_pos == str.length() - 1) {
+ return false;
+ }
+ // Must have at least one digit after 'e'/'E'
+ bool has_digit_after = false;
+ for (size_t i = e_pos + 1; i < str.length(); i++) {
+ if (std::isdigit(str[i])) {
+ has_digit_after = true;
+ break;
+ }
+ }
+ if (!has_digit_after) {
+ return false;
+ }
+ }
+
return true;
}
@@ -111,19 +155,37 @@ bool StringToDecimalConverter::is_str_correct_format(const std::string &str)
assert(is_str_valid_characters(str));
assert(is_str_NA(str) == false);
- // if the number of commas and points both are more than 1, then this format
- // does not belong to any known convention
+ // Handle scientific notation separately
+ size_t e_pos = str.find_first_of("eE");
+ if (e_pos != std::string::npos) {
+ // Check mantissa part
+ std::string mantissa = str.substr(0, e_pos);
+ if (has_multiple_commas_and_points(mantissa)) {
+ return false;
+ }
+ if (has_invalid_comma_point_sequence(mantissa)) {
+ return false;
+ }
+ if (has_separator_at_the_end(mantissa)) {
+ return false;
+ }
+ // Exponent part should only contain digits and optional minus sign
+ std::string exponent = str.substr(e_pos + 1);
+ for (char c : exponent) {
+ if (!std::isdigit(c) && c != minus_) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Original format validation for non-scientific notation
if (has_multiple_commas_and_points(str)) {
return false;
}
-
- // Check for commas before and after a point, or points before and after a
- // comma
if (has_invalid_comma_point_sequence(str)) {
return false;
}
-
- // Check for separators at the end of the string
if (has_separator_at_the_end(str)) {
return false;
}
diff --git a/tests/unit/test_string_to_decimal_converter.cpp b/tests/unit/test_string_to_decimal_converter.cpp
index 0e33be60..6c4bec0d 100644
--- a/tests/unit/test_string_to_decimal_converter.cpp
+++ b/tests/unit/test_string_to_decimal_converter.cpp
@@ -78,4 +78,87 @@ BOOST_AUTO_TEST_CASE(TestCorrectFormat_PointAtEnd)
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format("123456789."));
}
+BOOST_AUTO_TEST_CASE(TestValidCharacters_ScientificNotation)
+{
+  // Character-level validation of scientific notation: allowed characters,
+  // placement of '-', and a single 'e'/'E' with a mantissa before it and at
+  // least one digit after it.
+
+  // Basic scientific notation
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23e4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23E4"));
+
+  // Negative exponents
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23e-4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23E-4"));
+
+  // With commas or points as thousand separators in the mantissa
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1,234.56e4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.234,56E4"));
+
+  // Negative numbers
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("-1.23e4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("-1.23E-4"));
+
+  // Invalid scientific notation
+  BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
+    "1.23e"));  // Missing exponent
+  BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
+    "e4"));  // Missing mantissa
+  BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
+    "1.23ee4"));  // Multiple e's
+  BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
+    "1.23e-"));  // Incomplete negative exponent
+
+  // A non-integer exponent consists solely of allowed characters, so it
+  // passes this check; it is rejected by is_str_correct_format(), whose
+  // precondition requires this check to have succeeded.
+  BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23e4.5"));
+}
+
+BOOST_AUTO_TEST_CASE(TestCorrectFormat_ScientificNotation)
+{
+  // is_str_correct_format() asserts that is_str_valid_characters() holds for
+  // its argument. Strings that fail the character check must therefore never
+  // reach it directly; with assertions enabled the call would abort the test
+  // binary instead of returning false. This helper applies both stages in
+  // order and short-circuits on the first failure.
+  const auto is_accepted = [](const std::string &str) {
+    return StringToDecimalConverter::is_str_valid_characters(str) &&
+           StringToDecimalConverter::is_str_correct_format(str);
+  };
+
+  // Valid formats
+  BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1.23e4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1.23E4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1,234.56e4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1.234,56E4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("-1.23e4"));
+  BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("-1.23E-4"));
+
+  // Invalid formats that pass the character check and are rejected by the
+  // format check itself
+  BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
+    "1.23e4.5"));  // Non-integer exponent
+  BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
+    "1.23e4,"));  // Comma at end
+  BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
+    "1.23e4."));  // Point at end
+  BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
+    "1.23,456.789e4"));  // Multiple separators
+
+  // Invalid formats that already fail the character check; go through the
+  // two-stage helper so the precondition of is_str_correct_format() holds
+  BOOST_CHECK(!is_accepted("1.23e4e5"));  // Multiple e's
+  BOOST_CHECK(!is_accepted("1.23e"));  // Missing exponent
+  BOOST_CHECK(!is_accepted("e4"));  // Missing mantissa
+}
+
+BOOST_AUTO_TEST_CASE(TestParseStr_ScientificNotation)
+{
+  // One row per check: the raw input, the flag handed to parse_str() (true
+  // when the point is the decimal separator, false when the comma is), and
+  // the string parse_str() is expected to produce.
+  struct ParseCase {
+    const char *raw;
+    bool point_is_decimal;
+    const char *expected;
+  };
+
+  const ParseCase cases[] = {
+    // Point as decimal separator
+    {"1.23e4", true, "1.23e4"},
+    {"1,234.56e4", true, "1234.56e4"},
+    {"-1.23e-4", true, "-1.23e-4"},
+    // Comma as decimal separator
+    {"1,23e4", false, "1.23e4"},
+    {"1.234,56E4", false, "1234.56E4"},
+    {"-1,23e-4", false, "-1.23e-4"},
+  };
+
+  for (const auto &tc : cases) {
+    BOOST_CHECK_EQUAL(
+      StringToDecimalConverter::parse_str(tc.raw, tc.point_is_decimal),
+      tc.expected);
+  }
+}
+
BOOST_AUTO_TEST_SUITE_END()