lmi-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[lmi-commits] [lmi] master f14ad11 01/22: Implement MD5 checksum files reading in lmi code


From: Greg Chicares
Subject: [lmi-commits] [lmi] master f14ad11 01/22: Implement MD5 checksum files reading in lmi code
Date: Sat, 28 Mar 2020 18:23:35 -0400 (EDT)

branch: master
commit f14ad11bb5d24bf64dd45be4c03c848e401446bd
Author: Ilya Sinitsyn <address@hidden>
Commit: Gregory W. Chicares <address@hidden>

    Implement MD5 checksum files reading in lmi code
    
    Implement reading MD5 checksum files and computing MD5 checksums in lmi
    code and use the new function for doing it in Authenticity::Assay
    instead of calling the external md5sum program.
---
 Makefile.am           |   4 +
 authenticity.cpp      |  59 ++++---------
 authenticity.hpp      |  11 +--
 authenticity_test.cpp |   1 +
 generate_passkey.cpp  |   3 +-
 md5sum.cpp            | 225 ++++++++++++++++++++++++++++++++++++++++++++++++++
 md5sum.hpp            | 115 ++++++++++++++++++++++++++
 objects.make          |   3 +
 8 files changed, 369 insertions(+), 52 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index d2bd3ab..b69df63 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -417,6 +417,7 @@ liblmi_la_SOURCES = \
     irc7702_tables.cpp \
     lmi.cpp \
     md5.cpp \
+    md5sum.cpp \
     mec_input.cpp \
     mec_server.cpp \
     mec_state.cpp \
@@ -461,6 +462,7 @@ generate_passkey_SOURCES = \
     generate_passkey.cpp \
     global_settings.cpp \
     md5.cpp \
+    md5sum.cpp \
     miscellany.cpp \
     null_stream.cpp \
     path_utility.cpp \
@@ -596,6 +598,7 @@ test_authenticity_SOURCES = \
   calendar_date.cpp \
   global_settings.cpp \
   md5.cpp \
+  md5sum.cpp \
   miscellany.cpp \
   null_stream.cpp \
   path_utility.cpp \
@@ -1342,6 +1345,7 @@ noinst_HEADERS = \
     mc_enum_types.hpp \
     mc_enum_types.xpp \
     mc_enum_types_aux.hpp \
+    md5sum.hpp \
     md5.hpp \
     mec_document.hpp \
     mec_input.hpp \
diff --git a/authenticity.cpp b/authenticity.cpp
index 4ac640a..6bf24e0 100644
--- a/authenticity.cpp
+++ b/authenticity.cpp
@@ -29,6 +29,7 @@
 #include "global_settings.hpp"
 #include "handle_exceptions.hpp"
 #include "md5.hpp"
+#include "md5sum.hpp"
 #include "path_utility.hpp"             // fs::path inserter
 #include "platform_dependent.hpp"       // chdir()
 #include "system_command.hpp"
@@ -46,11 +47,6 @@
 // have been validated with 'md5sum'. This problem will grow worse
 // when the binary database files are replaced with xml.
 
-namespace
-{
-    int const chars_per_formatted_hex_byte = CHAR_BIT / 4;
-} // Unnamed namespace.
-
 Authenticity& Authenticity::Instance()
 {
     try
@@ -189,19 +185,25 @@ std::string Authenticity::Assay
         }
 
     // Validate all data files.
-    fs::path original_path(fs::current_path());
-    if(0 != chdir(data_path.string().c_str()))
-        {
-        oss
-            << "Unable to change directory to '"
-            << data_path
-            << "'. Try reinstalling."
-            ;
-        return oss.str();
-        }
     try
         {
-        system_command("md5sum --check --status " + 
std::string(md5sum_file()));
+        auto const sums = md5_read_checksum_file(data_path / md5sum_file());
+        for(auto const& s : sums)
+            {
+            auto const file_path = data_path / s.filename;
+            auto const md5 = md5_calculate_file_checksum
+                (data_path / s.filename
+                ,s.file_mode
+                );
+            if(md5 != s.md5sum)
+                {
+                    throw std::runtime_error
+                        ( "Integrity check failed for '"
+                        + s.filename.string()
+                        + "'"
+                        );
+                }
+            }
         }
     catch(...)
         {
@@ -212,15 +214,6 @@ std::string Authenticity::Assay
             ;
         return oss.str();
         }
-    if(0 != chdir(original_path.string().c_str()))
-        {
-        oss
-            << "Unable to restore directory to '"
-            << original_path
-            << "'. Try reinstalling."
-            ;
-        return oss.str();
-        }
 
     // The passkey must match the md5 sum of the md5 sum of the file
     // of md5 sums of secured files.
@@ -277,19 +270,3 @@ void authenticate_system()
         std::exit(EXIT_FAILURE);
         }
 }
-
-std::string md5_hex_string(std::vector<unsigned char> const& vuc)
-{
-    LMI_ASSERT(md5len == vuc.size());
-    std::stringstream oss;
-    oss << std::hex;
-    for(auto const& j : vuc)
-        {
-        oss
-            << std::setw(chars_per_formatted_hex_byte)
-            << std::setfill('0')
-            << static_cast<int>(j)
-            ;
-        }
-    return oss.str();
-}
diff --git a/authenticity.hpp b/authenticity.hpp
index a615786..928aef7 100644
--- a/authenticity.hpp
+++ b/authenticity.hpp
@@ -25,19 +25,14 @@
 #include "config.hpp"
 
 #include "calendar_date.hpp"
+#include "md5sum.hpp"
 #include "so_attributes.hpp"
 
 #include <boost/filesystem/path.hpp>
 
-#include <climits>                      // CHAR_BIT
 #include <string>
 #include <vector>
 
-// The gnu libc md5 implementation seems to assume this:
-static_assert(8 == CHAR_BIT || 16 == CHAR_BIT);
-// so md5 output is 128 bits == 16 8-bit bytes or 8 16-bit bytes:
-enum {md5len = 128 / CHAR_BIT};
-
 /// Permit running the system iff data files and date are valid.
 ///
 /// Implemented as a simple Meyers singleton, with the expected
@@ -75,10 +70,6 @@ class Authenticity final
 
 void LMI_SO authenticate_system();
 
-/// Hex representation of an md5 sum as a string.
-
-std::string md5_hex_string(std::vector<unsigned char> const&);
-
 /// Name of file containing md5sums of secured files.
 
 inline char const* md5sum_file() {return "validated.md5";}
diff --git a/authenticity_test.cpp b/authenticity_test.cpp
index 8c1578e..4e51f33 100644
--- a/authenticity_test.cpp
+++ b/authenticity_test.cpp
@@ -26,6 +26,7 @@
 #include "assert_lmi.hpp"
 #include "contains.hpp"
 #include "md5.hpp"
+#include "md5sum.hpp"
 #include "miscellany.hpp"
 #include "system_command.hpp"
 #include "test_tools.hpp"
diff --git a/generate_passkey.cpp b/generate_passkey.cpp
index 3170187..f454034 100644
--- a/generate_passkey.cpp
+++ b/generate_passkey.cpp
@@ -19,9 +19,10 @@
 // email: <address@hidden>
 // snail: Chicares, 186 Belle Woods Drive, Glastonbury CT 06033, USA
 
-#include "authenticity.hpp"             // md5_hex_string()
+#include "authenticity.hpp"       // md5sum_file()
 #include "main_common.hpp"
 #include "md5.hpp"
+#include "md5sum.hpp"             // md5_hex_string()
 
 #include <cstdio>
 #include <cstdlib>
diff --git a/md5sum.cpp b/md5sum.cpp
new file mode 100644
index 0000000..743dc3a
--- /dev/null
+++ b/md5sum.cpp
@@ -0,0 +1,225 @@
+// Compute checksums of files or strings.
+//
+// Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Gregory W. Chicares.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// http://savannah.nongnu.org/projects/lmi
+// email: <address@hidden>
+// snail: Chicares, 186 Belle Woods Drive, Glastonbury CT 06033, USA
+
+#include "pchfile.hpp"
+
+#include "assert_lmi.hpp"
+#include "md5.hpp"
+#include "md5sum.hpp"
+#include "miscellany.hpp"
+
+#include <fstream>
+#include <memory>
+
+/// Parse md5sum-style checksum lines from a stream.
+///
+/// Lines beginning with '#' are skipped as comments. Every other
+/// line must consist of an md5 sum that is exactly
+/// chars_per_formatted_hex_byte * md5len characters long, a space,
+/// a mode marker (' ' for text, '*' for binary), and a file name.
+/// Any other line, including an empty one, causes std::runtime_error
+/// to be thrown with a message that names 'stream_description' and
+/// the offending line number.
+///
+/// std::runtime_error is also thrown if the stream reports badbit
+/// once reading stops, so that an I/O failure cannot silently yield
+/// a truncated list of files.
+
+std::vector<md5sum_for_file> md5_read_checksum_stream
+    (std::istream& is
+    ,std::string const& stream_description
+    )
+{
+    std::vector<md5sum_for_file> result_vec;
+
+    int line_number = 0;
+    // This lambda is used only inside this function, so capturing by
+    // reference is safe and avoids copying the description string.
+    auto error_with_message =
+        [&line_number, &stream_description] (char const* message)
+        {
+        std::ostringstream oss;
+        oss
+            << "'" << stream_description << "': "
+            << message
+            << " at line " << line_number
+            ;
+        return oss.str();
+        };
+
+    // The minimal length: 32(md5sum) + 2(delimiters) + 1(shortest file name)
+    std::string::size_type const minimal_line_length = 35;
+
+    std::string line;
+    while(std::getline(is, line))
+        {
+        ++line_number;
+
+        // Ignore comment lines, which begin with a '#' character.
+        // Empty lines are not allowed.
+        if(!line.empty() && line.front() == '#')
+            {
+            continue;
+            }
+
+        if(line.size() < minimal_line_length)
+            {
+            throw std::runtime_error(error_with_message("line too short"));
+            }
+
+        // The length check above guarantees a nonempty line, so
+        // 'line.size() - 1' cannot wrap around. This comparison
+        // rejects both npos (no space at all) and a space that is
+        // the last character (no room for the mode marker).
+        auto const space_pos = line.find(' ');
+        if(line.size() - 1 <= space_pos)
+            {
+            throw std::runtime_error
+                (error_with_message("incorrect checksum line format")
+                );
+            }
+
+        char const second_delimiter = line[space_pos + 1];
+        md5_file_mode file_mode;
+        if(second_delimiter == ' ')
+            {
+            file_mode = md5_file_mode::text;
+            }
+        else if(second_delimiter == '*')
+            {
+            file_mode = md5_file_mode::binary;
+            }
+        else
+            {
+            throw std::runtime_error
+                (error_with_message("incorrect checksum line format")
+                );
+            }
+
+        std::string md5sum = line.substr(0, space_pos);
+        std::string file = line.substr(space_pos + 2);
+
+        if(md5sum.size() != chars_per_formatted_hex_byte * md5len)
+            {
+            throw std::runtime_error
+                (error_with_message("incorrect MD5 sum format")
+                );
+            }
+
+        result_vec.emplace_back(std::move(file), std::move(md5sum), file_mode);
+        }
+
+    // getline() stops at end of input or on an I/O error; only the
+    // latter sets badbit. Returning a partial list in that case would
+    // let some files escape verification.
+    if(is.bad())
+        {
+        std::ostringstream oss;
+        oss
+            << "'" << stream_description << "': "
+            << "read error after line " << line_number
+            ;
+        throw std::runtime_error(oss.str());
+        }
+
+    return result_vec;
+}
+
+/// Read md5sum-style checksum lines from the named file.
+///
+/// The file is opened with the default open mode and handed to
+/// md5_read_checksum_stream(), with the file name serving as the
+/// description used in error messages. Throws std::runtime_error
+/// if the file cannot be opened.
+
+std::vector<md5sum_for_file> md5_read_checksum_file(fs::path const& filename)
+{
+    std::string const name = filename.string();
+    std::ifstream ifs(name);
+
+    if(ifs.fail())
+        {
+        throw std::runtime_error("'" + name + "': no such file or directory");
+        }
+
+    return md5_read_checksum_stream(ifs, name);
+}
+
+/// Compute the md5 sum of everything that can be read from a stream.
+///
+/// Returns the digest as formatted by md5_hex_string(). Throws
+/// std::runtime_error, naming 'stream_description', if reading stops
+/// for any reason other than reaching the end of the input.
+
+std::string md5_calculate_stream_checksum
+    (std::istream& is
+    ,std::string const& stream_description
+    )
+{
+    // Full chunks are passed to md5_process_block(), so the chunk
+    // size must be a multiple of 64.
+    constexpr std::streamsize chunk_size = 4096;
+    char chunk[chunk_size];
+
+    md5_ctx context;
+    md5_init_ctx(&context);
+
+    // read() leaves the stream in a good state only when it has
+    // extracted a complete chunk; a short read sets failbit, and
+    // also eofbit if the cause was the end of the input.
+    while(is.read(chunk, chunk_size))
+        {
+        md5_process_block(chunk, chunk_size, &context);
+        }
+
+    // Leaving the loop without eofbit means reading failed for some
+    // reason other than the end of the input.
+    if(!is.eof())
+        {
+        std::ostringstream message;
+        message
+            << "'"
+            << stream_description
+            << "': failed to read data while computing md5sum"
+            ;
+        throw std::runtime_error(message.str());
+        }
+
+    // Whatever the final, short read extracted still has to be
+    // hashed. Its length need not be a multiple of 64, hence
+    // md5_process_bytes() rather than md5_process_block().
+    std::streamsize const leftover = is.gcount();
+    if(0 < leftover)
+        {
+        md5_process_bytes(chunk, leftover, &context);
+        }
+
+    std::vector<unsigned char> digest(md5len);
+    md5_finish_ctx(&context, digest.data());
+
+    return md5_hex_string(digest);
+}
+
+/// Compute the md5 sum of the named file's contents.
+///
+/// The file is opened for input, with std::ios_base::binary added
+/// when 'file_mode' is md5_file_mode::binary, and is then passed to
+/// md5_calculate_stream_checksum(), with the file name serving as
+/// the description used in error messages. Throws std::runtime_error
+/// if the file cannot be opened.
+
+std::string LMI_SO md5_calculate_file_checksum
+    (fs::path const& filename
+    ,md5_file_mode file_mode
+    )
+{
+    auto const filename_string = filename.string();
+
+    std::ios_base::openmode open_mode{std::ios_base::in};
+    switch (file_mode)
+        {
+        case md5_file_mode::binary:
+            open_mode |= std::ios_base::binary;
+            break;
+        case md5_file_mode::text:
+            // Nothing to do.
+            break;
+        }
+
+    std::ifstream is(filename_string, open_mode);
+    if(!is)
+        {
+        std::ostringstream oss;
+        oss << "'" << filename_string << "': no such file or directory";
+        throw std::runtime_error(oss.str());
+        }
+
+    return md5_calculate_stream_checksum(is, filename_string);
+}
+
+/// Format an md5 sum as a string of hexadecimal digits.
+///
+/// Asserts that the argument has exactly md5len elements. Each
+/// element is written in hexadecimal, padded on the left with '0'
+/// to chars_per_formatted_hex_byte characters.
+
+std::string md5_hex_string(std::vector<unsigned char> const& vuc)
+{
+    LMI_ASSERT(md5len == vuc.size());
+    std::ostringstream hex_stream;
+    hex_stream << std::hex << std::setfill('0');
+    for(unsigned char const byte : vuc)
+        {
+        // The field width reverts to zero after each insertion, so
+        // it has to be set again for every element.
+        hex_stream.width(chars_per_formatted_hex_byte);
+        hex_stream << static_cast<int>(byte);
+        }
+    return hex_stream.str();
+}
diff --git a/md5sum.hpp b/md5sum.hpp
new file mode 100644
index 0000000..a4c24d9
--- /dev/null
+++ b/md5sum.hpp
@@ -0,0 +1,115 @@
+// Compute checksums of files or strings.
+//
+// Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Gregory W. Chicares.
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// http://savannah.nongnu.org/projects/lmi
+// email: <address@hidden>
+// snail: Chicares, 186 Belle Woods Drive, Glastonbury CT 06033, USA
+
+#ifndef md5sum_hpp
+#define md5sum_hpp
+
+#include "config.hpp"
+#include "so_attributes.hpp"
+
+#include <boost/filesystem/path.hpp>
+
+#include <climits>                      // CHAR_BIT
+#include <string>
+#include <vector>
+
+// The gnu libc md5 implementation seems to assume this:
+static_assert(8 == CHAR_BIT || 16 == CHAR_BIT);
+// so md5 output is 128 bits == 16 8-bit bytes or 8 16-bit bytes:
+enum { md5len = 128 / CHAR_BIT };
+
+enum { chars_per_formatted_hex_byte = CHAR_BIT / 4 }; // 4 bits per hex digit
+
+enum class md5_file_mode
+{
+    binary, // '*' marker in a checksum line
+    text    // ' ' marker in a checksum line
+};
+
+/// One entry of a checksum file: a file name, the md5 sum recorded
+/// for it, and the mode associated with that file.
+
+struct md5sum_for_file
+{
+    /// The path and the sum are taken by rvalue reference and moved
+    /// into the corresponding members.
+    md5sum_for_file(fs::path&& path, std::string&& sum, md5_file_mode mode)
+        :filename  (std::move(path))
+        ,md5sum    (std::move(sum))
+        ,file_mode (mode)
+    {}
+
+    fs::path      filename;
+    std::string   md5sum;
+    md5_file_mode file_mode = md5_file_mode::binary;
+};
+
+/// Reads a vector of structs, each holding a file name and its md5 sum,
+/// from the given input stream.
+///
+/// Throws an std::runtime_error in case of an error.
+///
+/// The input stream must consist of lines with checksum and filename pairs,
+/// and optional comments introduced by a '#' character at the beginning of
+/// the line.
+/// Sample:
+/// 595f44fec1e92a71d3e9e77456ba80d1  filetohashA.txt
+/// 71f920fa275127a7b60fa4d4d41432a3  filetohashB.txt
+/// 43c191bf6d6c3f263a8cd0efd4a058ab  filetohashC.txt
+///
+/// There must be two spaces or a space and an asterisk between each md5sum
+/// value and filename to be compared (the second space indicates text mode,
+/// the asterisk binary mode). Otherwise, a std::runtime_error will be thrown.
+///
+/// The stream_description parameter is only used in exception messages.
+
+std::vector<md5sum_for_file> LMI_SO md5_read_checksum_stream
+    (std::istream& is
+    ,std::string const& stream_description
+    );
+
+/// Reads a vector of structs, each holding a file name and its md5 sum,
+/// from the file.
+///
+/// Throws an std::runtime_error in case of an error.
+///
+/// Uses md5_read_checksum_stream to read the content of the file.
+
+std::vector<md5sum_for_file> LMI_SO md5_read_checksum_file
+    (fs::path const& filename
+    );
+
+/// Reads the content of the input stream and calculates the md5sum from it.
+///
+/// Throws an std::runtime_error in case of an error.
+
+std::string LMI_SO md5_calculate_stream_checksum
+    (std::istream& is
+    ,std::string const& stream_description
+    );
+
+/// Reads the content of the file and calculates the md5sum from it.
+///
+/// Throws an std::runtime_error in case of an error.
+
+std::string LMI_SO md5_calculate_file_checksum
+    (fs::path const& filename
+    ,md5_file_mode file_mode = md5_file_mode::binary
+    );
+
+/// Hex representation of an md5 sum as a string.
+
+std::string LMI_SO md5_hex_string(std::vector<unsigned char> const&);
+
+#endif // md5sum_hpp
diff --git a/objects.make b/objects.make
index 435eaff..2ac7dca 100644
--- a/objects.make
+++ b/objects.make
@@ -300,6 +300,7 @@ lmi_common_objects := \
   irc7702_tables.o \
   lmi.o \
   md5.o \
+  md5sum.o \
   mec_input.o \
   mec_server.o \
   mec_state.o \
@@ -536,6 +537,7 @@ authenticity_test$(EXEEXT): \
   calendar_date.o \
   global_settings.o \
   md5.o \
+  md5sum.o \
   miscellany.o \
   null_stream.o \
   path_utility.o \
@@ -1090,6 +1092,7 @@ generate_passkey$(EXEEXT): \
   generate_passkey.o \
   global_settings.o \
   md5.o \
+  md5sum.o \
   miscellany.o \
   null_stream.o \
   path_utility.o \



reply via email to

[Prev in Thread] Current Thread [Next in Thread]