5. ハッシュ表
著者:梅谷 武
各言語においてハッシュ表を比較する。
作成:2024-04-26
更新:2024-09-13

C++

 ハッシュ表の実装にはstd::unordered_mapを用いる。
 ハッシュ表の実装にはModule Hashtblを用いる。
 ハッシュ表についての概説はThe Racket Guide>3 Built-in Datatypes>3.10 Hash Tables、詳細仕様はThe Racket Reference>4 Datatypes>4.15 Hash Tablesにある。
 入力ファイルを入力リスト:lstに読み込み、ハッシュ表:tblを生成し、全要素を検索し、全要素を削除する。
  1. 入力ファイルを読み込み、入力リスト:lstに格納
  2. lstからハッシュ表:tblに挿入
  3. ハッシュ表を全要素で検索
  4. ハッシュ表の全項目を削除
ハッシュ表に挿入する際、すでにキーが存在するときは古い項目を削除してから新規に追加し、キーの重複は許さないものとする。
 入力ファイルとして基準データ:uniform1m.txtを用い、整数リストの各要素 key をキーとし、対応する値は "value" + to_string(key)という文字列にする。

C++

 hashtable.cpp:
#include <chrono>
#include <fstream>
#include <iostream>
#include <print>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
 
bool test(const std::string &infile) {
  std::vector<long> lst;
  std::unordered_map<long, std::string> tbl;
  // read file
  auto start = std::chrono::high_resolution_clock::now();
  std::ifstream fin(infile);
  if (!fin) {
    throw std::runtime_error("Could not open file for reading.");
    return false;
  }
  int num;
  while (fin >> num)
    lst.push_back(num);
  fin.close();
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> duration = end - start;
  std::println("length = {}", lst.size());
  std::println("read time:  {:9.6f} [sec]", duration.count());
  // make hash
  start = std::chrono::high_resolution_clock::now();
  for (long key : lst)
    tbl[key] = "value" + std::to_string(key);
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("hashtable items = {}", tbl.size());
  std::println("make hash:  {:9.6f} [sec]", duration.count());
  // search
  start = std::chrono::high_resolution_clock::now();
  for (long key : lst) {
    auto it = tbl.find(key);
    if (it != tbl.end())
      ;
    else
      std::println("{} is not found.", key);
  }
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("search time:{:9.6f} [sec]", duration.count());
  // delete
  start = std::chrono::high_resolution_clock::now();
  for (long key : lst)
    tbl.erase(key);
  end = std::chrono::high_resolution_clock::now();
  duration = end - start;
  std::println("hashtable items = {}", tbl.size());
  std::println("delete time:{:9.6f} [sec]", duration.count());
  return true;
}
 
int main(int argc, char *argv[]) {
  try {
    if (argc != 2) {
      throw std::invalid_argument(
          "Usage: hashtable <infile>");
    }
    std::string infile(argv[1]);
    if (!test(infile))
      std::println("An error has occurred.");
  } catch (const std::logic_error &e) {
    std::println(stderr, "Logic error: {}", e.what());
  } catch (const std::runtime_error &e) {
    std::println(stderr, "Runtime error: {}", e.what());
  } catch (const std::exception &e) {
    std::println(stderr, "Another std::exception: {}", e.what());
  } catch (...) {
    std::println(stderr, "Unknown exception.");
  }
  return 0;
}


 makefile:
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = hashtable.cpp
EXES = hashtable_g hashtable_l
 
all: $(EXES)
 
hashtable_g: $(SRC)
	$(CPPG) $(CPPFLAGS) -o $@ $(SRC)
 
hashtable_l: $(SRC)
	$(CPPL) $(CPPFLAGS) -o $@ $(SRC)
 
clean:
	rm -f $(EXES)


 基準データ:uniform1m.txtで測定する。
$ measure ./hashtable_g uniform1m.txt
length = 1000000
read time:   0.095539 [sec]
hashtable items = 995168
make hash:   0.565349 [sec]
search time: 0.099856 [sec]
hashtable items = 0
delete time: 0.307995 [sec]
======================================
Process exited with status: 0
total time:  1.077240 [sec]
mem  size:      84660 [KB]
code size:        137 [KB]
$ measure ./hashtable_l uniform1m.txt
length = 1000000
read time:   0.095704 [sec]
hashtable items = 995168
make hash:   0.551604 [sec]
search time: 0.070454 [sec]
hashtable items = 0
delete time: 0.289365 [sec]
======================================
Process exited with status: 0
total time:  1.015173 [sec]
mem  size:      84852 [KB]
code size:        111 [KB]
 hashtable.ml:
open Printf
 
let read_data filename =
  let ichan = open_in filename in
  let rec read_lines acc =
    match input_line ichan with
    | line ->
      let num = int_of_string line in
      read_lines (num :: acc)
    | exception End_of_file ->
      close_in ichan;
      List.rev acc in
  read_lines []
 
let test infile =
  (* read file *)
  let start = Unix.gettimeofday () in
  let lst = read_data infile in
  let end_ = Unix.gettimeofday () in
  let len = List.length lst in
  printf "length = %d\n" len;
  printf "read time:  %9.6f [sec]\n" (end_ -. start);
  (* make hash *)
  let start = Unix.gettimeofday () in
  let tbl = Hashtbl.create len in
  List.iter (fun key ->
    let value = "value" ^ string_of_int key in
    Hashtbl.replace tbl key value) lst;
  let end_ = Unix.gettimeofday () in
  printf "hashtable items = %d\n" (Hashtbl.length tbl);
  printf "make hash:  %9.6f [sec]\n" (end_ -. start);
  (* search *)
  let start = Unix.gettimeofday () in
  List.iter (fun v ->
    if (Hashtbl.mem tbl v) then ()
    else printf "%d is not found.\n" v) lst;
  let end_ = Unix.gettimeofday () in
  printf "search time:%9.6f [sec]\n" (end_ -. start);
  (* delete *)
  let start = Unix.gettimeofday () in
  List.iter (fun v -> Hashtbl.remove tbl v) lst;
  let end_ = Unix.gettimeofday () in
  printf "hashtable items = %d\n" (Hashtbl.length tbl);
  printf "delete time:%9.6f [sec]\n" (end_ -. start)
 
let () =
  try
    if Array.length Sys.argv <> 2 then
      raise (Invalid_argument
        "Usage: hashtable <infile>")
    else
      let infile =  Sys.argv.(1) in
      test infile
  with
    | Invalid_argument msg ->
        eprintf "Logic error: %s\n" msg
    | Failure msg ->
        eprintf "Runtime error: %s\n" msg
    | exn ->
        eprintf "Another exception: %s\n" (Printexc.to_string exn)


 makefile:
OCAMLOPT = ocamlopt
OCAMLFLAGS = -O2
LIBPATH = -I +unix
LIBS = unix.cmxa
SRC = hashtable.ml
EXES = hashtable
 
all: $(EXES)
 
hashtable: $(SRC)
	$(OCAMLOPT) $(OCAMLFLAGS) $(LIBPATH) -o $@ $(LIBS) $(SRC)
 
clean:
	rm -f $(EXES) *.o *.cmx *.cmi


 基準データ:uniform1m.txtで測定する。
$ measure ./hashtable uniform1m.txt
length = 1000000
read time:   0.287928 [sec]
hashtable items = 995168
make hash:   0.572085 [sec]
search time: 0.173136 [sec]
hashtable items = 0
delete time: 0.213460 [sec]
======================================
Process exited with status: 0
total time:  1.249614 [sec]
mem  size:      98260 [KB]
code size:       3290 [KB]
 hashtable.rkt:
#lang racket
(require iso-printf)
 
(define (read-data filename)
  (call-with-input-file filename
    (lambda (port)
      (let loop ([lst '()])
        (let ([line (read-line port)])
          (if (eof-object? line)
              (reverse lst)
              (loop (cons (string->number line) lst))))))))
 
(define (test infile)
  ; read file
  (define start (current-inexact-milliseconds))
  (define lst (read-data infile))
  (define end (current-inexact-milliseconds))
  (printf "length = %d\n" (length lst))
  (printf "read time:  %9.6f [sec]\n" (/ (- end start) 1000.0))
  ; make hash
  (set! start (current-inexact-milliseconds))
  (define tbl (make-hash))
  (for-each (λ (key)
            (hash-set! tbl key
              (string-append "value" (number->string key))))
            lst)
  (set! end (current-inexact-milliseconds))
  (printf "hashtable items = %d\n" (hash-count tbl))
  (printf "make hash:  %9.6f [sec]\n" (/ (- end start) 1000.0))
  ; search
  (set! start (current-inexact-milliseconds))
  (for ([i lst])
    (if (hash-has-key? tbl i)
      (void)
      (printf "%d is not found.\n" i)))
  (set! end (current-inexact-milliseconds))
  (printf "search time:%9.6f [sec]\n" (/ (- end start) 1000.0))
  ; delete
  (set! start (current-inexact-milliseconds))
  (for-each (λ (key) (hash-remove! tbl key)) lst)
  (set! end (current-inexact-milliseconds))
  (printf "hashtable items = %d\n" (hash-count tbl))
  (printf "delete time:%9.6f [sec]\n" (/ (- end start) 1000.0)))
 
(define (main args)
  (with-handlers
    ([exn:fail? (λ (e) (eprintf "%s\n" (exn-message e)))]
     [exn? (λ (e) (eprintf "Unexpected: %s\n" (exn-message e)))])
  (cond
    [(not (= (vector-length args) 1))
      (error "Usage: hashtable <infile>")]
    [else
      (let ([infile (vector-ref args 0)])
        (test infile))])))
 
(main (current-command-line-arguments))


 基準データ:uniform1m.txtで測定する。
$ raco exe hashtable.rkt
$ measure ./hashtable uniform1m.txt
length = 1000000
read time:   0.762054 [sec]
hashtable items = 995168
make hash:   1.307911 [sec]
search time: 0.456106 [sec]
hashtable items = 0
delete time: 0.490496 [sec]
======================================
Process exited with status: 0
total time:  3.559983 [sec]
mem  size:     285524 [KB]
code size:      12446 [KB]
 測定結果を表にまとめる。単位は秒[sec]。
測定項目 GCC Clang OCaml Racket
read file 0.096 0.096 0.288 0.762
make hash 0.565 0.552 0.572 1.308
search 0.100 0.070 0.173 0.456
delete 0.308 0.289 0.213 0.490
This document is licensed under the MIT License.
Copyright (C) 2024 Takeshi Umetani