5. ハッシュ表
著者:梅谷 武
各言語においてハッシュ表を比較する。
作成:2024-04-26
更新:2024-09-13
更新:2024-09-13
ハッシュ表の実装にはstd::unordered_mapを用いる。
ハッシュ表の実装にはModule Hashtblを用いる。
ハッシュ表についての概説はThe Racket Guide>3 Built-in Datatypes>3.10 Hash Tables、詳細仕様はThe Racket Reference>4 Datatypes>4.15 Hash Tablesにある。
入力ファイルを入力リスト:lstに読み込み、ハッシュ表:tblを生成し、全要素を検索し、全要素を削除する。
- 入力ファイルを読み込み、入力リスト:lstに格納
- lstからハッシュ表:tblに挿入
- ハッシュ表を全要素で検索
- ハッシュ表の全項目を削除
ハッシュ表に挿入する際、すでにキーが存在するときは古い項目を削除してから新規に追加し、キーの重複は許さないものとする。
入力ファイルとして基準データ:uniform1m.txtを用い、整数リストの各要素 key をキーとし、対応する値は "value" + to_string(key)という文字列にする。
hashtable.cpp:
#include <chrono>
#include <fstream>
#include <iostream>
#include <print>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
bool test(const std::string &infile) {
std::vector<long> lst;
std::unordered_map<long, std::string> tbl;
// read file
auto start = std::chrono::high_resolution_clock::now();
std::ifstream fin(infile);
if (!fin) {
throw std::runtime_error("Could not open file for reading.");
return false;
}
int num;
while (fin >> num)
lst.push_back(num);
fin.close();
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration = end - start;
std::println("length = {}", lst.size());
std::println("read time: {:9.6f} [sec]", duration.count());
// make hash
start = std::chrono::high_resolution_clock::now();
for (long key : lst)
tbl[key] = "value" + std::to_string(key);
end = std::chrono::high_resolution_clock::now();
duration = end - start;
std::println("hashtable items = {}", tbl.size());
std::println("make hash: {:9.6f} [sec]", duration.count());
// search
start = std::chrono::high_resolution_clock::now();
for (long key : lst) {
auto it = tbl.find(key);
if (it != tbl.end())
;
else
std::println("{} is not found.", key);
}
end = std::chrono::high_resolution_clock::now();
duration = end - start;
std::println("search time:{:9.6f} [sec]", duration.count());
// delete
start = std::chrono::high_resolution_clock::now();
for (long key : lst)
tbl.erase(key);
end = std::chrono::high_resolution_clock::now();
duration = end - start;
std::println("hashtable items = {}", tbl.size());
std::println("delete time:{:9.6f} [sec]", duration.count());
return true;
}
int main(int argc, char *argv[]) {
try {
if (argc != 2) {
throw std::invalid_argument(
"Usage: hashtable <infile>");
}
std::string infile(argv[1]);
if (!test(infile))
std::println("An error has occurred.");
} catch (const std::logic_error &e) {
std::println(stderr, "Logic error: {}", e.what());
} catch (const std::runtime_error &e) {
std::println(stderr, "Runtime error: {}", e.what());
} catch (const std::exception &e) {
std::println(stderr, "Another std::exception: {}", e.what());
} catch (...) {
std::println(stderr, "Unknown exception.");
}
return 0;
}
makefile:
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = hashtable.cpp
EXES = hashtable_g hashtable_l
all: $(EXES)
hashtable_g: $(SRC)
$(CPPG) $(CPPFLAGS) -o $@ $(SRC)
hashtable_l: $(SRC)
$(CPPL) $(CPPFLAGS) -o $@ $(SRC)
clean:
rm -f $(EXES)
基準データ:uniform1m.txtで測定する。
$ measure ./hashtable_g uniform1m.txt
length = 1000000
read time: 0.095539 [sec]
hashtable items = 995168
make hash: 0.565349 [sec]
search time: 0.099856 [sec]
hashtable items = 0
delete time: 0.307995 [sec]
======================================
Process exited with status: 0
total time: 1.077240 [sec]
mem size: 84660 [KB]
code size: 137 [KB]
$ measure ./hashtable_l uniform1m.txt
length = 1000000
read time: 0.095704 [sec]
hashtable items = 995168
make hash: 0.551604 [sec]
search time: 0.070454 [sec]
hashtable items = 0
delete time: 0.289365 [sec]
======================================
Process exited with status: 0
total time: 1.015173 [sec]
mem size: 84852 [KB]
code size: 111 [KB]
hashtable.ml:
open Printf
let read_data filename =
let ichan = open_in filename in
let rec read_lines acc =
match input_line ichan with
| line ->
let num = int_of_string line in
read_lines (num :: acc)
| exception End_of_file ->
close_in ichan;
List.rev acc in
read_lines []
let test infile =
(* read file *)
let start = Unix.gettimeofday () in
let lst = read_data infile in
let end_ = Unix.gettimeofday () in
let len = List.length lst in
printf "length = %d\n" len;
printf "read time: %9.6f [sec]\n" (end_ -. start);
(* make hash *)
let start = Unix.gettimeofday () in
let tbl = Hashtbl.create len in
List.iter (fun key ->
let value = "value" ^ string_of_int key in
Hashtbl.replace tbl key value) lst;
let end_ = Unix.gettimeofday () in
printf "hashtable items = %d\n" (Hashtbl.length tbl);
printf "make hash: %9.6f [sec]\n" (end_ -. start);
(* search *)
let start = Unix.gettimeofday () in
List.iter (fun v ->
if (Hashtbl.mem tbl v) then ()
else printf "%d is not found.\n" v) lst;
let end_ = Unix.gettimeofday () in
printf "search time:%9.6f [sec]\n" (end_ -. start);
(* delete *)
let start = Unix.gettimeofday () in
List.iter (fun v -> Hashtbl.remove tbl v) lst;
let end_ = Unix.gettimeofday () in
printf "hashtable items = %d\n" (Hashtbl.length tbl);
printf "delete time:%9.6f [sec]\n" (end_ -. start)
let () =
try
if Array.length Sys.argv <> 2 then
raise (Invalid_argument
"Usage: hashtable <infile>")
else
let infile = Sys.argv.(1) in
test infile
with
| Invalid_argument msg ->
eprintf "Logic error: %s\n" msg
| Failure msg ->
eprintf "Runtime error: %s\n" msg
| exn ->
eprintf "Another exception: %s\n" (Printexc.to_string exn)
makefile:
OCAMLOPT = ocamlopt
OCAMLFLAGS = -O2
LIBPATH = -I +unix
LIBS = unix.cmxa
SRC = hashtable.ml
EXES = hashtable
all: $(EXES)
hashtable: $(SRC)
$(OCAMLOPT) $(OCAMLFLAGS) $(LIBPATH) -o $@ $(LIBS) $(SRC)
clean:
rm -f $(EXES) *.o *.cmx *.cmi
基準データ:uniform1m.txtで測定する。
$ measure ./hashtable uniform1m.txt
length = 1000000
read time: 0.287928 [sec]
hashtable items = 995168
make hash: 0.572085 [sec]
search time: 0.173136 [sec]
hashtable items = 0
delete time: 0.213460 [sec]
======================================
Process exited with status: 0
total time: 1.249614 [sec]
mem size: 98260 [KB]
code size: 3290 [KB]
hashtable.rkt:
#lang racket
(require iso-printf)
(define (read-data filename)
(call-with-input-file filename
(lambda (port)
(let loop ([lst '()])
(let ([line (read-line port)])
(if (eof-object? line)
(reverse lst)
(loop (cons (string->number line) lst))))))))
(define (test infile)
; read file
(define start (current-inexact-milliseconds))
(define lst (read-data infile))
(define end (current-inexact-milliseconds))
(printf "length = %d\n" (length lst))
(printf "read time: %9.6f [sec]\n" (/ (- end start) 1000.0))
; make hash
(set! start (current-inexact-milliseconds))
(define tbl (make-hash))
(for-each (λ (key)
(hash-set! tbl key
(string-append "value" (number->string key))))
lst)
(set! end (current-inexact-milliseconds))
(printf "hashtable items = %d\n" (hash-count tbl))
(printf "make hash: %9.6f [sec]\n" (/ (- end start) 1000.0))
; search
(set! start (current-inexact-milliseconds))
(for ([i lst])
(if (hash-has-key? tbl i)
(void)
(printf "%d is not found.\n" i)))
(set! end (current-inexact-milliseconds))
(printf "search time:%9.6f [sec]\n" (/ (- end start) 1000.0))
; delete
(set! start (current-inexact-milliseconds))
(for-each (λ (key) (hash-remove! tbl key)) lst)
(set! end (current-inexact-milliseconds))
(printf "hashtable items = %d\n" (hash-count tbl))
(printf "delete time:%9.6f [sec]\n" (/ (- end start) 1000.0)))
(define (main args)
(with-handlers
([exn:fail? (λ (e) (eprintf "%s\n" (exn-message e)))]
[exn? (λ (e) (eprintf "Unexpected: %s\n" (exn-message e)))])
(cond
[(not (= (vector-length args) 1))
(error "Usage: hashtable <infile>")]
[else
(let ([infile (vector-ref args 0)])
(test infile))])))
(main (current-command-line-arguments))
基準データ:uniform1m.txtで測定する。
$ raco exe hashtable.rkt
$ measure ./hashtable uniform1m.txt
length = 1000000
read time: 0.762054 [sec]
hashtable items = 995168
make hash: 1.307911 [sec]
search time: 0.456106 [sec]
hashtable items = 0
delete time: 0.490496 [sec]
======================================
Process exited with status: 0
total time: 3.559983 [sec]
mem size: 285524 [KB]
code size: 12446 [KB]
測定結果を表にまとめる。単位は秒[sec]。
測定項目 | GCC | Clang | OCaml | Racket |
---|---|---|---|---|
read file | 0.096 | 0.096 | 0.288 | 0.762 |
make hash | 0.565 | 0.552 | 0.572 | 1.308 |
search | 0.100 | 0.070 | 0.173 | 0.456 |
delete | 0.308 | 0.289 | 0.213 | 0.490 |
This document is licensed under the MIT License.
Copyright (C) 2024 Takeshi Umetani