1. 文字列照合
著者:梅谷 武
 UNICODE文字列処理にはICUライブラリを用いる。(Ⅱ-5 UNICODE参照)
 文字列の実装にはModule Stringを用いる。
 Stringモジュールには文字列照合の機能が見当たらないので、文字列照合には正規表現モジュールModule Strを用いる。
 文字についての概説はThe Racket Guide>3 Built-in Datatypes>3.3 Characters、詳細仕様はThe Racket Reference>4 Datatypes>4.6 Charactersにある。
 文字列についての概説はThe Racket Guide>3 Built-in Datatypes>3.4 Strings (Unicode)、詳細仕様はThe Racket Reference>4 Datatypes>4.4 Stringsにある。
 文字列処理ライブラリSRFI 13: String Librariesとファイルシステムユーティリティ:racket/fileを使っている。racket/fileの詳細仕様は15.2 Filesystemにある。
  1. Lincoln
  2. Aristotle
  3. Laplace
  4. Poisson


#include <array>
#include <fstream>
#include <iostream>
#include <print>
#include <string>
// pmatch: byte-string search benchmark.
//
// Reads "input.txt" into memory and searches it n times for each of four
// fixed words with std::string::find. Match positions (byte offsets into the
// file contents) are reported on the first pass only, so the remaining
// passes perform the search without any output.
//
// argv[1] is the repeat count n (a positive integer).
// Returns 0 on success, 1 on a usage error, a non-positive n, or when
// "input.txt" cannot be opened. std::stoll throws if argv[1] is not a
// number or does not fit in long long.
int main(int argc, char *argv[]) {
  if (argc != 2) {
    std::println("Usage: pmatch <positive integer>");
    return 1;
  }

  // Keep the full long long range instead of narrowing the result to int.
  const long long n = std::stoll(argv[1]);
  if (n < 1) {
    std::println("{} is not positive.", n);
    return 1;
  }

  std::ifstream file("input.txt");
  if (!file.is_open()) {
    std::println(stderr, "\"input.txt\" cannot be opened.");
    return 1;
  }

  // Slurp the file line by line, normalising every line ending to '\n'.
  std::string line, text;
  while (std::getline(file, line)) {
    text += line;
    text += '\n';
  }

  const std::array<std::string, 4> words = {"Lincoln", "Aristotle", "Laplace",
                                            "Poisson"};

  for (long long j = 0; j < n; ++j)
    for (const auto &word : words) {
      const std::size_t found = text.find(word);
      // Report only on the first pass; later passes just repeat the search.
      if (j == 0) {
        if (found != std::string::npos)
          std::println("{} found at position: {}", word, found);
        else
          std::println("{} not found.", word);
      }
    }

  std::println("loop count = {}", n);
  return 0;
}

# Builds the byte-string benchmark pmatch.cpp twice: once with GCC (pmatch_g)
# and once with Clang (pmatch_l), both with the same flags.
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
SRC = pmatch.cpp
EXES = pmatch_g pmatch_l

# Neither target names a file, so keep make from being confused by stray
# files called "all" or "clean".
.PHONY: all clean

all: $(EXES)

pmatch_g: $(SRC)
	$(CPPG) $(CPPFLAGS) -o $@ $(SRC)

pmatch_l: $(SRC)
	$(CPPL) $(CPPFLAGS) -o $@ $(SRC)

# Removing the executables must be its own target; otherwise the rm line
# becomes part of the pmatch_l recipe and deletes what was just built.
clean:
	rm -f $(EXES)

 n = 100000で測定する。
$ measure ./pmatch_g 100000
Lincoln found at position: 4225
Aristotle found at position: 7262
Laplace found at position: 8406
Poisson found at position: 50550
loop count = 100000
Process exited with status: 0
total time:  0.195227 [sec]
mem  size:       4120 [KB]
code size:        127 [KB]
$ measure ./pmatch_l 100000
Lincoln found at position: 4225
Aristotle found at position: 7262
Laplace found at position: 8406
Poisson found at position: 50550
loop count = 100000
Process exited with status: 0
total time:  0.200083 [sec]
mem  size:       4104 [KB]
code size:        102 [KB]
open Printf
(* [read_file filename] returns the lines of [filename] in file order,
   without their trailing newlines.
   If the file cannot be opened or read ([Sys_error]), the error message is
   printed to stdout and the program exits with status 1. *)
let read_file filename =
  try
    let ic = open_in filename in
    (* Lines are accumulated in reverse and flipped once at end of file. *)
    let rec read_lines acc =
      match input_line ic with
      | line ->
        read_lines (line :: acc)
      | exception End_of_file ->
        close_in ic;
        List.rev acc
    in
    read_lines []
  with
    | Sys_error msg ->
      print_endline ("Error: " ^ msg);
      exit 1
(* Entry point of the byte-string search benchmark.
   Expects exactly one command-line argument, the repeat count n (a positive
   integer; [int_of_string] raises [Failure] otherwise). Reads "input.txt",
   then searches the text n times for each of four fixed words with
   [Str.search_forward]. Results are printed on the first pass only. *)
let () =
  let len = Array.length Sys.argv in
  if len <> 2 then
    printf "Usage: pmatch <positive integer>\n"
  else
    let n = int_of_string (Sys.argv.(1)) in
    if n < 1 then
      printf "%d is not positive.\n" n
    else begin
      let lines = read_file "input.txt" in
      let text = String.concat "\n" lines in
      let patterns = ["Lincoln"; "Aristotle"; "Laplace"; "Poisson"] in
      for j = 0 to n - 1 do
        List.iter (fun word ->
          (* NOTE(review): the regexp is rebuilt for every search. It could be
             hoisted out of the loop, but that would change what this
             benchmark times, so it is left in place. *)
          match Str.search_forward (Str.regexp_string word) text 0 with
          | exception Not_found ->
            if j = 0 then
              print_endline (word ^ " not found.")
          | index ->
            if j = 0 then
              print_endline (word ^ " found at position: " ^ string_of_int index)
        ) patterns
      done;
      printf "loop count = %d\n" n
    end

# Builds the OCaml benchmark pmatch.ml to native code, linked against the
# Str regular-expression library.
OCAMLOPT = ocamlopt
LIBPATH = -I +str
LIBS = str.cmxa
SRC = pmatch.ml
EXES = pmatch

.PHONY: all clean

all: $(EXES)

# The library archive must come before the source file that uses it.
pmatch: $(SRC)
	$(OCAMLOPT) $(LIBPATH) $(LIBS) -o $@ $(SRC)

# Removes the executable and the compiler's intermediate files.
clean:
	rm -f $(EXES) *.o *.cmx *.cmi

 n = 100000で測定する。
$ measure ./pmatch 100000
Lincoln found at position: 4225
Aristotle found at position: 7262
Laplace found at position: 8406
Poisson found at position: 50550
loop count = 100000
Process exited with status: 0
total time:  6.636662 [sec]
mem  size:       5700 [KB]
code size:       2616 [KB]
#lang racket
(require iso-printf)
(require srfi/13)
(require racket/file)
;; pmatch-body : n -> void
;; Loads "input.txt" as a string and searches it n times for each of four
;; fixed words with string-contains (SRFI 13). The positions are printed on
;; the first pass only; the remaining passes just repeat the search.
;; Exits with status 1 when the input file does not exist.
(define (pmatch-body n)
  (define input-file "input.txt")
  ;; Bail out only when the file is missing. A two-armed `if` with the
  ;; message in one arm and `(exit 1)` in the other would exit on success.
  (unless (file-exists? input-file)
    (printf "\"input.txt\" cannot be opened.\n")
    (exit 1))
  (define text (file->string input-file))
  (define patterns '("Lincoln" "Aristotle" "Laplace" "Poisson"))
  (for ([j (in-range n)])
    (for ([pattern patterns])
      ;; string-contains yields the start index of the match, or #f.
      (define pos (string-contains text pattern))
      (when (= j 0)
        (if (integer? pos)
            (printf "%s found at position: %d\n" pattern pos)
            (printf "%s is not found.\n" pattern)))))
  (printf "loop count = %d\n" n))
;; main : (vectorof string) -> void
;; Validates the command line and runs the benchmark. Exactly one argument
;; is expected, the repeat count. Anything that is not an exact integer
;; (including text that string->number cannot parse, which yields #f) is
;; treated as a usage error instead of reaching `>` or the %d directive.
(define (main args)
  (define len (vector-length args))
  (define n (and (= len 1)
                 (string->number (vector-ref args 0))))
  (cond
    [(not (exact-integer? n))
     (printf "Usage: pmatch <positive integer>\n")]
    [(> n 0)
     (pmatch-body n)]
    [else
     (printf "%d is not positive.\n" n)]))

(main (current-command-line-arguments))

 n = 10000で測定する。
$ raco exe pmatch.rkt
$ measure ./pmatch 10000
Lincoln found at position: 4198
Aristotle found at position: 7219
Laplace found at position: 8361
Poisson found at position: 50388
loop count = 10000
Process exited with status: 0
total time:  9.294210 [sec]
mem  size:     134312 [KB]
code size:      12741 [KB]
  1. カルダーノ
  2. デル・フェッロ
  3. フォンタナ
  4. ブール


#include <array>
#include <fstream>
#include <iostream>
#include <print>
#include <string>

#include <unicode/unistr.h>
// pmatch_u: Unicode string search benchmark using ICU.
//
// Reads the UTF-8 file "unicode.txt", converts it to an icu::UnicodeString
// and searches it n times for each of four fixed words with
// UnicodeString::indexOf. Match positions are the indices returned by
// indexOf and are reported on the first pass only.
//
// argv[1] is the repeat count n (a positive integer).
// Returns 0 on success, 1 on a usage error, a non-positive n, or when
// "unicode.txt" cannot be opened. std::stoll throws if argv[1] is not a
// number or does not fit in long long.
int main(int argc, char *argv[]) {
  if (argc != 2) {
    std::println("Usage: pmatch_u <positive integer>");
    return 1;
  }

  // Keep the full long long range instead of narrowing the result to int.
  const long long n = std::stoll(argv[1]);
  if (n < 1) {
    std::println("{} is not positive.", n);
    return 1;
  }

  std::ifstream file("unicode.txt");
  if (!file.is_open()) {
    std::println(stderr, "\"unicode.txt\" cannot be opened.");
    return 1;
  }

  // Slurp the file line by line, normalising every line ending to '\n'.
  std::string line, u8text;
  while (std::getline(file, line)) {
    u8text += line;
    u8text += '\n';
  }

  const icu::UnicodeString ustr = icu::UnicodeString::fromUTF8(u8text);

  // Build the patterns through fromUTF8 as well, so the conversion does not
  // depend on the platform's default codepage.
  const std::array<icu::UnicodeString, 4> words = {
      icu::UnicodeString::fromUTF8("カルダーノ"),
      icu::UnicodeString::fromUTF8("デル・フェッロ"),
      icu::UnicodeString::fromUTF8("フォンタナ"),
      icu::UnicodeString::fromUTF8("ブール")};

  for (long long j = 0; j < n; ++j)
    for (const auto &word : words) {
      const int32_t pos = ustr.indexOf(word);
      // Report only on the first pass; later passes just repeat the search.
      if (j == 0) {
        // std::println cannot format a UnicodeString, so convert the word
        // back to UTF-8 before printing it.
        std::string u8Word;
        word.toUTF8String(u8Word);
        if (pos != -1)
          std::println("{} found at position: {}", u8Word, pos);
        else
          std::println("{} not found.", u8Word);
      }
    }

  std::println("loop count = {}", n);
  return 0;
}

# Builds the Unicode benchmark pmatch_u.cpp twice: once with GCC (pmatch_ug)
# and once with Clang (pmatch_ul), both linked against ICU's common library.
CPPG = g++
CPPL = clang++
CPPFLAGS = -std=c++23 -O2
LIBS = -licuuc
SRC = pmatch_u.cpp
EXES = pmatch_ug pmatch_ul

# Neither target names a file, so keep make from being confused by stray
# files called "all" or "clean".
.PHONY: all clean

all: $(EXES)

pmatch_ug: $(SRC)
	$(CPPG) $(CPPFLAGS) -o $@ $(SRC) $(LIBS)

pmatch_ul: $(SRC)
	$(CPPL) $(CPPFLAGS) -o $@ $(SRC) $(LIBS)

# Removing the executables must be its own target; otherwise the rm line
# becomes part of the pmatch_ul recipe and deletes what was just built.
clean:
	rm -f $(EXES)

 n = 100000で測定する。
$ measure ./pmatch_ug 100000
カルダーノ found at position: 10
デル・フェッロ found at position: 221
フォンタナ found at position: 110
ブール found at position: 2125
loop count = 100000
Process exited with status: 0
total time:  0.164566 [sec]
mem  size:       4876 [KB]
code size:        133 [KB]
$ measure ./pmatch_ul 100000
カルダーノ found at position: 10
デル・フェッロ found at position: 221
フォンタナ found at position: 110
ブール found at position: 2125
loop count = 100000
Process exited with status: 0
total time:  0.147316 [sec]
mem  size:       4856 [KB]
code size:        103 [KB]
#lang racket
(require iso-printf)
(require srfi/13)
(require racket/file)
;; pmatch-body : n -> void
;; Loads "unicode.txt" as a string and searches it n times for each of four
;; fixed Japanese words with string-contains (SRFI 13). The positions are
;; printed on the first pass only; the remaining passes just repeat the
;; search. Exits with status 1 when the input file does not exist.
(define (pmatch-body n)
  (define input-file "unicode.txt")
  ;; Bail out only when the file is missing. A two-armed `if` with the
  ;; message in one arm and `(exit 1)` in the other would exit on success.
  (unless (file-exists? input-file)
    (printf "\"unicode.txt\" cannot be opened.\n")
    (exit 1))
  (define text (file->string input-file))
  (define patterns '("カルダーノ" "デル・フェッロ" "フォンタナ" "ブール"))
  (for ([j (in-range n)])
    (for ([pattern patterns])
      ;; string-contains yields the start index of the match, or #f.
      (define pos (string-contains text pattern))
      (when (= j 0)
        (if (integer? pos)
            (printf "%s found at position: %d\n" pattern pos)
            (printf "%s is not found.\n" pattern)))))
  (printf "loop count = %d\n" n))
;; main : (vectorof string) -> void
;; Validates the command line and runs the benchmark. Exactly one argument
;; is expected, the repeat count. Anything that is not an exact integer
;; (including text that string->number cannot parse, which yields #f) is
;; treated as a usage error instead of reaching `>` or the %d directive.
(define (main args)
  (define len (vector-length args))
  (define n (and (= len 1)
                 (string->number (vector-ref args 0))))
  (cond
    [(not (exact-integer? n))
     (printf "Usage: pmatch_u <positive integer>\n")]
    [(> n 0)
     (pmatch-body n)]
    [else
     (printf "%d is not positive.\n" n)]))

(main (current-command-line-arguments))

 n = 100000で測定する。
$ raco exe pmatch_u.rkt
$ measure ./pmatch_u 100000
カルダーノ found at position: 10
デル・フェッロ found at position: 221
フォンタナ found at position: 110
ブール found at position: 2125
loop count = 100000
Process exited with status: 0
total time:  3.727788 [sec]
mem  size:     139936 [KB]
code size:      12741 [KB]
測定項目 GCC Clang OCaml Racket
バイト文字列 0.195 0.200 6.637 -
UNICODE文字列 0.165 0.147 - 3.728
© 2024 Takeshi Umetani