解答8: 配列と文字列の実践
概要
この解答では、配列と文字列の高度な操作を実装します。配列のソートやマージ、文字列の検索と置換、UTF-8エンコーディングの扱いなど、実用的なアルゴリズムとテクニックを学びます。
Part 1: 配列操作
完全な実装
const std = @import("std");
// 配列を反転
fn reverseArray(comptime T: type, array: []T) void {
if (array.len == 0) return;
var left: usize = 0;
var right: usize = array.len - 1;
while (left < right) {
const temp = array[left];
array[left] = array[right];
array[right] = temp;
left += 1;
right -= 1;
}
}
// 配列をローテート(左に n 要素シフト)
fn rotateLeft(comptime T: type, array: []T, n: usize) void {
if (array.len == 0 or n == 0) return;
const rotate_count = n % array.len; // 実際のローテーション数
if (rotate_count == 0) return;
// 3回の反転でローテーションを実現
reverseArray(T, array[0..rotate_count]);
reverseArray(T, array[rotate_count..]);
reverseArray(T, array);
}
// 2つの配列をマージ(ソート済み配列をマージしてソート済み配列を作成)
fn mergeSorted(
comptime T: type,
allocator: std.mem.Allocator,
arr1: []const T,
arr2: []const T,
) ![]T {
const result = try allocator.alloc(T, arr1.len + arr2.len);
var i: usize = 0;
var j: usize = 0;
var k: usize = 0;
// 両方の配列に要素が残っている間
while (i < arr1.len and j < arr2.len) {
if (arr1[i] <= arr2[j]) {
result[k] = arr1[i];
i += 1;
} else {
result[k] = arr2[j];
j += 1;
}
k += 1;
}
// arr1の残りをコピー
while (i < arr1.len) {
result[k] = arr1[i];
i += 1;
k += 1;
}
// arr2の残りをコピー
while (j < arr2.len) {
result[k] = arr2[j];
j += 1;
k += 1;
}
return result;
}
// 配列から重複を削除
fn removeDuplicates(
comptime T: type,
allocator: std.mem.Allocator,
array: []const T,
) ![]T {
if (array.len == 0) {
return try allocator.alloc(T, 0);
}
var result = std.ArrayList(T).init(allocator);
defer result.deinit();
// 最初の要素は必ず追加
try result.append(array[0]);
// 2番目以降の要素をチェック
for (array[1..]) |item| {
var is_duplicate = false;
// 既に追加された要素と比較
for (result.items) |existing| {
if (item == existing) {
is_duplicate = true;
break;
}
}
if (!is_duplicate) {
try result.append(item);
}
}
return result.toOwnedSlice();
}
// 配列の分割
fn partition(
comptime T: type,
array: []T,
predicate: *const fn(T) bool,
) struct { true_count: usize, false_count: usize } {
var write_pos: usize = 0;
// 述語がtrueの要素を前に移動
for (array) |item| {
if (predicate(item)) {
array[write_pos] = item;
write_pos += 1;
}
}
const true_count = write_pos;
// 述語がfalseの要素を後ろに移動
write_pos = true_count;
for (array) |item| {
if (!predicate(item)) {
array[write_pos] = item;
write_pos += 1;
}
}
return .{
.true_count = true_count,
.false_count = array.len - true_count,
};
}
fn isEven(x: i32) bool {
return @mod(x, 2) == 0;
}
pub fn main() !void {
const allocator = std.heap.page_allocator;
std.debug.print("=== Reverse Array ===\n", .{});
var arr1 = [_]i32{ 1, 2, 3, 4, 5 };
std.debug.print("Before: ", .{});
for (arr1) |n| std.debug.print("{} ", .{n});
std.debug.print("\n", .{});
reverseArray(i32, &arr1);
std.debug.print("After: ", .{});
for (arr1) |n| std.debug.print("{} ", .{n});
std.debug.print("\n", .{});
std.debug.print("\n=== Rotate Left ===\n", .{});
var arr2 = [_]i32{ 1, 2, 3, 4, 5 };
std.debug.print("Before: ", .{});
for (arr2) |n| std.debug.print("{} ", .{n});
std.debug.print("\n", .{});
rotateLeft(i32, &arr2, 2);
std.debug.print("After rotate left by 2: ", .{});
for (arr2) |n| std.debug.print("{} ", .{n});
std.debug.print("\n", .{});
std.debug.print("\n=== Merge Sorted ===\n", .{});
const sorted1 = [_]i32{ 1, 3, 5 };
const sorted2 = [_]i32{ 2, 4, 6 };
const merged = try mergeSorted(i32, allocator, &sorted1, &sorted2);
defer allocator.free(merged);
std.debug.print("Merged: ", .{});
for (merged) |n| std.debug.print("{} ", .{n});
std.debug.print("\n", .{});
std.debug.print("\n=== Remove Duplicates ===\n", .{});
const with_dups = [_]i32{ 1, 2, 2, 3, 3, 3, 4, 5, 5 };
const unique = try removeDuplicates(i32, allocator, &with_dups);
defer allocator.free(unique);
std.debug.print("Original: ", .{});
for (with_dups) |n| std.debug.print("{} ", .{n});
std.debug.print("\nUnique: ", .{});
for (unique) |n| std.debug.print("{} ", .{n});
std.debug.print("\n", .{});
std.debug.print("\n=== Partition ===\n", .{});
var arr3 = [_]i32{ 1, 2, 3, 4, 5, 6, 7, 8 };
const counts = partition(i32, &arr3, &isEven);
std.debug.print("Partitioned (even first): ", .{});
for (arr3) |n| std.debug.print("{} ", .{n});
std.debug.print("\nEven: {}, Odd: {}\n", .{counts.true_count, counts.false_count});
}
実装のポイント
- ローテーション: 3回の反転で効率的にローテーションを実現します。
- マージ: 2つのソート済み配列を線形時間でマージします。
- 重複削除: ArrayListを使って動的に結果を構築します。
- 分割: パーティショニングアルゴリズムで条件に応じて要素を分けます。
Part 2: 文字列基本操作
完全な実装
const std = @import("std");
// 文字列を大文字に変換
fn toUpper(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
const result = try allocator.alloc(u8, str.len);
for (str, 0..) |ch, i| {
result[i] = std.ascii.toUpper(ch);
}
return result;
}
// 文字列を小文字に変換
fn toLower(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
const result = try allocator.alloc(u8, str.len);
for (str, 0..) |ch, i| {
result[i] = std.ascii.toLower(ch);
}
return result;
}
// 文字列を反転
fn reverseString(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
const result = try allocator.alloc(u8, str.len);
for (str, 0..) |ch, i| {
result[str.len - 1 - i] = ch;
}
return result;
}
// 文字列が回文か判定
fn isPalindrome(str: []const u8) bool {
if (str.len == 0) return true;
var left: usize = 0;
var right: usize = str.len - 1;
while (left < right) {
if (str[left] != str[right]) {
return false;
}
left += 1;
right -= 1;
}
return true;
}
// 文字列内の単語数をカウント
fn countWords(str: []const u8) usize {
var count: usize = 0;
var in_word = false;
for (str) |ch| {
if (std.ascii.isWhitespace(ch)) {
in_word = false;
} else {
if (!in_word) {
count += 1;
in_word = true;
}
}
}
return count;
}
pub fn main() !void {
const allocator = std.heap.page_allocator;
std.debug.print("=== To Upper/Lower ===\n", .{});
const str = "Hello, World!";
const upper = try toUpper(allocator, str);
defer allocator.free(upper);
const lower = try toLower(allocator, str);
defer allocator.free(lower);
std.debug.print("Original: {s}\n", .{str});
std.debug.print("Upper: {s}\n", .{upper});
std.debug.print("Lower: {s}\n", .{lower});
std.debug.print("\n=== Reverse String ===\n", .{});
const reversed = try reverseString(allocator, str);
defer allocator.free(reversed);
std.debug.print("Original: {s}\n", .{str});
std.debug.print("Reversed: {s}\n", .{reversed});
std.debug.print("\n=== Palindrome ===\n", .{});
std.debug.print("'racecar' is palindrome: {}\n", .{isPalindrome("racecar")});
std.debug.print("'hello' is palindrome: {}\n", .{isPalindrome("hello")});
std.debug.print("'A man a plan a canal Panama' (no spaces): {}\n", .{isPalindrome("AmanaplanacanalPanama")});
std.debug.print("\n=== Count Words ===\n", .{});
const text = "The quick brown fox jumps";
const word_count = countWords(text);
std.debug.print("'{s}' has {} words\n", .{text, word_count});
const text2 = " Multiple spaces between words ";
const word_count2 = countWords(text2);
std.debug.print("'{s}' has {} words\n", .{text2, word_count2});
}
実装のポイント
Part 3: 文字列検索と置換
完全な実装
const std = @import("std");
// 部分文字列を検索(すべての出現位置)
fn findAll(
allocator: std.mem.Allocator,
haystack: []const u8,
needle: []const u8,
) ![]usize {
var positions = std.ArrayList(usize).init(allocator);
defer positions.deinit();
if (needle.len == 0 or needle.len > haystack.len) {
return positions.toOwnedSlice();
}
var i: usize = 0;
while (i <= haystack.len - needle.len) {
if (std.mem.eql(u8, haystack[i..i + needle.len], needle)) {
try positions.append(i);
i += needle.len; // 重複を避けるためneedle.len分進む
} else {
i += 1;
}
}
return positions.toOwnedSlice();
}
// 文字列を置換(すべての出現を置換)
fn replaceAll(
allocator: std.mem.Allocator,
haystack: []const u8,
needle: []const u8,
replacement: []const u8,
) ![]u8 {
if (needle.len == 0) {
return try allocator.dupe(u8, haystack);
}
var result = std.ArrayList(u8).init(allocator);
defer result.deinit();
var i: usize = 0;
while (i < haystack.len) {
if (i + needle.len <= haystack.len and
std.mem.eql(u8, haystack[i..i + needle.len], needle))
{
try result.appendSlice(replacement);
i += needle.len;
} else {
try result.append(haystack[i]);
i += 1;
}
}
return result.toOwnedSlice();
}
// 文字列を分割
fn split(
allocator: std.mem.Allocator,
str: []const u8,
delimiter: []const u8,
) ![][]const u8 {
var parts = std.ArrayList([]const u8).init(allocator);
defer parts.deinit();
if (delimiter.len == 0) {
try parts.append(str);
return parts.toOwnedSlice();
}
var start: usize = 0;
var i: usize = 0;
while (i <= str.len - delimiter.len) {
if (std.mem.eql(u8, str[i..i + delimiter.len], delimiter)) {
// デリミタが見つかった
const part = try allocator.dupe(u8, str[start..i]);
try parts.append(part);
i += delimiter.len;
start = i;
} else {
i += 1;
}
}
// 最後の部分を追加
const last_part = try allocator.dupe(u8, str[start..]);
try parts.append(last_part);
return parts.toOwnedSlice();
}
// 文字列を結合
fn join(
allocator: std.mem.Allocator,
strings: []const []const u8,
separator: []const u8,
) ![]u8 {
if (strings.len == 0) {
return try allocator.alloc(u8, 0);
}
// 必要な総バイト数を計算
var total_len: usize = 0;
for (strings, 0..) |str, i| {
total_len += str.len;
if (i < strings.len - 1) {
total_len += separator.len;
}
}
var result = try allocator.alloc(u8, total_len);
var pos: usize = 0;
for (strings, 0..) |str, i| {
@memcpy(result[pos..pos + str.len], str);
pos += str.len;
if (i < strings.len - 1) {
@memcpy(result[pos..pos + separator.len], separator);
pos += separator.len;
}
}
return result;
}
// 先頭と末尾の空白を削除
fn trim(str: []const u8) []const u8 {
if (str.len == 0) return str;
var start: usize = 0;
var end: usize = str.len;
// 先頭の空白をスキップ
while (start < end and std.ascii.isWhitespace(str[start])) {
start += 1;
}
// 末尾の空白をスキップ
while (end > start and std.ascii.isWhitespace(str[end - 1])) {
end -= 1;
}
return str[start..end];
}
pub fn main() !void {
const allocator = std.heap.page_allocator;
std.debug.print("=== Find All ===\n", .{});
const text = "hello world, hello universe, hello there";
const positions = try findAll(allocator, text, "hello");
defer allocator.free(positions);
std.debug.print("'hello' found at positions: ", .{});
for (positions) |pos| std.debug.print("{} ", .{pos});
std.debug.print("\n", .{});
std.debug.print("\n=== Replace All ===\n", .{});
const replaced = try replaceAll(allocator, text, "hello", "hi");
defer allocator.free(replaced);
std.debug.print("Original: {s}\n", .{text});
std.debug.print("Replaced: {s}\n", .{replaced});
std.debug.print("\n=== Split ===\n", .{});
const csv = "apple,banana,cherry,date";
const parts = try split(allocator, csv, ",");
defer {
for (parts) |part| allocator.free(part);
allocator.free(parts);
}
std.debug.print("Original: {s}\n", .{csv});
std.debug.print("Split parts:\n", .{});
for (parts, 0..) |part, i| {
std.debug.print(" [{}]: {s}\n", .{i, part});
}
std.debug.print("\n=== Join ===\n", .{});
const joined = try join(allocator, parts, " | ");
defer allocator.free(joined);
std.debug.print("Joined: {s}\n", .{joined});
std.debug.print("\n=== Trim ===\n", .{});
const padded = " hello world ";
const trimmed = trim(padded);
std.debug.print("Original: '{s}' (length: {})\n", .{padded, padded.len});
std.debug.print("Trimmed: '{s}' (length: {})\n", .{trimmed, trimmed.len});
}
実装のポイント
Part 4: UTF-8処理
完全な実装
const std = @import("std");
// UTF-8文字列の文字数をカウント
fn countUtf8Chars(str: []const u8) !usize {
var count: usize = 0;
var i: usize = 0;
while (i < str.len) {
const byte_len = try std.unicode.utf8ByteSequenceLength(str[i]);
i += byte_len;
count += 1;
}
return count;
}
// UTF-8文字列からn番目の文字を取得
fn getUtf8Char(str: []const u8, n: usize) ![]const u8 {
var count: usize = 0;
var i: usize = 0;
while (i < str.len) {
const byte_len = try std.unicode.utf8ByteSequenceLength(str[i]);
if (count == n) {
return str[i..i + byte_len];
}
i += byte_len;
count += 1;
}
return error.IndexOutOfBounds;
}
// UTF-8文字列を反転(文字単位)
fn reverseUtf8(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
// まず各文字の位置を収集
var char_positions = std.ArrayList(struct { start: usize, len: usize }).init(allocator);
defer char_positions.deinit();
var i: usize = 0;
while (i < str.len) {
const byte_len = try std.unicode.utf8ByteSequenceLength(str[i]);
try char_positions.append(.{ .start = i, .len = byte_len });
i += byte_len;
}
// 結果の配列を確保
const result = try allocator.alloc(u8, str.len);
var pos: usize = 0;
// 逆順でコピー
var j: usize = char_positions.items.len;
while (j > 0) {
j -= 1;
const char_info = char_positions.items[j];
@memcpy(result[pos..pos + char_info.len], str[char_info.start..char_info.start + char_info.len]);
pos += char_info.len;
}
return result;
}
// 文字列のバイト数と文字数を返す
fn stringInfo(str: []const u8) !struct { bytes: usize, chars: usize } {
const char_count = try countUtf8Chars(str);
return .{
.bytes = str.len,
.chars = char_count,
};
}
// すべての文字がASCIIか判定
fn isAscii(str: []const u8) bool {
for (str) |ch| {
if (ch > 127) {
return false;
}
}
return true;
}
pub fn main() !void {
const allocator = std.heap.page_allocator;
std.debug.print("=== Count UTF-8 Chars ===\n", .{});
const str1 = "Hello";
const str2 = "こんにちは";
const str3 = "Hello 世界";
std.debug.print("'{s}': {} chars\n", .{str1, try countUtf8Chars(str1)});
std.debug.print("'{s}': {} chars\n", .{str2, try countUtf8Chars(str2)});
std.debug.print("'{s}': {} chars\n", .{str3, try countUtf8Chars(str3)});
std.debug.print("\n=== Get UTF-8 Char ===\n", .{});
const char0 = try getUtf8Char(str2, 0);
const char1 = try getUtf8Char(str2, 1);
std.debug.print("First char of '{s}': {s}\n", .{str2, char0});
std.debug.print("Second char of '{s}': {s}\n", .{str2, char1});
std.debug.print("\n=== Reverse UTF-8 ===\n", .{});
const reversed = try reverseUtf8(allocator, str2);
defer allocator.free(reversed);
std.debug.print("Original: {s}\n", .{str2});
std.debug.print("Reversed: {s}\n", .{reversed});
std.debug.print("\n=== String Info ===\n", .{});
const info1 = try stringInfo(str1);
const info2 = try stringInfo(str2);
const info3 = try stringInfo(str3);
std.debug.print("'{s}': {} bytes, {} chars\n", .{str1, info1.bytes, info1.chars});
std.debug.print("'{s}': {} bytes, {} chars\n", .{str2, info2.bytes, info2.chars});
std.debug.print("'{s}': {} bytes, {} chars\n", .{str3, info3.bytes, info3.chars});
std.debug.print("\n=== Is ASCII ===\n", .{});
std.debug.print("'{s}' is ASCII: {}\n", .{str1, isAscii(str1)});
std.debug.print("'{s}' is ASCII: {}\n", .{str2, isAscii(str2)});
std.debug.print("'{s}' is ASCII: {}\n", .{str3, isAscii(str3)});
}
実装のポイント
ボーナス課題の解答
Bonus 1: パターンマッチング
const std = @import("std");
// ワイルドカードパターンマッチング
fn wildcardMatch(pattern: []const u8, str: []const u8) bool {
var p: usize = 0;
var s: usize = 0;
var star_p: ?usize = null;
var star_s: ?usize = null;
while (s < str.len) {
if (p < pattern.len and (pattern[p] == '?' or pattern[p] == str[s])) {
// 文字が一致するか、'?'の場合
p += 1;
s += 1;
} else if (p < pattern.len and pattern[p] == '*') {
// '*'を記録
star_p = p;
star_s = s;
p += 1;
} else if (star_p != null) {
// '*'でバックトラック
p = star_p.? + 1;
star_s = star_s.? + 1;
s = star_s.?;
} else {
return false;
}
}
// 残りのパターンが全て'*'か確認
while (p < pattern.len and pattern[p] == '*') {
p += 1;
}
return p == pattern.len;
}
// グロブパターンで複数のファイル名をフィルタ
fn filterByPattern(
allocator: std.mem.Allocator,
files: []const []const u8,
pattern: []const u8,
) ![][]const u8 {
var result = std.ArrayList([]const u8).init(allocator);
defer result.deinit();
for (files) |file| {
if (wildcardMatch(pattern, file)) {
try result.append(file);
}
}
return result.toOwnedSlice();
}
pub fn main() !void {
const allocator = std.heap.page_allocator;
std.debug.print("=== Wildcard Match ===\n", .{});
const tests = [_]struct { pattern: []const u8, str: []const u8 }{
.{ .pattern = "*.txt", .str = "file.txt" },
.{ .pattern = "*.txt", .str = "file.md" },
.{ .pattern = "test?.c", .str = "test1.c" },
.{ .pattern = "test?.c", .str = "test12.c" },
.{ .pattern = "*hello*", .str = "say hello world" },
.{ .pattern = "*hello*", .str = "goodbye" },
};
for (tests) |test| {
const matches = wildcardMatch(test.pattern, test.str);
std.debug.print("'{s}' matches '{s}': {}\n", .{test.pattern, test.str, matches});
}
std.debug.print("\n=== Filter by Pattern ===\n", .{});
const files = [_][]const u8{
"main.zig",
"test.zig",
"readme.md",
"util.zig",
"config.json",
};
const filtered = try filterByPattern(allocator, &files, "*.zig");
defer allocator.free(filtered);
std.debug.print("Files matching '*.zig':\n", .{});
for (filtered) |file| {
std.debug.print(" {s}\n", .{file});
}
}
Bonus 2: 文字列圧縮
const std = @import("std");
// RLE圧縮
fn compress(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
if (str.len == 0) {
return try allocator.alloc(u8, 0);
}
var result = std.ArrayList(u8).init(allocator);
defer result.deinit();
var i: usize = 0;
while (i < str.len) {
const current_char = str[i];
var count: usize = 1;
// 同じ文字が続く限りカウント
while (i + count < str.len and str[i + count] == current_char) {
count += 1;
}
// カウントと文字を追加
const count_str = try std.fmt.allocPrint(allocator, "{}", .{count});
defer allocator.free(count_str);
try result.appendSlice(count_str);
try result.append(current_char);
i += count;
}
return result.toOwnedSlice();
}
// RLE展開
fn decompress(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
var result = std.ArrayList(u8).init(allocator);
defer result.deinit();
var i: usize = 0;
while (i < str.len) {
// 数字を読み取る
var count: usize = 0;
while (i < str.len and std.ascii.isDigit(str[i])) {
count = count * 10 + (str[i] - '0');
i += 1;
}
// 文字を読み取る
if (i < str.len) {
const ch = str[i];
i += 1;
// count回文字を追加
var j: usize = 0;
while (j < count) : (j += 1) {
try result.append(ch);
}
}
}
return result.toOwnedSlice();
}
// 圧縮率を計算
fn compressionRatio(original: []const u8, compressed: []const u8) f64 {
if (original.len == 0) return 0.0;
return @as(f64, @floatFromInt(compressed.len)) / @as(f64, @floatFromInt(original.len));
}
pub fn main() !void {
const allocator = std.heap.page_allocator;
std.debug.print("=== Compress ===\n", .{});
const original = "aaabbbcccdddd";
const compressed = try compress(allocator, original);
defer allocator.free(compressed);
std.debug.print("Original: {s} ({} bytes)\n", .{original, original.len});
std.debug.print("Compressed: {s} ({} bytes)\n", .{compressed, compressed.len});
const ratio = compressionRatio(original, compressed);
std.debug.print("Compression ratio: {d:.2}%\n", .{ratio * 100.0});
std.debug.print("\n=== Decompress ===\n", .{});
const decompressed = try decompress(allocator, compressed);
defer allocator.free(decompressed);
std.debug.print("Decompressed: {s}\n", .{decompressed});
const match = std.mem.eql(u8, original, decompressed);
std.debug.print("Match: {}\n", .{match});
}
よくある間違い
1. メモリリークの忘れ
// 悪い例: splitの結果を適切に解放していない
const parts = try split(allocator, "a,b,c", ",");
// 各partとparts配列自体を解放する必要がある!
// 良い例: deferで確実に解放
const parts = try split(allocator, "a,b,c", ",");
defer {
for (parts) |part| allocator.free(part);
allocator.free(parts);
}
2. UTF-8のバイト長を無視
// 悪い例: バイト単位で文字列を反転(UTF-8が壊れる)
fn badReverse(str: []u8) void {
var i: usize = 0;
var j = str.len - 1;
while (i < j) {
const temp = str[i];
str[i] = str[j];
str[j] = temp;
i += 1;
j -= 1;
}
}
// 良い例: 文字単位で反転
fn goodReverse(allocator: std.mem.Allocator, str: []const u8) ![]u8 {
return reverseUtf8(allocator, str);
}
発展課題
1. KMPアルゴリズム
Knuth-Morris-Pratt文字列検索アルゴリズムを実装してみましょう。
2. レーベンシュタイン距離
2つの文字列間の編集距離を計算する関数を実装してみましょう。
3. 正規表現エンジン
簡単な正規表現エンジンを実装してみましょう。
4. Unicode正規化
Unicode正規化(NFC、NFD)を実装してみましょう。
まとめ
この解答では、以下の重要な概念を学びました:
これらの技術を使うことで、実用的な文字列処理とデータ操作が可能になります。