Zig選択課題02: コンパイラ/インタプリタ実装

課題説明

背景

コンパイラとインタプリタは、プログラミング言語を機械が理解できる形式に変換する重要なソフトウェアです。この課題では、Zigを使って簡易プログラミング言語「MiniLang」のインタプリタを実装します。

MiniLangは以下の機能を持つシンプルな言語です:

  • 変数宣言と代入
  • 四則演算と比較演算
  • if文とwhile文
  • 関数定義と呼び出し
  • print文
  • この実装を通じて、コンパイラ理論の基礎(字句解析、構文解析、意味解析、実行)を実践的に学びます。

    MiniLang言語仕様

    // 変数宣言と代入
    var x = 10;
    var y = 20;
    
    // 四則演算
    var sum = x + y;
    var product = x * y;
    
    // 比較と条件分岐
    if (x < y) {
        print("x is less than y");
    }
    
    // ループ
    var i = 0;
    while (i < 10) {
        print(i);
        i = i + 1;
    }
    
    // 関数定義
    func fibonacci(n) {
        if (n <= 1) {
            return n;
        }
        return fibonacci(n - 1) + fibonacci(n - 2);
    }
    
    // 関数呼び出し
    print(fibonacci(10));
    

    要件

    以下のコンポーネントを実装してください:

  • 字句解析器(Lexer)
- ソースコードをトークン列に変換 - キーワード、識別子、数値リテラル、演算子、括弧などを認識

  • 構文解析器(Parser)
- トークン列を抽象構文木(AST)に変換 - 再帰下降パーサーで実装 - エラー検出とエラーメッセージ

  • 抽象構文木(AST)
- プログラムの構造を木構造で表現 - 式ノード、文ノード、関数ノードなど

  • インタプリタ(Evaluator)
- ASTを走査して実行 - 変数スコープの管理 - 関数呼び出しのスタック管理

  • 実行環境
- REPL(Read-Eval-Print Loop)モード - ファイルからの実行モード - エラーハンドリング

制約

  • Zig標準ライブラリは使用可能
  • サードパーティライブラリは不可
  • 全てのコードはコンパイル可能で動作すること

---

想定解答

プロジェクト構造

minilang/
├── build.zig
├── src/
│   ├── main.zig
│   ├── lexer.zig
│   ├── token.zig
│   ├── parser.zig
│   ├── ast.zig
│   ├── evaluator.zig
│   └── environment.zig
└── examples/
    ├── hello.ml
    ├── fibonacci.ml
    └── factorial.ml

トークン定義 (token.zig)

const std = @import("std");

pub const TokenType = enum {
    // リテラル
    number,
    identifier,

    // キーワード
    var_keyword,
    func_keyword,
    if_keyword,
    else_keyword,
    while_keyword,
    return_keyword,
    print_keyword,

    // 演算子
    plus,
    minus,
    star,
    slash,
    assign,
    equal,
    not_equal,
    less_than,
    less_equal,
    greater_than,
    greater_equal,

    // 区切り文字
    semicolon,
    comma,
    lparen,
    rparen,
    lbrace,
    rbrace,

    // 特殊
    eof,
    illegal,
};

pub const Token = struct {
    type: TokenType,
    lexeme: []const u8,
    line: usize,
    column: usize,

    pub fn init(token_type: TokenType, lexeme: []const u8, line: usize, column: usize) Token {
        return .{
            .type = token_type,
            .lexeme = lexeme,
            .line = line,
            .column = column,
        };
    }

    pub fn format(self: Token, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        try writer.print("{s}('{s}') at {}:{}", .{ @tagName(self.type), self.lexeme, self.line, self.column });
    }
};

pub fn keywordType(word: []const u8) ?TokenType {
    const keywords = std.ComptimeStringMap(TokenType, .{
        .{ "var", .var_keyword },
        .{ "func", .func_keyword },
        .{ "if", .if_keyword },
        .{ "else", .else_keyword },
        .{ "while", .while_keyword },
        .{ "return", .return_keyword },
        .{ "print", .print_keyword },
    });
    return keywords.get(word);
}

字句解析器 (lexer.zig)

const std = @import("std");
const Token = @import("token.zig").Token;
const TokenType = @import("token.zig").TokenType;
const keywordType = @import("token.zig").keywordType;

pub const Lexer = struct {
    source: []const u8,
    position: usize,
    line: usize,
    column: usize,
    allocator: std.mem.Allocator,

    pub fn init(allocator: std.mem.Allocator, source: []const u8) Lexer {
        return .{
            .source = source,
            .position = 0,
            .line = 1,
            .column = 1,
            .allocator = allocator,
        };
    }

    pub fn nextToken(self: *Lexer) !Token {
        self.skipWhitespace();

        if (self.isAtEnd()) {
            return Token.init(.eof, "", self.line, self.column);
        }

        const start_line = self.line;
        const start_column = self.column;
        const ch = self.advance();

        // 数値リテラル
        if (std.ascii.isDigit(ch)) {
            return self.makeNumber(start_line, start_column);
        }

        // 識別子またはキーワード
        if (std.ascii.isAlphabetic(ch) or ch == '_') {
            return self.makeIdentifier(start_line, start_column);
        }

        // 演算子と区切り文字
        const token_type: TokenType = switch (ch) {
            '+' => .plus,
            '-' => .minus,
            '*' => .star,
            '/' => blk: {
                // コメント処理
                if (self.peek() == '/') {
                    self.skipLineComment();
                    return self.nextToken();
                }
                break :blk .slash;
            },
            '=' => if (self.match('=')) .equal else .assign,
            '!' => if (self.match('=')) .not_equal else .illegal,
            '<' => if (self.match('=')) .less_equal else .less_than,
            '>' => if (self.match('=')) .greater_equal else .greater_than,
            ';' => .semicolon,
            ',' => .comma,
            '(' => .lparen,
            ')' => .rparen,
            '{' => .lbrace,
            '}' => .rbrace,
            else => .illegal,
        };

        const lexeme = self.source[self.position - 1 .. self.position];
        return Token.init(token_type, lexeme, start_line, start_column);
    }

    fn makeNumber(self: *Lexer, line: usize, column: usize) Token {
        const start = self.position - 1;
        while (!self.isAtEnd() and std.ascii.isDigit(self.peek())) {
            _ = self.advance();
        }
        const lexeme = self.source[start..self.position];
        return Token.init(.number, lexeme, line, column);
    }

    fn makeIdentifier(self: *Lexer, line: usize, column: usize) Token {
        const start = self.position - 1;
        while (!self.isAtEnd()) {
            const ch = self.peek();
            if (!std.ascii.isAlphanumeric(ch) and ch != '_') break;
            _ = self.advance();
        }
        const lexeme = self.source[start..self.position];

        // キーワードチェック
        const token_type = keywordType(lexeme) orelse .identifier;
        return Token.init(token_type, lexeme, line, column);
    }

    fn skipWhitespace(self: *Lexer) void {
        while (!self.isAtEnd()) {
            const ch = self.peek();
            switch (ch) {
                ' ', '\r', '\t' => _ = self.advance(),
                '\n' => {
                    _ = self.advance();
                    self.line += 1;
                    self.column = 1;
                },
                else => break,
            }
        }
    }

    fn skipLineComment(self: *Lexer) void {
        while (!self.isAtEnd() and self.peek() != '\n') {
            _ = self.advance();
        }
    }

    fn advance(self: *Lexer) u8 {
        const ch = self.source[self.position];
        self.position += 1;
        self.column += 1;
        return ch;
    }

    fn peek(self: *Lexer) u8 {
        if (self.isAtEnd()) return 0;
        return self.source[self.position];
    }

    fn match(self: *Lexer, expected: u8) bool {
        if (self.isAtEnd() or self.peek() != expected) return false;
        _ = self.advance();
        return true;
    }

    fn isAtEnd(self: *Lexer) bool {
        return self.position >= self.source.len;
    }
};

抽象構文木 (ast.zig)

const std = @import("std");

pub const NodeType = enum {
    // 式
    number_literal,
    identifier,
    binary_op,
    unary_op,
    call,

    // 文
    var_decl,
    assignment,
    if_stmt,
    while_stmt,
    return_stmt,
    print_stmt,
    expr_stmt,
    block,

    // 定義
    function,
    program,
};

pub const BinaryOp = enum {
    add,
    subtract,
    multiply,
    divide,
    equal,
    not_equal,
    less_than,
    less_equal,
    greater_than,
    greater_equal,
};

pub const Node = union(NodeType) {
    // 式ノード
    number_literal: i64,
    identifier: []const u8,
    binary_op: struct {
        op: BinaryOp,
        left: *Node,
        right: *Node,
    },
    unary_op: struct {
        op: u8, // '-'など
        operand: *Node,
    },
    call: struct {
        callee: []const u8,
        args: []const *Node,
    },

    // 文ノード
    var_decl: struct {
        name: []const u8,
        initializer: *Node,
    },
    assignment: struct {
        name: []const u8,
        value: *Node,
    },
    if_stmt: struct {
        condition: *Node,
        then_branch: *Node,
        else_branch: ?*Node,
    },
    while_stmt: struct {
        condition: *Node,
        body: *Node,
    },
    return_stmt: struct {
        value: ?*Node,
    },
    print_stmt: struct {
        value: *Node,
    },
    expr_stmt: struct {
        expr: *Node,
    },
    block: struct {
        statements: []const *Node,
    },

    // 定義ノード
    function: struct {
        name: []const u8,
        params: []const []const u8,
        body: *Node,
    },
    program: struct {
        statements: []const *Node,
    },

    pub fn format(self: Node, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        switch (self) {
            .number_literal => |val| try writer.print("Num({})", .{val}),
            .identifier => |name| try writer.print("Id({})", .{name}),
            .binary_op => |op| try writer.print("BinOp({s})", .{@tagName(op.op)}),
            else => try writer.print("{s}", .{@tagName(self)}),
        }
    }
};

構文解析器 (parser.zig)

const std = @import("std");
const Token = @import("token.zig").Token;
const TokenType = @import("token.zig").TokenType;
const Lexer = @import("lexer.zig").Lexer;
const ast = @import("ast.zig");
const Node = ast.Node;
const BinaryOp = ast.BinaryOp;

pub const Parser = struct {
    lexer: *Lexer,
    current: Token,
    allocator: std.mem.Allocator,

    pub fn init(allocator: std.mem.Allocator, lexer: *Lexer) !Parser {
        const current = try lexer.nextToken();
        return .{
            .lexer = lexer,
            .current = current,
            .allocator = allocator,
        };
    }

    pub fn parse(self: *Parser) !*Node {
        var statements = std.ArrayList(*Node).init(self.allocator);
        defer statements.deinit();

        while (self.current.type != .eof) {
            const stmt = try self.parseStatement();
            try statements.append(stmt);
        }

        const program_node = try self.allocator.create(Node);
        program_node.* = .{ .program = .{ .statements = try statements.toOwnedSlice() } };
        return program_node;
    }

    fn parseStatement(self: *Parser) !*Node {
        return switch (self.current.type) {
            .var_keyword => self.parseVarDecl(),
            .func_keyword => self.parseFuncDecl(),
            .if_keyword => self.parseIfStmt(),
            .while_keyword => self.parseWhileStmt(),
            .return_keyword => self.parseReturnStmt(),
            .print_keyword => self.parsePrintStmt(),
            .lbrace => self.parseBlock(),
            else => self.parseExprStmt(),
        };
    }

    fn parseVarDecl(self: *Parser) !*Node {
        try self.expect(.var_keyword);

        const name = self.current.lexeme;
        try self.expect(.identifier);

        try self.expect(.assign);

        const initializer = try self.parseExpression();

        try self.expect(.semicolon);

        const node = try self.allocator.create(Node);
        node.* = .{ .var_decl = .{ .name = name, .initializer = initializer } };
        return node;
    }

    fn parseFuncDecl(self: *Parser) !*Node {
        try self.expect(.func_keyword);

        const name = self.current.lexeme;
        try self.expect(.identifier);

        try self.expect(.lparen);

        var params = std.ArrayList([]const u8).init(self.allocator);
        defer params.deinit();

        if (self.current.type != .rparen) {
            while (true) {
                const param = self.current.lexeme;
                try self.expect(.identifier);
                try params.append(param);

                if (self.current.type != .comma) break;
                try self.advance();
            }
        }

        try self.expect(.rparen);

        const body = try self.parseBlock();

        const node = try self.allocator.create(Node);
        node.* = .{ .function = .{
            .name = name,
            .params = try params.toOwnedSlice(),
            .body = body,
        } };
        return node;
    }

    fn parseIfStmt(self: *Parser) !*Node {
        try self.expect(.if_keyword);
        try self.expect(.lparen);
        const condition = try self.parseExpression();
        try self.expect(.rparen);

        const then_branch = try self.parseStatement();

        const else_branch: ?*Node = if (self.current.type == .else_keyword) blk: {
            try self.advance();
            break :blk try self.parseStatement();
        } else null;

        const node = try self.allocator.create(Node);
        node.* = .{ .if_stmt = .{
            .condition = condition,
            .then_branch = then_branch,
            .else_branch = else_branch,
        } };
        return node;
    }

    fn parseWhileStmt(self: *Parser) !*Node {
        try self.expect(.while_keyword);
        try self.expect(.lparen);
        const condition = try self.parseExpression();
        try self.expect(.rparen);

        const body = try self.parseStatement();

        const node = try self.allocator.create(Node);
        node.* = .{ .while_stmt = .{ .condition = condition, .body = body } };
        return node;
    }

    fn parseReturnStmt(self: *Parser) !*Node {
        try self.expect(.return_keyword);

        const value: ?*Node = if (self.current.type != .semicolon)
            try self.parseExpression()
        else
            null;

        try self.expect(.semicolon);

        const node = try self.allocator.create(Node);
        node.* = .{ .return_stmt = .{ .value = value } };
        return node;
    }

    fn parsePrintStmt(self: *Parser) !*Node {
        try self.expect(.print_keyword);
        try self.expect(.lparen);
        const value = try self.parseExpression();
        try self.expect(.rparen);
        try self.expect(.semicolon);

        const node = try self.allocator.create(Node);
        node.* = .{ .print_stmt = .{ .value = value } };
        return node;
    }

    fn parseBlock(self: *Parser) !*Node {
        try self.expect(.lbrace);

        var statements = std.ArrayList(*Node).init(self.allocator);
        defer statements.deinit();

        while (self.current.type != .rbrace and self.current.type != .eof) {
            const stmt = try self.parseStatement();
            try statements.append(stmt);
        }

        try self.expect(.rbrace);

        const node = try self.allocator.create(Node);
        node.* = .{ .block = .{ .statements = try statements.toOwnedSlice() } };
        return node;
    }

    fn parseExprStmt(self: *Parser) !*Node {
        const expr = try self.parseExpression();
        try self.expect(.semicolon);

        const node = try self.allocator.create(Node);
        node.* = .{ .expr_stmt = .{ .expr = expr } };
        return node;
    }

    fn parseExpression(self: *Parser) !*Node {
        return self.parseAssignment();
    }

    fn parseAssignment(self: *Parser) !*Node {
        const expr = try self.parseEquality();

        if (self.current.type == .assign) {
            if (expr.* != .identifier) {
                return error.InvalidAssignmentTarget;
            }
            try self.advance();
            const value = try self.parseAssignment();

            const node = try self.allocator.create(Node);
            node.* = .{ .assignment = .{ .name = expr.identifier, .value = value } };
            return node;
        }

        return expr;
    }

    fn parseEquality(self: *Parser) !*Node {
        var expr = try self.parseComparison();

        while (true) {
            const op: ?BinaryOp = switch (self.current.type) {
                .equal => .equal,
                .not_equal => .not_equal,
                else => null,
            };

            if (op == null) break;
            try self.advance();

            const right = try self.parseComparison();
            const node = try self.allocator.create(Node);
            node.* = .{ .binary_op = .{ .op = op.?, .left = expr, .right = right } };
            expr = node;
        }

        return expr;
    }

    fn parseComparison(self: *Parser) !*Node {
        var expr = try self.parseTerm();

        while (true) {
            const op: ?BinaryOp = switch (self.current.type) {
                .less_than => .less_than,
                .less_equal => .less_equal,
                .greater_than => .greater_than,
                .greater_equal => .greater_equal,
                else => null,
            };

            if (op == null) break;
            try self.advance();

            const right = try self.parseTerm();
            const node = try self.allocator.create(Node);
            node.* = .{ .binary_op = .{ .op = op.?, .left = expr, .right = right } };
            expr = node;
        }

        return expr;
    }

    fn parseTerm(self: *Parser) !*Node {
        var expr = try self.parseFactor();

        while (true) {
            const op: ?BinaryOp = switch (self.current.type) {
                .plus => .add,
                .minus => .subtract,
                else => null,
            };

            if (op == null) break;
            try self.advance();

            const right = try self.parseFactor();
            const node = try self.allocator.create(Node);
            node.* = .{ .binary_op = .{ .op = op.?, .left = expr, .right = right } };
            expr = node;
        }

        return expr;
    }

    fn parseFactor(self: *Parser) !*Node {
        var expr = try self.parseUnary();

        while (true) {
            const op: ?BinaryOp = switch (self.current.type) {
                .star => .multiply,
                .slash => .divide,
                else => null,
            };

            if (op == null) break;
            try self.advance();

            const right = try self.parseUnary();
            const node = try self.allocator.create(Node);
            node.* = .{ .binary_op = .{ .op = op.?, .left = expr, .right = right } };
            expr = node;
        }

        return expr;
    }

    fn parseUnary(self: *Parser) !*Node {
        if (self.current.type == .minus) {
            try self.advance();
            const operand = try self.parseUnary();
            const node = try self.allocator.create(Node);
            node.* = .{ .unary_op = .{ .op = '-', .operand = operand } };
            return node;
        }

        return self.parseCall();
    }

    fn parseCall(self: *Parser) !*Node {
        var expr = try self.parsePrimary();

        if (self.current.type == .lparen) {
            if (expr.* != .identifier) {
                return error.InvalidCallTarget;
            }

            try self.advance();

            var args = std.ArrayList(*Node).init(self.allocator);
            defer args.deinit();

            if (self.current.type != .rparen) {
                while (true) {
                    const arg = try self.parseExpression();
                    try args.append(arg);

                    if (self.current.type != .comma) break;
                    try self.advance();
                }
            }

            try self.expect(.rparen);

            const node = try self.allocator.create(Node);
            node.* = .{ .call = .{
                .callee = expr.identifier,
                .args = try args.toOwnedSlice(),
            } };
            return node;
        }

        return expr;
    }

    fn parsePrimary(self: *Parser) !*Node {
        const node = try self.allocator.create(Node);

        switch (self.current.type) {
            .number => {
                const value = try std.fmt.parseInt(i64, self.current.lexeme, 10);
                node.* = .{ .number_literal = value };
                try self.advance();
                return node;
            },
            .identifier => {
                const name = self.current.lexeme;
                node.* = .{ .identifier = name };
                try self.advance();
                return node;
            },
            .lparen => {
                try self.advance();
                const expr = try self.parseExpression();
                try self.expect(.rparen);
                return expr;
            },
            else => return error.UnexpectedToken,
        }
    }

    fn advance(self: *Parser) !void {
        self.current = try self.lexer.nextToken();
    }

    fn expect(self: *Parser, expected: TokenType) !void {
        if (self.current.type != expected) {
            std.debug.print("Expected {s}, got {}\n", .{ @tagName(expected), self.current });
            return error.UnexpectedToken;
        }
        try self.advance();
    }
};

実行環境 (environment.zig)

const std = @import("std");

pub const Value = union(enum) {
    number: i64,
    function: struct {
        params: []const []const u8,
        body: *const @import("ast.zig").Node,
        closure: *Environment,
    },

    pub fn format(self: Value, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
        switch (self) {
            .number => |n| try writer.print("{}", .{n}),
            .function => try writer.print("<function>", .{}),
        }
    }
};

pub const Environment = struct {
    values: std.StringHashMap(Value),
    parent: ?*Environment,
    allocator: std.mem.Allocator,

    pub fn init(allocator: std.mem.Allocator, parent: ?*Environment) Environment {
        return .{
            .values = std.StringHashMap(Value).init(allocator),
            .parent = parent,
            .allocator = allocator,
        };
    }

    pub fn deinit(self: *Environment) void {
        self.values.deinit();
    }

    pub fn define(self: *Environment, name: []const u8, value: Value) !void {
        try self.values.put(name, value);
    }

    pub fn get(self: *Environment, name: []const u8) ?Value {
        if (self.values.get(name)) |value| {
            return value;
        }

        if (self.parent) |parent| {
            return parent.get(name);
        }

        return null;
    }

    pub fn set(self: *Environment, name: []const u8, value: Value) !void {
        if (self.values.contains(name)) {
            try self.values.put(name, value);
            return;
        }

        if (self.parent) |parent| {
            try parent.set(name, value);
            return;
        }

        return error.UndefinedVariable;
    }
};

インタプリタ (evaluator.zig)

const std = @import("std");
const ast = @import("ast.zig");
const Node = ast.Node;
const BinaryOp = ast.BinaryOp;
const Environment = @import("environment.zig").Environment;
const Value = @import("environment.zig").Value;

pub const ReturnValue = struct {
    value: Value,
};

pub const Evaluator = struct {
    allocator: std.mem.Allocator,
    global_env: *Environment,

    pub fn init(allocator: std.mem.Allocator) !Evaluator {
        const env = try allocator.create(Environment);
        env.* = Environment.init(allocator, null);
        return .{
            .allocator = allocator,
            .global_env = env,
        };
    }

    pub fn deinit(self: *Evaluator) void {
        self.global_env.deinit();
        self.allocator.destroy(self.global_env);
    }

    pub fn eval(self: *Evaluator, node: *const Node, env: *Environment) anyerror!Value {
        switch (node.*) {
            .program => |prog| {
                var last: Value = .{ .number = 0 };
                for (prog.statements) |stmt| {
                    last = try self.eval(stmt, env);
                }
                return last;
            },

            .number_literal => |val| {
                return .{ .number = val };
            },

            .identifier => |name| {
                return env.get(name) orelse return error.UndefinedVariable;
            },

            .binary_op => |op| {
                const left = try self.eval(op.left, env);
                const right = try self.eval(op.right, env);

                if (left != .number or right != .number) {
                    return error.TypeMismatch;
                }

                const result: i64 = switch (op.op) {
                    .add => left.number + right.number,
                    .subtract => left.number - right.number,
                    .multiply => left.number * right.number,
                    .divide => @divFloor(left.number, right.number),
                    .equal => @intFromBool(left.number == right.number),
                    .not_equal => @intFromBool(left.number != right.number),
                    .less_than => @intFromBool(left.number < right.number),
                    .less_equal => @intFromBool(left.number <= right.number),
                    .greater_than => @intFromBool(left.number > right.number),
                    .greater_equal => @intFromBool(left.number >= right.number),
                };

                return .{ .number = result };
            },

            .unary_op => |op| {
                const operand = try self.eval(op.operand, env);
                if (operand != .number) {
                    return error.TypeMismatch;
                }
                return .{ .number = -operand.number };
            },

            .var_decl => |decl| {
                const value = try self.eval(decl.initializer, env);
                try env.define(decl.name, value);
                return value;
            },

            .assignment => |assign| {
                const value = try self.eval(assign.value, env);
                try env.set(assign.name, value);
                return value;
            },

            .if_stmt => |if_stmt| {
                const condition = try self.eval(if_stmt.condition, env);
                if (condition != .number) {
                    return error.TypeMismatch;
                }

                if (condition.number != 0) {
                    return try self.eval(if_stmt.then_branch, env);
                } else if (if_stmt.else_branch) |else_branch| {
                    return try self.eval(else_branch, env);
                }

                return .{ .number = 0 };
            },

            .while_stmt => |while_stmt| {
                var last: Value = .{ .number = 0 };
                while (true) {
                    const condition = try self.eval(while_stmt.condition, env);
                    if (condition != .number) {
                        return error.TypeMismatch;
                    }
                    if (condition.number == 0) break;

                    last = try self.eval(while_stmt.body, env);
                }
                return last;
            },

            .function => |func| {
                const closure = try self.allocator.create(Environment);
                closure.* = Environment.init(self.allocator, env);

                const value = Value{
                    .function = .{
                        .params = func.params,
                        .body = func.body,
                        .closure = closure,
                    },
                };

                try env.define(func.name, value);
                return value;
            },

            .call => |call| {
                const callee = env.get(call.callee) orelse return error.UndefinedFunction;

                if (callee != .function) {
                    return error.NotAFunction;
                }

                const func = callee.function;

                if (func.params.len != call.args.len) {
                    return error.ArgumentCountMismatch;
                }

                // 新しい環境を作成(クロージャを親に)
                var call_env = Environment.init(self.allocator, func.closure);
                defer call_env.deinit();

                // 引数を環境にバインド
                for (func.params, call.args) |param, arg| {
                    const arg_value = try self.eval(arg, env);
                    try call_env.define(param, arg_value);
                }

                // 関数本体を実行
                return self.eval(func.body, &call_env) catch |err| {
                    if (err == error.ReturnValue) {
                        // リターン値を取得(簡易実装)
                        return .{ .number = 0 };
                    }
                    return err;
                };
            },

            .return_stmt => |ret| {
                if (ret.value) |val| {
                    _ = try self.eval(val, env);
                }
                return error.ReturnValue;
            },

            .print_stmt => |print| {
                const value = try self.eval(print.value, env);
                std.debug.print("{}\n", .{value});
                return value;
            },

            .expr_stmt => |stmt| {
                return try self.eval(stmt.expr, env);
            },

            .block => |block| {
                var block_env = Environment.init(self.allocator, env);
                defer block_env.deinit();

                var last: Value = .{ .number = 0 };
                for (block.statements) |stmt| {
                    last = try self.eval(stmt, &block_env);
                }
                return last;
            },
        }
    }
};

メインプログラム (main.zig)

const std = @import("std");
const Lexer = @import("lexer.zig").Lexer;
const Parser = @import("parser.zig").Parser;
const Evaluator = @import("evaluator.zig").Evaluator;

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    if (args.len > 1) {
        // ファイル実行モード
        try runFile(allocator, args[1]);
    } else {
        // REPLモード
        try runRepl(allocator);
    }
}

fn runFile(allocator: std.mem.Allocator, path: []const u8) !void {
    const source = try std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024);
    defer allocator.free(source);

    try run(allocator, source);
}

fn runRepl(allocator: std.mem.Allocator) !void {
    const stdin = std.io.getStdIn().reader();
    const stdout = std.io.getStdOut().writer();

    try stdout.print("MiniLang REPL v0.1\n", .{});
    try stdout.print("Type 'exit' to quit\n\n", .{});

    var evaluator = try Evaluator.init(allocator);
    defer evaluator.deinit();

    var buffer: [1024]u8 = undefined;
    while (true) {
        try stdout.print("> ", .{});

        const line = try stdin.readUntilDelimiter(&buffer, '\n');
        const trimmed = std.mem.trim(u8, line, &std.ascii.whitespace);

        if (trimmed.len == 0) continue;
        if (std.mem.eql(u8, trimmed, "exit")) break;

        var lexer = Lexer.init(allocator, trimmed);
        var parser = try Parser.init(allocator, &lexer);
        const ast_node = parser.parse() catch |err| {
            try stdout.print("Parse error: {}\n", .{err});
            continue;
        };

        const result = evaluator.eval(ast_node, evaluator.global_env) catch |err| {
            try stdout.print("Runtime error: {}\n", .{err});
            continue;
        };

        try stdout.print("{}\n", .{result});
    }
}

fn run(allocator: std.mem.Allocator, source: []const u8) !void {
    var lexer = Lexer.init(allocator, source);
    var parser = try Parser.init(allocator, &lexer);
    const ast_node = try parser.parse();

    var evaluator = try Evaluator.init(allocator);
    defer evaluator.deinit();

    _ = try evaluator.eval(ast_node, evaluator.global_env);
}

サンプルプログラム (examples/fibonacci.ml)

// フィボナッチ数列

func fibonacci(n) {
    if (n <= 1) {
        return n;
    }
    return fibonacci(n - 1) + fibonacci(n - 2);
}

var i = 0;
while (i < 15) {
    print(fibonacci(i));
    i = i + 1;
}

---

解説

実装のポイント

1. 字句解析(Lexical Analysis)

字句解析器は、ソースコードを意味のある「トークン」に分割します。

"var x = 42;" → [var_keyword, identifier("x"), assign, number("42"), semicolon]

実装のポイント:

  • 状態遷移でトークンを認識
  • 先読み(peek)で2文字演算子(==, !=等)を判定
  • 行番号・列番号を記録してエラーメッセージを改善

2. 構文解析(Syntactic Analysis)

パーサーは、トークン列を抽象構文木(AST)に変換します。

再帰下降パーサー:

  • 各文法規則を関数として実装
  • 演算子の優先順位は関数の呼び出し階層で表現

parseExpression()
  └─ parseAssignment()
       └─ parseEquality()
            └─ parseComparison()
                 └─ parseTerm()  // + -
                      └─ parseFactor()  // * /
                           └─ parseUnary()  // -
                                └─ parseCall()
                                     └─ parsePrimary()  // リテラル、識別子

優先順位:

低 ← 代入 < 等価 < 比較 < 加減 < 乗除 < 単項 < 関数呼び出し < 一次式 → 高

3. 抽象構文木(AST)

ASTは、プログラムの構造を木構造で表現します。

// x + y * 2
BinaryOp(.add) {
    left: Identifier("x"),
    right: BinaryOp(.multiply) {
        left: Identifier("y"),
        right: NumberLiteral(2),
    }
}

Zigのunion(enum)を使うことで、異なる種類のノードを統一的に扱えます。

4. インタプリタ(実行)

ASTを走査して実際に計算を行います。

環境(Environment):

  • 変数スコープを管理
  • 親環境へのリンクでレキシカルスコープを実現

global_env
  └─ function_env (fibonacci)
       └─ call_env (n=5)
            └─ call_env (n=4)
                 └─ ...

クロージャ: 関数は定義時の環境を保持します。

.function = .{
    .params = ["n"],
    .body = ...,
    .closure = env,  // 定義時の環境
}

設計判断

なぜ再帰下降パーサーか?

  • 理解しやすい(文法規則と関数が1対1対応)
  • 手書きで実装可能
  • エラーリカバリが容易

代替: パーサージェネレータ(yacc/bison)は強力だが、学習には再帰下降が最適。

なぜインタプリタか?

  • コンパイラより実装が簡単
  • デバッグが容易
  • 動的な機能(REPL)を実装しやすい

代替: バイトコードコンパイラ、JITコンパイラ

エラーハンドリング: Zigのエラーユニオンを活用:

pub fn parse() !*Node { ... }

行番号・列番号を記録して、分かりやすいエラーメッセージを提供。

代替案

言語機能の拡張:

  • 配列・辞書
  • オブジェクト指向
  • 型システム
  • モジュールシステム

バックエンドの変更:

  • バイトコードコンパイラ + VM
  • LLVMバックエンド(ネイティブコード生成)
  • JavaScriptへのトランスパイル
  • ---

    学習の意図

    習得概念

  • コンパイラ理論の基礎
- 字句解析、構文解析、意味解析、実行 - トークン、AST、環境

  • パーサー設計
- 再帰下降パーサー - 演算子優先順位 - エラーリカバリ

  • スコープとクロージャ
- レキシカルスコープ - 関数クロージャ - 環境チェーン

  • Zigの高度な機能
- union(enum)による多様型 - エラーハンドリング - メモリ管理(アロケータ)

CS基礎との関連

形式言語理論:

  • 文法(BNF記法)
  • 構文解析アルゴリズム
  • 言語階層(正規言語、文脈自由言語)

データ構造:

  • 木構造(AST)
  • ハッシュマップ(環境)
  • スタック(関数呼び出し)

アルゴリズム:

  • 再帰(パーサー、評価器)
  • 木の走査(深さ優先探索)

---

テスト方法

環境構築

# プロジェクトディレクトリで
zig build

基本テスト

1. REPL テスト

zig build run

> var x = 10;
10
> var y = 20;
20
> x + y
30
> print(x * y);
200

2. ファイル実行テスト

# examples/fibonacci.ml を実行
zig build run -- examples/fibonacci.ml

期待される出力:

0
1
1
2
3
5
8
13
21
34
55
89
144
233
377

単体テスト

// test/lexer_test.zig
const std = @import("std");
const Lexer = @import("lexer.zig").Lexer;
const TokenType = @import("token.zig").TokenType;

test "lexer: numbers" {
    var lexer = Lexer.init(std.testing.allocator, "123 456");
    const t1 = try lexer.nextToken();
    try std.testing.expectEqual(TokenType.number, t1.type);
    try std.testing.expectEqualStrings("123", t1.lexeme);

    const t2 = try lexer.nextToken();
    try std.testing.expectEqual(TokenType.number, t2.type);
    try std.testing.expectEqualStrings("456", t2.lexeme);
}

test "lexer: keywords" {
    var lexer = Lexer.init(std.testing.allocator, "var func if");
    try std.testing.expectEqual(TokenType.var_keyword, (try lexer.nextToken()).type);
    try std.testing.expectEqual(TokenType.func_keyword, (try lexer.nextToken()).type);
    try std.testing.expectEqual(TokenType.if_keyword, (try lexer.nextToken()).type);
}

---

評価基準

基本要件(70%)

  • [ ] 字句解析器 (20%)
- 全てのトークンタイプを正しく認識 - キーワードと識別子を区別 - 行番号・列番号を記録

  • [ ] 構文解析器 (25%)
- 全ての文法要素をパース - ASTを正しく構築 - エラー検出とメッセージ

  • [ ] インタプリタ (25%)
- 四則演算、比較演算 - 条件分岐、ループ - 関数定義と呼び出し

発展要件(30%)

  • [ ] 言語機能の拡張 (15%)
- 配列・文字列 - より複雑な型システム - エラーハンドリング構文

  • [ ] 最適化 (10%)
- 定数畳み込み - テールコール最適化 - インライン展開

  • [ ] デバッグ機能 (5%)
- スタックトレース - ブレークポイント - 変数インスペクション

コード品質

  • 可読性: パーサーの構造が明確か
  • 拡張性: 新しい機能を追加しやすいか
  • エラーメッセージ: ユーザーフレンドリーか

---

参考資料

コンパイラ理論

  • "Crafting Interpreters" by Robert Nystrom
  • "Compilers: Principles, Techniques, and Tools" (Dragon Book)

Zig言語

---

この課題を通じて、プログラミング言語がどのように動作するかを深く理解し、コンパイラ理論を実践的に習得できます。自分だけの言語を作る楽しさを体験してください!