社区讨论

90分WA#1:答案402,输出514求助

P11738[集训队互测 2015] 未来程序·改参与者 2已保存回复 1

讨论操作

快速查看讨论及其快照的属性,并进行相关操作。

当前回复
1 条
当前快照
1 份
快照标识符
@mjdz3psx
此快照首次捕获于
2025/12/20 15:24
2 个月前
此快照最后确认于
2025/12/22 15:35
2 个月前
查看原帖
CPP
#ifdef LOCAL
#include "all.hh"
#else
#include <bits/stdc++.h>
#define debug(...) (void)('y' + 'u' + 'z' + 'u')
#define debug_if(...) (void)('y' + 'u' + 'z' + 'u')
#undef assert
#define assert(expr) (void)(expr)
#define FOR(i, a, b) for (ui i = a; i <= (b); ++i)
#define REP(i, a, b) for (ui i = a; i < (b); ++i)
#define RFOR(i, a, b) for (ui i = (a) + 1; i-- > (b);)
#define RREP(i, a, b) for (ui i = a; i > (b); --i)
using uc = unsigned char;
using si = int32_t;
using ui = uint32_t;
using u64 = uint64_t;
using i64 = int64_t;
using fp = double;
template <typename T> using nlim = std::numeric_limits<T>;
template <typename T, typename U> inline bool ckmin(T& a, const U& b) {
    return b < a && (a = b, true);
}
template <typename T, typename U> inline bool ckmax(T& a, const U& b) {
    return a < b && (a = b, true);
}
#endif // LOCAL
template <typename F> struct LambdaFn {
    F f;
    LambdaFn(F&& _f) : f(std::forward<F>(_f)) {
    }
    template <typename... A> auto operator()(A&&... a) {
        return f(*this, std::forward<A>(a)...);
    }
};
template <typename T> auto lambda(T&& f) {
    return LambdaFn<std::decay_t<T>>(std::forward<T>(f));
}
using namespace std;
using namespace literals;

#define LOCAL_FILE_IO

template <typename T> void operator+=(vector<T>& a, const vector<T>& b) {
    a.insert(a.end(), b.begin(), b.end());
}

enum struct TokenType : uc { reserved, name, symbol, number };
struct Token {
    TokenType t;
    union ValueType {
        int i;
        const char* s;
        char ch[2];
    } v;
};
enum struct RegType : uc {
    eax,
    ebx,
    rsp,
    rbp,
    lit, // 字面量
    none,
    par, // 为了简便,专门存放调用函数时的参数
};
// 注意指令规则不一定和实际的汇编指令规则相同
enum struct CmdType : uc {
    lea,  // b = a + ln
    mov,  // b = a
    push, // rbp -= 1, *rbp = a
    pop,  // a = *rbp, rbp += 1
    call,
    lid,    // 标记段,编号为 an
    jmp,    // 无条件跳转到 an。如果 bn=1,表示这是由 return 语句跳转的
    jmp_if, // if (eax) jmp an
    ret,
    add,  // b += a
    sub,  // b -= a
    mul,  // b *= a
    div,  // b /= a
    mod,  // b %= a
    cmpg, // b = (b > a)
    cmpl,
    cmpge,
    cmple,
    cmpe,
    cmpne,
    xor_,   // b = (a != 0) ^ (b != 0)
    and_,   // b = (a != 0) & (b != 0)
    or_,    // b = (a != 0) | (b != 0)
    not_,   // a = (a == 0)
    neg,    // a = -a
    read,   // cin >> eax
    writei, // cout << eax
    writec, // putchar(eax)
};
struct Command {
    CmdType t;
    RegType a, b;
    int an, bn;
};
// 输入部分
vector<int> input;
void init_input() {
    ui n;
    cin >> n;
    input.resize(n);
    REP(i, 0, n) {
        cin >> input[i];
    }
    reverse(input.begin(), input.end());
}
int read_int() {
    int res = input.back();
    input.pop_back();
    return res;
}

// 将程序转换为 token
vector<Token> token_program;
namespace part1 {
    set<string> names; // 用于保存所有名称
    bool is_reserved(const char* s) {
#define f(t)                                                                   \
    if (strcmp(s, #t) == 0)                                                    \
        return true;

        f(int)
        f(for)
        f(if)
        f(while)
        f(else)
        f(cin)
        f(cout)
        f(putchar)
        f(endl)
        f(return)
        return false;
    }
    void read_tokens() {
        cin.ignore(1000, ';');
        char c;
        Token::ValueType tmp_nv;
        while (cin.get(c)) {
            if (isspace(c)) {
                continue;
            }
            if (isalpha(c) || c == '_') {
                string s;
                s += c;
                while (isalpha(cin.peek()) || isdigit(cin.peek()) ||
                       cin.peek() == '_') {
                    s += char(cin.get());
                }
                auto it = names.insert(move(s)).first;
                tmp_nv.s = it->data();
                token_program.push_back({is_reserved(tmp_nv.s)
                                             ? TokenType::reserved
                                             : TokenType::name,
                                         tmp_nv});
            } else if (isdigit(c)) {
                int n = c - '0';
                while (isdigit(cin.peek())) {
                    n = n * 10 + (cin.get() - '0');
                }
                tmp_nv.i = n;
                token_program.push_back({TokenType::number, tmp_nv});
            } else {
                tmp_nv.ch[0] = c;
                tmp_nv.ch[1] = ' ';
                if (char nex = char(cin.peek());
                    ((c == '<' || c == '>' || c == '=' || c == '!') &&
                     nex == '=') ||
                    ((c == '&' || c == '|' || c == '<' || c == '>') &&
                     nex == c)) {
                    tmp_nv.ch[1] = char(cin.get());
                }
                token_program.push_back({TokenType::symbol, tmp_nv});
            }
        }
    }
} // namespace part1

// 将 tokens 转换为汇编
vector<Command> ass_program;
int main_label; // 主函数入口

namespace part2 {
    struct PriorityManager {
        ui priority[128 * 128];
        void set(char c, char d, ui p) {
            priority[ui(c * 128 + d)] = p;
        }
        PriorityManager() {
            set('(', ' ', 0);
            set(')', ' ', 0);
            set('[', ' ', 0);
            set(']', ' ', 0);
            set('a', 't', 0);
            set('!', '1', 1);
            set('+', '1', 1);
            set('-', '1', 1);
            set('*', ' ', 2);
            set('/', ' ', 2);
            set('%', ' ', 2);
            set('+', ' ', 3);
            set('-', ' ', 3);
            set('<', '=', 4);
            set('>', '=', 4);
            set('<', ' ', 4);
            set('<', ' ', 4);
            set('=', '=', 5);
            set('!', '=', 6);
            set('^', ' ', 7);
            set('&', '&', 8);
            set('|', '|', 9);
            set('=', ' ', 10);
            set('<', '<', 11);
            set('>', '>', 11);
        }
        ui get(const char* s) {
            return priority[ui(s[0] * 128 + s[1])];
        }
    } priman;

    union NodeValue {
        const Token* vv; // value
        const char* sv;  // symbol value
    };
    NodeValue tmp_nv;
    struct ExprTreeNode {
        ui ls, rs;
        bool is_leaf; // 叶子节点是值,非叶子节点是运算符
        NodeValue v;
    };
    vector<ExprTreeNode> expr_tree;
    ui new_node(bool is_leaf) {
        ui u = ui(expr_tree.size());
        expr_tree.push_back({UINT_MAX, UINT_MAX, is_leaf, tmp_nv});
        return u;
    }
    /**
     * 查找匹配的括号
     */
    ui find_match(span<Token> s, ui i) {
        assert(s[i].t == TokenType::symbol);
        char open = s[i].v.ch[0];
        assert(open == '(' || open == '[' || open == '{');
        char close = (open == '(' ? ')' : (open == '[' ? ']' : '}'));
        ui deep = 1;
        while (deep) {
            ++i;
            if (s[i].t == TokenType::symbol && s[i].v.ch[0] == open) {
                ++deep;
            } else if (s[i].t == TokenType::symbol && s[i].v.ch[0] == close) {
                --deep;
            }
        }
        return i;
    }
    bool is_l_bracket(char c) {
        return c == '(' || c == '[' || c == '{';
    }
    bool is_r_bracket(char c) {
        return c == ')' || c == ']' || c == '}';
    }
    // 查找分号
    ui find_semicolon(span<Token> s, ui i) {
        ui deep = 0;
        while (deep || s[i].t != TokenType::symbol || s[i].v.ch[0] != ';') {
            if (s[i].t == TokenType::symbol && is_l_bracket(s[i].v.ch[0])) {
                ++deep;
            } else if (s[i].t == TokenType::symbol &&
                       is_r_bracket(s[i].v.ch[0])) {
                --deep;
            }
            ++i;
        }
        return i;
    }
    // 查找逗号
    ui find_comma(span<Token> s, ui i) {
        ui deep = 0;
        while (deep || s[i].t != TokenType::symbol || s[i].v.ch[0] != ',') {
            if (s[i].t == TokenType::symbol && is_l_bracket(s[i].v.ch[0])) {
                ++deep;
            } else if (s[i].t == TokenType::symbol &&
                       is_r_bracket(s[i].v.ch[0])) {
                --deep;
            }
            ++i;
        }
        return i;
    }

    ui build_call_tree(span<Token> s);
    ui build_expr_tree(span<Token> s);
    /**
     * 找到从当前位置开始的一个表达式(单个值,或者整个括号,或者函数调用,可能带一元运算符)
     * 返回 [树根,下一个起始位置]
     */
    pair<ui, ui> build_next_expr(span<Token> s, ui i) {
        if (s[i].t == TokenType::name || s[i].t == TokenType::number) {
            if (i + 1 < s.size() && s[i + 1].v.ch[0] == '(') {
                ui j = find_match(s, i + 1);
                return {build_call_tree(s.subspan(i, j - i + 1)), j + 1};
            }
            return {build_expr_tree(s.subspan(i, 1)), i + 1};
        } else {
            assert(s[i].t == TokenType::symbol);
            if (s[i].v.ch[0] == '!' || s[i].v.ch[0] == '+' ||
                s[i].v.ch[0] == '-') {
                // 一元运算符,直接递归处理即可
                auto [v, j] = build_next_expr(s, i + 1);
                tmp_nv.sv = s[i].v.ch;
                ui u = new_node(false);
                expr_tree[u].ls = v;
                return {u, j};
            } else {
                assert(s[i].v.ch[0] == '(' || s[i].v.ch[0] == '[');
                ui j = find_match(s, i);
                return {build_expr_tree(s.subspan(i + 1, j - i - 1)), j + 1};
            }
        }
    };

    unordered_map<const char*, pair<int, int>>
        function_info; // [起点,参数个数]

    ui build_call_tree(span<Token> s) {
        assert(s[0].t == TokenType::name);
        const char* name = s[0].v.s;
        int param_cnt = function_info[name].second;
        ui i = 2;
        ui r_bracket = find_match(s, 1);
        tmp_nv.vv = &s[0];
        ui rt = new_node(true), u = rt;
        for (int pci = 0; pci < param_cnt; ++pci) {
            ui j = (pci == param_cnt - 1 ? r_bracket : find_comma(s, i));
            ui v = build_expr_tree(s.subspan(i, j - i));
            tmp_nv.sv = "ca";
            expr_tree[u].rs = new_node(false);
            u = expr_tree[u].rs;
            expr_tree[u].ls = v;
            i = j + 1;
        }
        return rt;
    }
    /**
     * 建表达式树,返回树根
     */
    ui build_expr_tree(span<Token> s) {
        if (s.size() == 1) {
            tmp_nv.vv = &s[0];
            return new_node(true);
        }
        stack<ui, vector<ui>> s1;
        stack<const char*, vector<const char*>> s2;
        /**
         * 判断是否是一元运算
         */
        auto is_unary = [&](ui i) {
            assert(s[i].t == TokenType::symbol);
            if (s[i].v.ch[0] == '!' && s[i].v.ch[1] == ' ') {
                return true;
            }
            if (s[i].v.ch[0] != '+' && s[i].v.ch[0] != '-') {
                return false;
            }
            if (i == 0) {
                return true;
            }
            if (s[i - 1].t == TokenType::name ||
                s[i - 1].t == TokenType::number) {
                return false;
            }
            assert(s[i - 1].t == TokenType::symbol);
            if (s[i - 1].v.ch[0] == ')' || s[i - 1].v.ch[0] == ']') {
                return false;
            } else {
                return true;
            }
        };
        /**
         * 将最后一个表达式合并
         */
        auto pop = [&]() {
            tmp_nv.sv = s2.top();
            s2.pop();
            ui u = new_node(false);
            expr_tree[u].rs = s1.top();
            s1.pop();
            expr_tree[u].ls = s1.top();
            s1.pop();
            s1.push(u);
        };
        /**
         * 加入一个运算符,并且把之前运算优先级更高的 pop 掉
         */
        auto push = [&](const char* op) {
            if (op[0] == '=' && op[1] == ' ') {
                // 赋值运算特殊处理
                s2.push(op);
            } else {
                while (!s2.empty() && priman.get(op) >= priman.get(s2.top())) {
                    pop();
                }
                s2.push(op);
            }
        };
        // for (Token t : s) {
        //     print_token(t);
        // }
        for (ui i = 0; i < s.size();) {
            if (s[i].t == TokenType::name || s[i].t == TokenType::number) {
                if (i + 1 < s.size() && s[i].t == TokenType::name &&
                    s[i + 1].t == TokenType::symbol &&
                    s[i + 1].v.ch[0] == '(') {
                    // 调用函数
                    // 此时把表达式树建成右链,非叶子节点的左子树表示当前参数,右边为后面参数。这样在解析时只需循环即可。
                    ui j = find_match(s, i + 1);
                    s1.push(build_call_tree(s.subspan(i, j - i + 1)));
                    i = j + 1;
                } else {
                    tmp_nv.vv = &s[i];
                    s1.push(new_node(true));
                    ++i;
                }
            } else {
                if (s[i].t == TokenType::reserved &&
                    !strcmp(s[i].v.s, "putchar")) {
                    auto [v, j] = build_next_expr(s, i + 1);
                    tmp_nv.sv = "pc";
                    ui u = new_node(false);
                    expr_tree[u].ls = v;
                    s1.push(u);
                    i = j;
                } else {
                    assert(s[i].t == TokenType::symbol);
                    if (s[i].v.ch[0] == '(' || s[i].v.ch[0] == '[') {
                        auto [v, j] = build_next_expr(s, i);
                        s1.push(v);
                        if (s[i].v.ch[0] == '[') {
                            s2.push("at");
                            pop();
                        }
                        i = j;
                    } else if (is_unary(i)) {
                        // 作为表达式处理
                        auto [v, j] = build_next_expr(s, i);
                        s1.push(v);
                        i = j;
                    } else {
                        // 普通二元运算
                        push(s[i].v.ch);
                        ++i;
                    }
                }
            }
        }
        assert(s1.size() == s2.size() + 1);
        while (!s2.empty()) {
            pop();
        }
        assert(s1.size() == 1);
        return s1.top();
    }
    // var[i] = j: 变量名 i 是相对 rbp 第 j.back() 个位置
    unordered_map<const char*, vector<pair<bool, int>>> var;
    // arr[i].back() = [[is_static, x], y], x 为位置,y 为维度列表
    unordered_map<const char*, vector<pair<pair<bool, int>, vector<int>>>> arr;
    // 为了防止变量和数组重名,记录每个名字对应变量还是数组
    // var_type[name].back() : 0表示变量,1表示数组,2表示函数
    unordered_map<const char*, vector<uc>> var_type;
    bool last_is_ref;
    vector<span<const int>> last_type;
    /**
     * 解析表达式树
     */
    vector<Command> parse_expr_tree(ui u) {
        if (expr_tree[u].is_leaf) {
            const Token* t = expr_tree[u].v.vv;
            assert(t->t == TokenType::name || t->t == TokenType::number);
            if (t->t == TokenType::name) {
                if (var_type[t->v.s].empty()) {
                    // debug("var_type[{}] is empty!\n", t->v.s);
                    assert(false);
                }
                if (uc type = var_type[t->v.s].back(); type == 0) {
                    last_is_ref = true;
                    auto& [is_static, p] = var[t->v.s].back();
                    Command cmd;
                    if (is_static) {
                        cmd = {CmdType::lea, RegType::lit, RegType::eax, p, 0};
                    } else {
                        cmd = {CmdType::lea, RegType::rbp, RegType::eax, p, 0};
                    }
                    return {1, cmd};
                } else if (type == 1) {
                    last_is_ref = true;
                    auto& [tmp, d] = arr[t->v.s].back();
                    auto [is_static, p] = tmp;
                    last_type.push_back(d);
                    Command cmd;
                    if (is_static) {
                        cmd = {CmdType::lea, RegType::lit, RegType::eax, p, 0};
                    } else {
                        cmd = {CmdType::lea, RegType::rbp, RegType::eax, p, 0};
                    }
                    return {1, cmd};
                } else if (type == 2) {
                    vector<Command> res;
                    auto [label, param_cnt] = function_info[t->v.s];
                    for (int i = 0; i < param_cnt; ++i) {
                        u = expr_tree[u].rs;
                        res += parse_expr_tree(expr_tree[u].ls);
                        res.push_back({CmdType::mov, RegType::eax, RegType::par,
                                       int(last_is_ref), i});
                    }
                    res.push_back({CmdType::call, RegType::none, RegType::none,
                                   label, 0});
                    last_is_ref = false;
                    return res;
                } else {
                    assert(false);
                    return {};
                }
            } else {
                //   debug("last_is_lef=0 lit={}\n", t->v.i);
                last_is_ref = false;
                Command cmd{CmdType::mov, RegType::lit, RegType::eax, t->v.i,
                            0};
                return {1, cmd};
            }
        } else if (expr_tree[u].rs == UINT_MAX) {
            // 一元运算符
            vector<Command> res = parse_expr_tree(expr_tree[u].ls);
            if (last_is_ref) {
                res.push_back({CmdType::mov, RegType::eax, RegType::eax, 1, 0});
                last_is_ref = false;
            }
            switch (expr_tree[u].v.sv[0]) {
            case '+':
                break;
            case '-':
                res.push_back(
                    {CmdType::neg, RegType::eax, RegType::none, 0, 0});
                break;
            case '!':
                res.push_back(
                    {CmdType::not_, RegType::eax, RegType::none, 0, 0});
                break;
            case 'p':
                res.push_back(
                    {CmdType::writec, RegType::eax, RegType::none, 0, 0});
                break;
            default:
                assert(false);
            }
            return res;
        } else {
            const char* oper = expr_tree[u].v.sv;
            vector<Command> res;
            if (oper[0] == '=' && oper[1] == ' ') {
                res = parse_expr_tree(expr_tree[u].ls);
                assert(last_is_ref);
                res.push_back(
                    {CmdType::push, RegType::eax, RegType::none, 0, 0});
                res += parse_expr_tree(expr_tree[u].rs);
                if (last_is_ref) {
                    res.push_back(
                        {CmdType::mov, RegType::eax, RegType::eax, 1, 0});
                }
                res.push_back({CmdType::mov, RegType::eax, RegType::ebx, 0, 0});
                res.push_back(
                    {CmdType::pop, RegType::eax, RegType::none, 0, 0});
                res.push_back({CmdType::mov, RegType::ebx, RegType::eax, 0, 1});
                last_is_ref = true;
                return res;
            }
            res = parse_expr_tree(expr_tree[u].ls);
            bool l_is_ref = last_is_ref;
            res.push_back({CmdType::push, RegType::eax, RegType::none, 0, 0});
            res += parse_expr_tree(expr_tree[u].rs);
            if (last_is_ref) {
                res.push_back({CmdType::mov, RegType::eax, RegType::eax, 1, 0});
            }
            res.push_back({CmdType::mov, RegType::eax, RegType::ebx, 0, 0});
            res.push_back({CmdType::pop, RegType::eax, RegType::none, 0, 0});
            if (oper[0] == 'a' && oper[1] == 't') {
                assert(l_is_ref);
                res.push_back({CmdType::mul, RegType::lit, RegType::ebx,
                               last_type.back()[1], 0});
                res.push_back({CmdType::add, RegType::ebx, RegType::eax, 0, 0});
                last_type.back() = last_type.back().subspan(1); // pop_front,得到下一维大小
                if (last_type.back().size() == 1) {
                    last_type.pop_back();
                }
                last_is_ref = true;
            } else {
                if (l_is_ref) {
                    // 将左侧地址变成值
                    res.push_back(
                        {CmdType::mov, RegType::eax, RegType::eax, 1, 0});
                }
                if (oper[0] == '*') {
                    res.push_back(
                        {CmdType::mul, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '/') {
                    res.push_back(
                        {CmdType::div, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '%') {
                    res.push_back(
                        {CmdType::mod, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '+') {
                    res.push_back(
                        {CmdType::add, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '-') {
                    res.push_back(
                        {CmdType::sub, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '<' && oper[1] == '=') {
                    res.push_back(
                        {CmdType::cmple, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '>' && oper[1] == '=') {
                    res.push_back(
                        {CmdType::cmpge, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '<' && oper[1] == ' ') {
                    res.push_back(
                        {CmdType::cmpl, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '>' && oper[1] == ' ') {
                    res.push_back(
                        {CmdType::cmpg, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '=' && oper[1] == '=') {
                    res.push_back(
                        {CmdType::cmpe, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '!' && oper[1] == '=') {
                    res.push_back(
                        {CmdType::cmpne, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '^') {
                    res.push_back(
                        {CmdType::xor_, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '&' && oper[1] == '&') {
                    res.push_back(
                        {CmdType::and_, RegType::ebx, RegType::eax, 0, 0});
                } else if (oper[0] == '|' && oper[1] == '|') {
                    res.push_back(
                        {CmdType::or_, RegType::ebx, RegType::eax, 0, 0});
                } else {
                    assert(false);
                }
                last_is_ref = false;
            }
            return res;
        }
    }
    int function_end_label;
    bool is_if_for_while(const char* s) {
        return !strcmp(s, "if") || !strcmp(s, "for") || !strcmp(s, "while");
    }
    /**
     * 为 if,for,while 找到结束位置
     * s[i] 为控制语句的右括号的下一个
     */
    ui find_block_end(span<Token> s, ui i) {
        while (true) {
            if (s[i].t == TokenType::symbol && s[i].v.ch[0] == '{') {
                return find_match(s, i);
            }
            if (s[i].t == TokenType::reserved && is_if_for_while(s[i].v.s)) {
                ui r_bracket = find_match(s, i + 1);
                return find_block_end(s, r_bracket + 1);
            } else {
                return find_semicolon(s, i);
            }
        }
    }
    /**
     * 解析表达式
     * 表达式不包括分号
     * 结果存在 eax
     * @todo all
     */
    vector<Command> parse_expr(span<Token> s) {
        if (s[0].t == TokenType::reserved && !strcmp(s[0].v.s, "cin")) {
            vector<Command> r;
            ui i = 1;
            while (i < s.size()) {
                assert(s[i].t == TokenType::symbol && s[i].v.ch[0] == '>' &&
                       s[i].v.ch[1] == '>');
                ++i;
                assert(i < s.size());
                ui j = i;
                // 找到下一个右移,或者结尾
                while (j < s.size() &&
                       (s[j].t != TokenType::symbol || s[j].v.ch[0] != '>' ||
                        s[j].v.ch[1] != '>')) {
                    ++j;
                }
                r += parse_expr(s.subspan(i, j - i));
                assert(last_is_ref);
                r.push_back(
                    {CmdType::read, RegType::none, RegType::none, 0, 0});
                i = j;
            }
            return r;
        }
        if (s[0].t == TokenType::reserved && !strcmp(s[0].v.s, "cout")) {
            vector<Command> r;
            ui i = 1;
            while (i < s.size()) {
                assert(s[i].t == TokenType::symbol && s[i].v.ch[0] == '<' &&
                       s[i].v.ch[1] == '<');
                ++i;
                assert(i < s.size());
                ui j = i;
                // 找到下一个左移,或者结尾
                while (j < s.size() &&
                       (s[j].t != TokenType::symbol || s[j].v.ch[0] != '<' ||
                        s[j].v.ch[1] != '<')) {
                    ++j;
                }
                if (j - i == 1 && s[i].t == TokenType::reserved &&
                    !strcmp(s[i].v.s, "endl")) {
                    r.push_back({CmdType::mov, RegType::lit, RegType::eax,
                                 int('\n'), 0});
                    r.push_back(
                        {CmdType::writec, RegType::none, RegType::none, 0, 0});
                } else {
                    r += parse_expr(s.subspan(i, j - i));
                    if (last_is_ref) {
                        r.push_back(
                            {CmdType::mov, RegType::eax, RegType::eax, 1, 0});
                    }
                    r.push_back(
                        {CmdType::writei, RegType::none, RegType::none, 0, 0});
                }
                i = j;
            }
            return r;
        }
        if (s[0].t == TokenType::reserved && !strcmp(s[0].v.s, "return")) {
            // 应该无需在此处展开栈
            vector<Command> r = parse_expr(s.subspan(1));
            if (last_is_ref) {
                r.push_back({CmdType::mov, RegType::eax, RegType::eax, 1, 0});
            }
            r.push_back({CmdType::jmp, RegType::none, RegType::none,
                         function_end_label, 1});
            return r;
        }
        // 建出表达式树
        expr_tree.clear();
        ui u = build_expr_tree(s);
        // 解析表达式树
        return parse_expr_tree(u);
    }

    int label_cnt, static_memory;

    namespace func {
        int local_memory; // 局部变量个数
        /**
         * 解析多个表达式
         */
        vector<Command> parse_many_expr(span<Token> s) {
            vector<Command> r;
            vector<const char*> local_vars; // 局部变量列表
            ui i = 0, j;
            while (i < s.size()) {
                // 直接的花括号
                if (s[i].t == TokenType::symbol && s[i].v.ch[0] == '{') {
                    j = find_match(s, i);
                    ++i;
                    r += parse_many_expr(s.subspan(i, j - i));
                    i = j + 1;
                    continue;
                }
                // 定义变量
                if (s[i].t == TokenType::reserved && !strcmp(s[i].v.s, "int")) {
                    ++i;
                    while (s[i].t != TokenType::symbol || s[i].v.ch[0] != ';') {
                        if (s[i].t == TokenType::symbol) {
                            assert(s[i].v.ch[0] == ',');
                            ++i;
                        }
                        assert(s[i].t == TokenType::name);
                        const char* name = s[i].v.s;
                        local_vars.push_back(name);
                        ++i;
                        assert(s[i].t == TokenType::symbol);
                        if (s[i].v.ch[0] == '[') {
                            // 定义数组
                            vector<int> d_sizes;
                            while (s[i].v.ch[0] != ',' && s[i].v.ch[0] != ';') {
                                ++i;
                                assert(s[i].t == TokenType::number);
                                d_sizes.push_back(s[i].v.i);
                                ++i;
                                assert(s[i].t == TokenType::symbol &&
                                       s[i].v.ch[0] == ']');
                                ++i;
                            }
                            d_sizes.push_back(1);
                            // 数组的维度大小做后缀积
                            for (int d = int(d_sizes.size() - 2); d >= 0; --d) {
                                d_sizes[d] *= d_sizes[d + 1];
                            }
                            // 计算相对地址
                            var_type[name].push_back(1);
                            local_memory += d_sizes[0];
                            arr[name].emplace_back(
                                make_pair(false, -local_memory),
                                std::move(d_sizes));
                        } else {
                            var_type[name].push_back(0);
                            ++local_memory;
                            var[name].emplace_back(false, -local_memory);
                        }
                    }
                    ++i;
                    continue;
                }
                /**
                 * if 语句
                 * 如果判断成立继续执行,最后跳到 end_label
                 * 如果判断不成立,跳到 else_label,后面是 end_label
                 */
                if (s[i].t == TokenType::reserved && !strcmp(s[i].v.s, "if")) {
                    ++i;
                    j = find_match(s, i);
                    ++i;
                    r += parse_expr(s.subspan(i, j - i));
                    if (last_is_ref) {
                        r.push_back(
                            {CmdType::mov, RegType::eax, RegType::eax, 1, 0});
                    }
                    r.push_back(
                        {CmdType::not_, RegType::eax, RegType::none, 0, 0});
                    i = j + 1;
                    int else_label = ++label_cnt;
                    int end_label = ++label_cnt;
                    r.push_back({CmdType::jmp_if, RegType::none, RegType::none,
                                 else_label, 0});
                    j = find_block_end(s, i);
                    if (s[i].t == TokenType::symbol && s[i].v.ch[0] == '{') {
                        ++i;
                        r += parse_many_expr(s.subspan(i, j - i));
                    } else {
                        r += parse_many_expr(s.subspan(i, j - i + 1));
                    }
                    i = j + 1;
                    r.push_back({CmdType::jmp, RegType::none, RegType::none,
                                 end_label, 0});
                    r.push_back({CmdType::lid, RegType::none, RegType::none,
                                 else_label, 0});
                    if (i < s.size() && s[i].t == TokenType::reserved &&
                        !strcmp(s[i].v.s, "else")) {
                        ++i;
                        if (s[i].t == TokenType::symbol &&
                            s[i].v.ch[0] == '{') {
                            // many expr
                            j = find_match(s, i);
                            ++i;
                            r += parse_many_expr(s.subspan(i, j - i));
                            i = j + 1;
                        } else {
                            j = find_semicolon(s, i);
                            r += parse_many_expr(s.subspan(i, j - i + 1));
                            i = j + 1;
                        }
                    }
                    r.push_back({CmdType::lid, RegType::none, RegType::none,
                                 end_label, 0});
                    continue;
                }
                // for 语句
                if (s[i].t == TokenType::reserved && !strcmp(s[i].v.s, "for")) {
                    int for_begin_label = ++label_cnt,
                        for_end_label = ++label_cnt;
                    ++i;
                    ui r_bracket = find_match(s, i);
                    ++i;
                    j = find_semicolon(s, i);
                    if (i < j) {
                        r += parse_expr(s.subspan(i, j - i));
                    }
                    i = j + 1;
                    j = find_semicolon(s, i);
                    r.push_back({CmdType::lid, RegType::none, RegType::none,
                                 for_begin_label, 0});
                    if (i == j) {
                        // 无限循环
                        // 如果有条件,则为不满足时跳出循环。所以这里什么也不用做
                    } else {
                        r += parse_expr(s.subspan(i, j - i));
                        if (last_is_ref) {
                            r.push_back({CmdType::mov, RegType::eax,
                                         RegType::eax, 1, 0});
                        }
                        r.push_back(
                            {CmdType::not_, RegType::eax, RegType::none, 0, 0});
                        r.push_back({CmdType::jmp_if, RegType::none,
                                     RegType::none, for_end_label, 0});
                    }
                    i = j + 1;
                    vector<Command> each_loop;
                    if (i < r_bracket) {
                        each_loop = parse_expr(s.subspan(i, r_bracket - i));
                    }
                    i = r_bracket + 1;
                    j = find_block_end(s, i);
                    if (s[i].t == TokenType::symbol && s[i].v.ch[0] == '{') {
                        ++i;
                        r += parse_many_expr(s.subspan(i, j - i));
                    } else {
                        r += parse_many_expr(s.subspan(i, j - i + 1));
                    }
                    i = j + 1;
                    r += each_loop;
                    r.push_back({CmdType::jmp, RegType::none, RegType::none,
                                 for_begin_label, 0});
                    r.push_back({CmdType::lid, RegType::none, RegType::none,
                                 for_end_label, 0});
                    continue;
                }
                // while 语句
                if (s[i].t == TokenType::reserved &&
                    !strcmp(s[i].v.s, "while")) {
                    int while_begin_label = ++label_cnt,
                        while_end_label = ++label_cnt;
                    r.push_back({CmdType::lid, RegType::none, RegType::none,
                                 while_begin_label, 0});
                    ++i;
                    ui r_bracket = find_match(s, i);
                    ++i;
                    r += parse_expr(s.subspan(i, r_bracket - i));
                    if (last_is_ref) {
                        r.push_back(
                            {CmdType::mov, RegType::eax, RegType::eax, 1, 0});
                    }
                    r.push_back(
                        {CmdType::not_, RegType::eax, RegType::none, 0, 0});
                    r.push_back({CmdType::jmp_if, RegType::none, RegType::none,
                                 while_end_label, 0});
                    i = r_bracket + 1;
                    j = find_block_end(s, i);
                    if (s[i].t == TokenType::symbol && s[i].v.ch[0] == '{') {
                        ++i;
                        r += parse_many_expr(s.subspan(i, j - i));
                    } else {
                        r += parse_many_expr(s.subspan(i, j - i + 1));
                    }
                    i = j + 1;
                    r.push_back({CmdType::jmp, RegType::none, RegType::none,
                                 while_begin_label, 0});
                    r.push_back({CmdType::lid, RegType::none, RegType::none,
                                 while_end_label, 0});
                    continue;
                }
                // 普通表达式
                j = find_semicolon(s, i);
                r += parse_expr(s.subspan(i, j - i));
                i = j + 1;
            }
            // 局部变量 pop
            for (const char* name : local_vars) {
                if (var_type[name].back() == 0) {
                    var[name].pop_back();
                } else {
                    arr[name].pop_back();
                }
                // debug("quash {} in parse_many_expr\n", name);
                var_type[name].pop_back();
            }
            return r;
        }
        /**
         * 解析单个函数
         * 将函数拆分成表达式组
         */
        vector<Command> parse_function(span<Token> s) {
            vector<Command> r;
            ui i = 0, j;
            const char* function_name;
            int function_label = ++label_cnt;
            function_end_label = ++label_cnt;
            int param_cnt = 0; // 参数个数
            local_memory = 0;
            vector<const char*> local_names; // 参数和函数

            r.push_back({CmdType::lid, RegType::none, RegType::none,
                         function_label, 0});
            r.push_back({CmdType::push, RegType::rbp, RegType::none, 0, 0});
            r.push_back({CmdType::mov, RegType::rsp, RegType::rbp, 0, 0});
            // int
            assert(s[i].t == TokenType::reserved &&
                   strcmp(s[i].v.s, "int") == 0);
            ++i;
            // 函数名
            assert(s[i].t == TokenType::name);
            function_name = s[i].v.s;
            // debug("var_type[{}].pb(2)!\n", function_name);
            var_type[function_name].push_back(2);
            // local_names.push_back(function_name);我在干什么,不要这个
            ++i;
            if (!strcmp(function_name, "main")) {
                main_label = function_label;
            }
            // (参数列表)
            assert(s[i].t == TokenType::symbol && s[i].v.ch[0] == '(');
            j = i + 1;
            param_cnt = 0;
            while (s[j].t != TokenType::symbol || s[j].v.ch[0] != ')') {
                if (s[j].t == TokenType::name) {
                    const char* name = s[j].v.s;
                    local_names.push_back(name);
                    var_type[name].push_back(0);
                    ++param_cnt;
                    ++local_memory;
                    r.push_back({CmdType::mov, RegType::par, RegType::rbp,
                                 param_cnt - 1, -local_memory});
                    var[name].emplace_back(false, -local_memory);
                }
                ++j;
            }
            i = j + 1;
            function_info[function_name] = {function_label, param_cnt};

            // {
            assert(s[i].t == TokenType::symbol && s[i].v.ch[0] == '{');
            ++i;
            // 函数体
            r += parse_many_expr(s.subspan(i, s.size() - i - 1));
            // 桌越的人不 return
            if (r.back().t != CmdType::jmp || r.back().bn == 0) {
                r.push_back({CmdType::mov, RegType::lit, RegType::eax, 0, 0});
            }
            // 计算好了 local_memory
            r.insert(r.begin() + 3, {CmdType::sub, RegType::lit, RegType::rsp,
                                     local_memory, 0});
            // 撤销名字
            for (const char* name : local_names) {
                if (uc type = var_type[name].back(); type == 0) {
                    var[name].pop_back();
                } else if (type == 1) {
                    arr[name].pop_back();
                }
                // debug("quash {} in parse_function\n", name);
                var_type[name].pop_back();
            }
            r.push_back({CmdType::lid, RegType::none, RegType::none,
                         function_end_label, 0});
            r.push_back(
                {CmdType::add, RegType::lit, RegType::rsp, local_memory, 0});
            r.push_back({CmdType::pop, RegType::rbp, RegType::none, 0, 0});
            r.push_back({CmdType::ret, RegType::none, RegType::none, 0, 0});
            return r;
        }
    } // namespace func
    void parse_program() {
        span<Token> s = token_program;
        ui i = 0;
        while (i < s.size()) {
            ui j = i;
            assert(s[j].t == TokenType::reserved && !strcmp(s[j].v.s, "int"));
            ++j;
            assert(s[j].t == TokenType::name);
            ++j;
            if (s[j].t == TokenType::symbol && s[j].v.ch[0] == '(') {
                // function
                j = find_match(s, j) + 1;
                assert(s[j].t == TokenType::symbol && s[j].v.ch[0] == '{');
                j = find_match(s, j);
                ass_program += func::parse_function(s.subspan(i, j - i + 1));
                i = j + 1;
            } else {
                // 定义变量
                i = j - 1;
                while (s[i].t != TokenType::symbol || s[i].v.ch[0] != ';') {
                    if (s[i].t == TokenType::symbol) {
                        assert(s[i].v.ch[0] == ',');
                        ++i;
                    }
                    assert(s[i].t == TokenType::name);
                    const char* name = s[i].v.s;
                    ++i;
                    assert(s[i].t == TokenType::symbol);
                    if (s[i].v.ch[0] == '[') {
                        // 定义数组
                        vector<int> d_sizes;
                        while (s[i].v.ch[0] != ',' && s[i].v.ch[0] != ';') {
                            ++i;
                            assert(s[i].t == TokenType::number);
                            d_sizes.push_back(s[i].v.i);
                            ++i;
                            assert(s[i].t == TokenType::symbol &&
                                   s[i].v.ch[0] == ']');
                            ++i;
                        }
                        d_sizes.push_back(1);
                        // 数组的维度大小做后缀积
                        for (int d = int(d_sizes.size() - 2); d >= 0; --d) {
                            d_sizes[d] *= d_sizes[d + 1];
                        }
                        // 计算相对地址
                        var_type[name].push_back(1);
                        int arr_size = d_sizes[0];
                        arr[name].emplace_back(make_pair(true, static_memory),
                                               std::move(d_sizes));
                        static_memory += arr_size;
                    } else {
                        var_type[name].push_back(0);
                        var[name].emplace_back(true, static_memory);
                        ++static_memory;
                    }
                }
                ++i;
            }
        }
    }
} // namespace part2

// 执行汇编代码

namespace part3 {
    constexpr ui STACK_SIZE = 1e7;
    int eax, ebx, s[STACK_SIZE], par[100], rsp = STACK_SIZE, rbp, ptr, tmp;
    int label_pos[1000]; // 第 i 个标签的位置
    void init() {
        for (int i = 0, n = int(ass_program.size()); i < n; ++i) {
            if (ass_program[i].t == CmdType::lid) {
                //   debug("label_pos[{}]={}\n", ass_program[i].an, i);
                label_pos[ass_program[i].an] = i;
            }
        }
        ptr = label_pos[main_label];
    }
    int& reg(RegType r, int arg) {
        switch (r) {
        case RegType::eax:
            assert(arg == 0 || arg == 1);
            return arg ? s[eax] : eax;
        case RegType::ebx:
            assert(arg == 0 || arg == 1);
            return arg ? s[ebx] : ebx;
        case RegType::none:
            return tmp;
        case RegType::rsp:
            return rsp;
        case RegType::lit:
            tmp = arg;
            return tmp;
        case RegType::rbp:
            // 由于 rbp 总是指向保存调用者 rbp 的地址,所以程序不会访问
            // 0(%rbp)。arg=1 时表示操作 rbp 本身
            return arg ? s[rbp + arg] : rbp;
        case RegType::par:
            return par[arg];
        }
        assert(false);
        return tmp;
    }
    void push(RegType r) {
        assert(r != RegType::rsp);
        --rsp;
        s[rsp] = reg(r, 0);
    }
    void pop(RegType r, int arg) {
        assert(r != RegType::rsp);
        reg(r, arg) = s[rsp];
        ++rsp;
    }
    void call(int label) {
        --rsp;
        s[rsp] = ptr;
        ptr = label_pos[label];
    }
    void ret() {
        ptr = s[rsp];
        ++rsp;
    }
    void run() {
        // debug("run!\n");
        init();
        ptr = INT_MIN;
        call(main_label);
        while (ptr != INT_MIN) {
            // debug("ptr={}\n", ptr);
            Command& cmd = ass_program[ptr];
            // print_cmd_type(cmd.t);
            // debug("\n");
            switch (cmd.t) {
            case CmdType::lea:
                assert(cmd.a == RegType::rbp || cmd.a == RegType::lit);
                if (cmd.a == RegType::rbp) {
                    reg(cmd.b, 0) = rbp + cmd.an;
                } else {
                    reg(cmd.b, 0) = cmd.an;
                }
                ++ptr;
                break;
            case CmdType::mov:
                reg(cmd.b, cmd.bn) = reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::add:
                reg(cmd.b, cmd.bn) += reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::sub:
                reg(cmd.b, cmd.bn) -= reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::mul:
                reg(cmd.b, cmd.bn) *= reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::div:
                reg(cmd.b, cmd.bn) /= reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::mod:
                reg(cmd.b, cmd.bn) %= reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::cmpg:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) > reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::cmpge:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) >= reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::cmpl:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) < reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::cmple:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) <= reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::cmpe:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) == reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::cmpne:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) != reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::and_:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) && reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::or_:
                reg(cmd.b, cmd.bn) = (reg(cmd.b, cmd.bn) || reg(cmd.a, cmd.an));
                ++ptr;
                break;
            case CmdType::xor_:
                reg(cmd.b, cmd.bn) =
                    (bool(reg(cmd.b, cmd.bn)) ^ bool(reg(cmd.a, cmd.an)));
                ++ptr;
                break;
            case CmdType::call:
                ++ptr;
                call(cmd.an);
                break;
            case CmdType::jmp_if:
                if (eax) {
                    ptr = label_pos[cmd.an];
                } else {
                    ++ptr;
                }
                break;
            case CmdType::jmp:
                ptr = label_pos[cmd.an];
                break;
            case CmdType::lid:
                ++ptr;
                break;
            case CmdType::ret:
                ret();
                break;
            case CmdType::push:
                push(cmd.a);
                ++ptr;
                break;
            case CmdType::pop:
                pop(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::neg:
                reg(cmd.a, cmd.an) = -reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::not_:
                reg(cmd.a, cmd.an) = !reg(cmd.a, cmd.an);
                ++ptr;
                break;
            case CmdType::read:
                s[eax] = read_int();
                ++ptr;
                break;
            case CmdType::writei:
                cout << eax;
                ++ptr;
                break;
            case CmdType::writec:
                cout.put(char(eax));
                ++ptr;
                break;
            }
        }
    }
} // namespace part3
int main(int, char**) {
    cin.tie(nullptr)->sync_with_stdio(false);
#if defined(LOCAL) && defined(LOCAL_FILE_IO) && !defined(CPH)
    openLocalIoFile();
#endif
    init_input();
    part1::read_tokens();
    // for (Token t : token_program) {
    //     print_token(t);
    // }
    // debug("part1 finish!\n");
    part2::parse_program();
    // for (Command cmd : ass_program) {
    //     print_cmd(cmd);
    //     debug("\n");
    // }
    // debug("part2 finish!\n");
    part3::run();
}

回复

1 条回复,欢迎继续交流。

正在加载回复...