a-zlm/server/ShellParser.h

208 lines
7.7 KiB
C++
Raw Permalink Normal View History

2026-01-14 15:38:20 +08:00
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_SHELLPARSER_H
#define ZLMEDIAKIT_SHELLPARSER_H
#include <cctype>
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
// Shell-like command line parser.
// Features:
// - Whitespace splitting (space, tab, newline)
// - Quotes: single ('...') and double ("...")
// - Escapes with backslash (\\) outside quotes
// - In single quotes: backslash is literal (like POSIX shell)
// - In double quotes: backslash can escape " $ ` \\ and newline (line continuation)
//   Additionally supports common C-style escapes (\n \t \r \a \b \f \v, octal starting with \0, hex \xHH) both outside quotes and inside double quotes
// - Line continuation: backslash followed by newline is ignored
// - Produces argv pointers with stable lifetime backed by std::vector<std::string>
//
// Notes:
// - This is NOT a full shell (no variable expansion, no globbing, no command substitution).
// - Behavior aims to be practical and safe for exec* arguments building.
// Outcome of a parse attempt: either the list of arguments, or an error
// message together with the input index at which the error was detected.
struct ParseResult {
    // ok   - whether parsing succeeded.
    // err  - error description; a null pointer is treated as an empty message
    //        (constructing std::string from a null pointer is undefined behaviour).
    // pos  - index in the input associated with the error.
    // args - parsed arguments, moved into the result.
    ParseResult(bool ok, const char *err, size_t pos, std::vector<std::string> args)
        : ok(ok)
        , error_msg(err != nullptr ? err : "")
        , error_pos(pos)
        , args(std::move(args)) {}

    bool ok;                       // true when parsing succeeded
    std::string error_msg;         // error description (empty when none was given)
    size_t error_pos = 0;          // index in input when error happens
    std::vector<std::string> args; // parsed arguments
};
namespace detail {

// True for the characters that separate arguments: space, tab and newline.
inline bool is_space(char c) {
    return c == ' ' || c == '\t' || c == '\n';
}

// Line continuation: if s[i] is a backslash immediately followed by a newline,
// advances i past both characters and returns true. Otherwise i is left
// untouched and false is returned.
inline bool handle_line_continuation(const std::string &s, size_t &i) {
    if (i + 1 < s.size() && s[i] == '\\' && s[i + 1] == '\n') {
        i += 2; // consume both and do nothing
        return true;
    }
    return false;
}

// True if c is a hexadecimal digit. The cast to unsigned char keeps
// std::isxdigit well-defined for negative char values.
inline bool hex_digit(char c) { return std::isxdigit(static_cast<unsigned char>(c)) != 0; }

// Numeric value (0..15) of a hexadecimal digit; returns 0 for any other character.
inline int hex_val(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F') return 10 + (c - 'A');
    return 0;
}

// Decodes a C-style escape whose introducing backslash has already been
// consumed; s[i] is the character right after that backslash.
//
// Recognised sequences: n t r a b f v, backslash, double quote, single quote,
// octal starting with '0' (the '0' plus up to two more octal digits), and
// 'x' followed by one or two hex digits.
//
// Returns {true, decoded char} and advances i past the sequence on success.
// Returns {false, '\0'} and leaves i unchanged when the sequence is not
// recognised, so the caller can treat s[i] as a literal character.
inline std::pair<bool, char> c_style_escape(const std::string &s, size_t &i) {
    if (i >= s.size()) return std::make_pair(false, '\0');
    char c = s[i];
    switch (c) {
        case 'n': ++i; return std::make_pair(true, '\n');
        case 't': ++i; return std::make_pair(true, '\t');
        case 'r': ++i; return std::make_pair(true, '\r');
        case 'a': ++i; return std::make_pair(true, '\a');
        case 'b': ++i; return std::make_pair(true, '\b');
        case 'f': ++i; return std::make_pair(true, '\f');
        case 'v': ++i; return std::make_pair(true, '\v');
        case '\\': ++i; return std::make_pair(true, '\\');
        case '"': ++i; return std::make_pair(true, '"');
        case '\'': ++i; return std::make_pair(true, '\'');
        case '0': {
            // The leading '0' counts as the first of at most three octal digits.
            int val = 0;
            int cnt = 0;
            while (i < s.size() && cnt < 3 && (s[i] >= '0' && s[i] <= '7')) {
                val = (val << 3) + (s[i] - '0');
                ++i;
                ++cnt;
            }
            return std::make_pair(true, static_cast<char>(val & 0xFF));
        }
        case 'x': {
            // Scan with a scratch index so that i is only committed once at
            // least one hex digit has been seen. Otherwise the 'x' would be
            // swallowed and the caller would take the wrong character literally.
            size_t j = i + 1;
            int val = 0;
            int cnt = 0;
            while (j < s.size() && cnt < 2 && hex_digit(s[j])) {
                val = (val << 4) + hex_val(s[j]);
                ++j;
                ++cnt;
            }
            if (cnt == 0) return std::make_pair(false, '\0'); // not actually a hex escape
            i = j;
            return std::make_pair(true, static_cast<char>(val & 0xFF));
        }
        default:
            return std::make_pair(false, '\0');
    }
}

} // namespace detail
// Splits `input` into arguments using shell-like quoting rules.
//
// Rules implemented below:
// - Unquoted space, tab and newline separate arguments.
// - '...' copies characters verbatim; "..." copies characters but honours
//   backslash escapes.
// - A backslash (outside single quotes) is decoded via detail::c_style_escape;
//   if the sequence is not recognised, the character after the backslash is
//   taken literally.
// - A backslash immediately followed by a newline is dropped in every state,
//   including inside single quotes (this differs from a POSIX shell, which
//   keeps it literally there).
// - An explicitly quoted empty string ('' or "") produces an empty argument,
//   as it does in a POSIX shell.
//
// Returns a ParseResult with ok == true and the arguments on success. On
// failure ok == false, args is empty, error_msg describes the problem and
// error_pos is the input index where it was detected (input.size() for a
// trailing backslash or an unterminated quote).
//
// Declared inline because this definition lives in a header; without it,
// including the header from more than one translation unit breaks the link.
inline ParseResult parse_shell_like(const std::string &input) {
    using namespace detail;

    enum class State { Normal, InSingle, InDouble };

    std::vector<std::string> args;
    std::string cur;
    // True once the current argument has started, even if it is still empty
    // (e.g. after an opening quote). Needed so that '' and "" are not lost.
    bool in_token = false;
    State st = State::Normal;
    size_t i = 0;
    const size_t N = input.size();

    // Decodes whatever follows an already-consumed backslash and appends it to
    // `cur`. Precondition: i < N.
    auto append_escaped = [&]() {
        const size_t after_backslash = i;
        const auto esc = c_style_escape(input, i);
        if (esc.first) {
            cur.push_back(esc.second);
            return;
        }
        // Not a known C escape: take the character after the backslash
        // literally. Rewind defensively in case the helper moved the cursor
        // before rejecting the sequence.
        i = after_backslash;
        cur.push_back(input[i]);
        ++i;
    };

    while (i < N) {
        // Line continuation (backslash + newline) applies in all states.
        if (handle_line_continuation(input, i)) continue;

        const char c = input[i];
        switch (st) {
            case State::Normal: {
                if (is_space(c)) {
                    if (in_token) {
                        args.emplace_back(std::move(cur));
                        cur.clear(); // moved-from string: reset to a known state
                        in_token = false;
                    }
                    ++i;
                } else if (c == '\'') {
                    st = State::InSingle;
                    in_token = true;
                    ++i;
                } else if (c == '"') {
                    st = State::InDouble;
                    in_token = true;
                    ++i;
                } else if (c == '\\') {
                    ++i; // consume backslash
                    if (i >= N) {
                        return {false, "结尾处孤立的反斜杠(未转义任何字符)", i, {}};
                    }
                    in_token = true;
                    append_escaped();
                } else {
                    cur.push_back(c);
                    in_token = true;
                    ++i;
                }
            } break;
            case State::InSingle: {
                // Everything up to the closing quote is literal.
                if (c == '\'') {
                    st = State::Normal;
                } else {
                    cur.push_back(c);
                }
                ++i;
            } break;
            case State::InDouble: {
                if (c == '"') {
                    st = State::Normal;
                    ++i;
                } else if (c == '\\') {
                    ++i; // consume backslash
                    if (i >= N) {
                        return {false, "双引号内以反斜杠结尾,缺少被转义字符", i, {}};
                    }
                    // Supports the C-style escapes and, failing that, escapes
                    // any single character literally (e.g. $ or `).
                    append_escaped();
                } else {
                    cur.push_back(c);
                    ++i;
                }
            } break;
        }
    }

    if (st == State::InSingle) {
        return {false, "缺少配对的单引号('", i, {}};
    }
    if (st == State::InDouble) {
        return {false, "缺少配对的双引号(\"", i, {}};
    }
    if (in_token) args.emplace_back(std::move(cur));
    return {true, "", 0, std::move(args)};
}
// Builds a null-terminated argv-style array for the exec* family.
// Entry k is args[k].c_str() and one extra trailing slot holds nullptr. The
// pointers refer to the strings' own storage, so `args` must stay alive and
// unmodified for as long as the returned vector is used.
inline std::vector<const char*> make_argv(const std::vector<std::string>& args) {
    const std::size_t count = args.size();
    // Pre-fill with nullptr: the last slot is already the terminator.
    std::vector<const char*> pointers(count + 1, nullptr);
    for (std::size_t k = 0; k < count; ++k) {
        pointers[k] = args[k].c_str();
    }
    return pointers;
}
#endif // ZLMEDIAKIT_SHELLPARSER_H