summaryrefslogtreecommitdiffstats
path: root/linux
diff options
context:
space:
mode:
authorBenny Baumann <BenBE@geshi.org>2020-11-07 22:52:28 +0100
committerBenBE <BenBE@geshi.org>2020-11-24 19:05:48 +0100
commitfcda517a67ca0b0a1690a91c15e7255053d83e5c (patch)
tree5a1f23b7fd61dfa4c8b39d5029ac80e95e0dded4 /linux
parent98fce1fb43d66f5c74090094be589fe7f859ed20 (diff)
Add heuristic for space-separated cmdline
Diffstat (limited to 'linux')
-rw-r--r--linux/LinuxProcessList.c96
1 files changed, 91 insertions, 5 deletions
diff --git a/linux/LinuxProcessList.c b/linux/LinuxProcessList.c
index 522b96f5..43c419a9 100644
--- a/linux/LinuxProcessList.c
+++ b/linux/LinuxProcessList.c
@@ -886,9 +886,7 @@ static bool LinuxProcessList_readCmdlineFile(Process* process, const char* dirna
char command[4096 + 1]; // max cmdline length on Linux
int amtRead = xread(fd, command, sizeof(command) - 1);
close(fd);
- int tokenEnd = 0;
- int tokenStart = 0;
- int lastChar = 0;
+
if (amtRead == 0) {
if (process->state == 'Z') {
process->basenameOffset = 0;
@@ -899,11 +897,24 @@ static bool LinuxProcessList_readCmdlineFile(Process* process, const char* dirna
} else if (amtRead < 0) {
return false;
}
+
+ int tokenEnd = 0;
+ int tokenStart = 0;
+ int lastChar = 0;
+ bool argSepNUL = false;
+ bool argSepSpace = false;
+
for (int i = 0; i < amtRead; i++) {
/* newline used as delimiter - when forming the mergedCommand, newline is
* converted to space by LinuxProcess_makeCommandStr */
if (command[i] == '\0') {
command[i] = '\n';
+ } else {
+ /* Record some information for the argument parsing heuristic below. */
+ if (tokenEnd)
+ argSepNUL = true;
+ if (command[i] <= ' ')
+ argSepSpace = true;
}
if (command[i] == '\n') {
@@ -920,10 +931,85 @@ static bool LinuxProcessList_readCmdlineFile(Process* process, const char* dirna
lastChar = i;
}
}
+
+ command[lastChar + 1] = '\0';
+
+ if (!argSepNUL && argSepSpace) {
+ /* Argument parsing heuristic.
+ *
+ * This heuristic is used for processes that rewrite their command line.
+ * Normally the command line is split by using NUL bytes between each argument.
+ * But some programs like chrome flatten this using spaces.
+ *
+ * This heuristic tries its best to undo this loss of information.
+ * To achieve this, we treat every character <= 32 as argument separators
+ * (i.e. all of ASCII control sequences and space).
+ * We then search for the basename of the cmdline in the first argument we found that way.
+ * As path names may contain we try to cross-validate if the path we got that way exists.
+ */
+
+ tokenStart = tokenEnd = 0;
+
+ // From initial scan we know there's at least one space.
+ // Check if that's part of a filename for an existing file.
+ if (Compat_faccessat(AT_FDCWD, command, F_OK, AT_SYMLINK_NOFOLLOW) != 0) {
+ // If we reach here the path does not exist.
+ // Thus begin searching for the part of it that actually is.
+
+ int tokenArg0Start = 0;
+
+ for (int i = 0; i <= lastChar; i++) {
+ /* Any ASCII control or space used as delimiter */
+ char tmpCommandChar = command[i];
+
+ if (command[i] <= ' ') {
+ if (!tokenEnd) {
+ command[i] = '\0';
+
+ bool found = Compat_faccessat(AT_FDCWD, command, F_OK, AT_SYMLINK_NOFOLLOW) == 0;
+
+ // Restore if this wasn't it
+ command[i] = found ? '\n' : tmpCommandChar;
+
+ if (found)
+ tokenEnd = i;
+ if (!tokenArg0Start)
+ tokenArg0Start = tokenStart;
+ } else {
+ // Split on every further separator, regardless of path correctness
+ command[i] = '\n';
+ }
+ } else if (!tokenEnd) {
+ if (command[i] == '/') {
+ tokenStart = i + 1;
+ } else if (command[i] == '\\' && (!tokenStart || command[tokenStart - 1] == '\\')) {
+ tokenStart = i + 1;
+ } else if (command[i] == ':' && (command[i + 1] != '/' && command[i + 1] != '\\')) {
+ tokenEnd = i;
+ }
+ }
+ }
+
+ if (!tokenEnd) {
+ tokenStart = tokenArg0Start;
+
+ // No token delimiter found, forcibly split
+ for (int i = 0; i <= lastChar; i++) {
+ if (command[i] <= ' ') {
+ command[i] = '\n';
+ if (!tokenEnd) {
+ tokenEnd = i;
+ }
+ }
+ }
+ }
+ }
+ }
+
if (tokenEnd == 0) {
- tokenEnd = amtRead;
+ tokenEnd = lastChar + 1;
}
- command[lastChar + 1] = '\0';
+
lp->mergedCommand.maxLen = lastChar + 1; /* accomodate cmdline */
if (!process->comm || strcmp(command, process->comm)) {
process->basenameOffset = tokenEnd;

© 2014-2024 Faster IT GmbH | imprint | privacy policy