feat: respect IFS environment variable for internal splits.

hamza-cskn · hamza-cskn · commit abb2ddfe59d9 · 2025-03-15T12:56:26.000+03:00
diff --git a/Makefile b/Makefile
@@ -4,7 +4,8 @@ LIBFT_DIR = ./libft
 LIBFT_PATH = $(LIBFT_DIR)/libft.a
 
 CC = gcc
-FLAGS = -g -Wall -Wextra -Werror #-fsanitize=address
+
+FLAGS = -g -Wall -Wextra -Werror -fsanitize=address  -I/opt/homebrew/opt/readline/include -L/opt/homebrew/opt/readline/lib -lreadline  -Wno-unused-command-line-argument #-fsanitize=address
 
 MEMORY_ALLOCATOR_SOURCES = memory-allocator/aborter.c memory-allocator/allocator.c
 SOURCES = src/execute/execute_utils.c src/builtin/cd.c src/builtin/exit.c src/builtin/export.c src/builtin/export_utils.c \
@@ -13,7 +14,7 @@ SOURCES = src/execute/execute_utils.c src/builtin/cd.c src/builtin/exit.c src/bu
  src/lexer/lexer.c src/lexer/unquote.c src/lexer/lexer_utils.c src/execute/execute.c \
  src/lexer/lexer_error_message.c src/lexer/is_valid.c src/execute/error_message.c src/execute/fd_utils.c \
  src/parser/parser.c src/parser/parser_state.c src/parser/parser_utils.c src/execute/heredoc.c \
- src/expander/expander.c src/splitter.c src/lexer/syntax_analyzer.c src/signal.c $(MEMORY_ALLOCATOR_SOURCES) \
+ src/expander/expander.c src/lexer/syntax_analyzer.c src/signal.c $(MEMORY_ALLOCATOR_SOURCES) \
  src/redirections/redirections.c src/env/global_env.c src/utils/unsafe_utils.c src/utils/char_classification.c src/utils/string_utils.c \
  src/utils/quote_classification.c src/expander/expander_2.c
 
@@ -31,15 +32,22 @@ $(TEST_PATH):
 
 test: $(TEST_PATH) $(NAME)
 	@printf "$(CLEAN_CAR)$(GREEN_COLOR)[Tests compiling]$(BLUE_COLOR) : $(PURPLE_COLOR)$<$(NO_COLOR)"
-	@$(CC) $(FLAGS) $(SOURCES:.c=.o) $(LIBFT_PATH) $(TEST_SOURCES) -o $(TEST_PATH)/tests -lcriterion -L/usr/local/lib -I/usr/local/include -lreadline
+	@$(CC) $(FLAGS) $(SOURCES:.c=.o) $(LIBFT_PATH) $(TEST_SOURCES) -o $(TEST_PATH)/tests -I/opt/homebrew/Cellar/criterion/2.4.2_2/include -L/opt/homebrew/Cellar/criterion/2.4.2_2/lib -lcriterion
+	@printf "$(CLEAN_CAR)$(GREEN_COLOR)Tests running right now. Please wait.\n$(BLUE_COLOR)$(NO_COLOR)"
+	@./$(TEST_PATH)/tests ; export TEST_RESULT=$$? ; rm -f __test_file* | exit $$TEST_RESULT 
+
+testifs: $(TEST_PATH) $(NAME)
+	@printf "$(CLEAN_CAR)$(GREEN_COLOR)[Tests compiling]$(BLUE_COLOR) : $(PURPLE_COLOR)$<$(NO_COLOR)"
+	@$(CC) $(FLAGS) $(SOURCES:.c=.o) $(LIBFT_PATH)  -o $(TEST_PATH)/tests -I/opt/homebrew/Cellar/criterion/2.4.2_2/include -L/opt/homebrew/Cellar/criterion/2.4.2_2/lib -lcriterion
 	@printf "$(CLEAN_CAR)$(GREEN_COLOR)Tests running right now. Please wait.\n$(BLUE_COLOR)$(NO_COLOR)"
 	@./$(TEST_PATH)/tests ; export TEST_RESULT=$$? ; rm -f __test_file* | exit $$TEST_RESULT 
 
+
 $(LIBFT_PATH):
 	@make bonus -C $(LIBFT_DIR) FLAGS="$(FLAGS)"
 
 $(NAME): $(LIBFT_PATH) $(MINISHELL_OBJECTS)
-	@$(CC) $(FLAGS) -o $(NAME) $(MINISHELL_OBJECTS) $(LIBFT_PATH) -L/usr/local/lib -I/usr/local/include -lreadline
+	@$(CC) $(FLAGS) -o $(NAME) $(MINISHELL_OBJECTS) $(LIBFT_PATH)
 	@sleep 0.2
 	@echo "$(CLEAN_CAR)$(GREEN_COLOR)Minishell compiled!$(NO_COLOR)"
 
@@ -60,7 +68,7 @@ fclean:
 
 re: fclean all
 
-.PHONY: all clean fclean re
+.PHONY: all clean fclean re test testifs
 
 NO_COLOR		=	\x1b[0m
 GREEN_COLOR		=	\x1b[32;01m
diff --git a/includes/char_classification.h b/includes/char_classification.h
@@ -13,34 +13,12 @@
 #ifndef CHAR_CLASSIFICATION_H
 # define CHAR_CLASSIFICATION_H
 
-/**
- * @file char_classification.h
- * @file char_classification.c
- * 
- * @brief This file contains macros and enums for
- * character classification.
- *
- * @description This file completely defines what our
- * shell should understand when it comes to characters.
- *
- * @note This file completely defines what our shellshould understand
- * regarding characters. If this file makes you feel like we just told
- * you that cars have 4 wheels, then you are right. But we need to be
- * explicit about it.
- *
- * @note You can find tons of code that does not care about these details.
- * and probably you'll see that they going to be a soup instead of being a
- * code. This file is very critical. So we carefully read the manual before
- * and during writing this file.
- * 
- **/
 typedef enum s_quote
 {
 	DOUBLE_QUOTE = '\"',
 	SINGLE_QUOTE = '\''
 }	t_quote;
 
-int	is_field_terminator(char c);
 int	is_meta_char(char c);
 int	is_whitespace(char c);
 int	is_quote(char c);
diff --git a/includes/minishell.h b/includes/minishell.h
@@ -112,14 +112,13 @@ void			handle_invalid_input(t_token *lexer_data);
 
 // expander
 void			expand(t_token **head);
-void			internal_field_split(t_token **token);
-void			insert_uword_tokens(t_token **token_ptr, char **strings);
+void			internal_field_split(t_token **token, t_token**next_token_ptr);
 void			expand_string(char **string);
 char			*replace_string(char *input, int p_start,
 					int p_len, char *replacement);
 int				is_nameless_variable(t_token *token);
 void			expand_token(t_token *token, t_token **head,
-					t_token **token_ptr, t_token **prev_ptr);
+					t_token **token_ptr, t_token **prev_ptr, t_token**next_token);
 
 // parser
 t_command		*parse(t_token *lexer_data);
@@ -184,4 +183,8 @@ void			abort_function(void);
 void			path_error(char	*cmd);
 void			pid_error(int *prev_pipe, int *next_pipe);
 
+int				are_quotes_valid(t_token *token);
+t_token			*do_ifs(char *str);
+t_token			**find_token_ptr_before(t_token **head, t_token *tofind);
+
 #endif
diff --git a/includes/utils.h b/includes/utils.h
@@ -20,8 +20,7 @@ char	*ft_str_arr_join(char **str_list, unsigned int str_count);
 int		skip_white_spaces(const char *str);
 int		find_char(const char *str, char looking_for);
 int		is_escaped(char *input, unsigned int index);
-int		is_internal_field_sep(char *str, int index);
-char	**str_split(char const *str, int (is_delimiter)(char *, int));
+int		is_internal_field_sep(char c);
 int		count_len(const char *str, int (*is_valid)(char c));
 int		str_arr_size(char **strings);
 char	**ft_unsafe_strarrdup(char **arr);
diff --git a/src/expander/expander.c b/src/expander/expander.c
@@ -16,6 +16,7 @@
 #include "../../includes/char_classification.h"
 #include "../../memory-allocator/allocator.h"
 #include "../../includes/env.h"
+#include <stdio.h>
 
 int	expand_variable(char **input, int index)
 {
@@ -85,23 +86,79 @@ void	expand(t_token **head)
 	while (token)
 	{
 		if (token->type == UNQUOTED_WORD || token->type == DOUBLE_QUOTED_WORD)
-			expand_token(token, head, token_ptr, prev_ptr);
+			expand_token(token, head, token_ptr, prev_ptr, &token);
 		prev_ptr = token_ptr;
 		token_ptr = &token->next;
 		token = token->next;
 	}
 }
 
-void	internal_field_split(t_token **token_ptr)
+/*
+ * do_ifs - split a string into a token list on the characters of IFS.
+ *
+ * @str: NUL-terminated string to split; it is only read.
+ *
+ * is_internal_field_sep() decides which characters are separators.
+ *  - A separator at the start of the string yields a DELIMITER token at the
+ *    head of the list.
+ *  - Every run of non-separator characters becomes an UNQUOTED_WORD token,
+ *    with a DELIMITER token inserted between consecutive words.
+ *  - A separator consumes itself plus the IFS whitespace that follows it.
+ *    Whitespace that is not listed in IFS is ordinary data and is kept.
+ *  - When the character after a consumed separator is a non-whitespace
+ *    separator, an empty word is emitted for it.
+ *  - A separator at the end of the string produces no token.
+ *
+ * Returns the head of the new list, or NULL when str is empty.
+ */
+t_token		*do_ifs(char *str)
 {
-	char	**new_words;
+	int len = ft_strlen(str);
+	int i = 0;
+	t_token		*head = NULL;
+	int last_was_word = 0;
+
+	/* Leading separator: remember it as a DELIMITER and step over it. */
+	if (len > 0 && is_internal_field_sep(str[0])) {
+		i++;
+		if (i < len && is_whitespace(str[i]) && is_internal_field_sep(str[i])) {
+			while (i < len && is_whitespace(str[i]) && is_internal_field_sep(str[i]))
+				i++;
+			/* Bounded: never inspect the terminating NUL as a separator. */
+			if (i < len && is_internal_field_sep(str[i]))
+				i++;
+		}
+		head = lexer_data_new((t_token){NULL, DELIMITER, NULL});
+	}
+
+	while (i < len) {
+		int start = i;
+
+		while (i < len && !is_internal_field_sep(str[i]))
+			i++;
+
+		if (last_was_word)
+			lexer_data_append(&head, lexer_data_new((t_token){NULL, DELIMITER, NULL}));
+		lexer_data_append(&head, lexer_data_new((t_token){ft_substr(str, start, i - start), UNQUOTED_WORD, NULL}));
+		last_was_word = 1;
+		if (i < len && is_internal_field_sep(str[i])) {
+			i++;
+			/* Only IFS whitespace is swallowed, matching the leading case. */
+			while (i < len && is_whitespace(str[i]) && is_internal_field_sep(str[i]))
+				i++;
+		}
+	}
+	return head;
+}
+
+/*
+ * internal_field_split - replace one token by the tokens do_ifs() makes of it.
+ *
+ * @token_ptr:      address of the list link that currently points at the
+ *                  token to split.
+ * @next_token_ptr: receives the last inserted token when a split happens.
+ *
+ * Nothing is changed when do_ifs() returns NULL or a single token.
+ */
+void	internal_field_split(t_token **token_ptr, t_token **next_token_ptr)
+{
+	t_token *new_words;
 	t_token	*token;
 
 	token = *token_ptr;
-	new_words = str_split(token->value, is_internal_field_sep);
-	if (str_arr_size(new_words) == 1)
+	new_words = do_ifs(token->value);
+	if (new_words == NULL || new_words->next == NULL)
 		return ;
-	safe_free(token->value);
-	insert_uword_tokens(token_ptr, new_words);
-	safe_free(new_words);
+	/* NOTE(review): the early return above does not release a single-token
+	 * list returned by do_ifs() - confirm the allocator reclaims it. */
+
+	// move expander cursor to the last token
+	*next_token_ptr = get_last_lexer_data(new_words);
+
+	// put new expanded tokens
+	lexer_data_insert(token, new_words);
+
+	/* remove legacy token
+	 * NOTE(review): *token_ptr == token here, so the search below stops
+	 * before its first step and prev is NULL. The unlink presumably happens
+	 * through token_ptr inside remove_token() - confirm it accepts a NULL
+	 * prev. */
+	t_token **prev = find_token_ptr_before(token_ptr, token);
+	remove_token(prev, token_ptr, token);
+
 }
diff --git a/src/expander/expander_2.c b/src/expander/expander_2.c
@@ -15,16 +15,17 @@
 #include "../../libft/libft.h"
 
+/*
+ * expand_token - expand a single token in place.
+ *
+ * A nameless variable (see is_nameless_variable) gets an empty string as its
+ * value. Any other token has its value rewritten by expand_string(); then
+ *  - if the result is_full_of_spaces(), the token is removed from the list;
+ *  - else, if the token is an UNQUOTED_WORD whose value started with '$'
+ *    before expansion, it is passed to internal_field_split().
+ *
+ * next_token_ptr is only forwarded to internal_field_split().
+ */
 void	expand_token(t_token *token, t_token **head,
-		t_token **token_ptr, t_token **prev_ptr)
+		t_token **token_ptr, t_token **prev_ptr, t_token**next_token_ptr)
 {
 	if (is_nameless_variable(token))
 		token->value = ft_strdup("");
 	else
 	{
+		int isvar = token->value[0] == '$';	/* read before expand_string() rewrites the value */
 		expand_string(&token->value);
 		if (is_full_of_spaces(token->value))
 			remove_token(prev_ptr, head, token);
-		else if (token->type == UNQUOTED_WORD)
-			internal_field_split(token_ptr);
+		else if ((token->type == UNQUOTED_WORD && isvar))	/* literal words are never field-split */
+			internal_field_split(token_ptr, next_token_ptr);
 	}
 }
diff --git a/src/expander/expander_nonvariables.c b/src/expander/expander_nonvariables.c
@@ -16,28 +16,6 @@
 #include "../../includes/char_classification.h"
 #include "../../memory-allocator/allocator.h"
 
-void	insert_uword_tokens(t_token **token_ptr, char **strings)
-{
-	t_token	*token;
-	t_token	*list;
-	t_token	*new;
-	int		i;
-
-	token = *token_ptr;
-	token->value = strings[0];
-	list = NULL;
-	i = 1;
-	while (strings[i])
-	{
-		new = lexer_data_new((t_token){NULL, DELIMITER, NULL});
-		lexer_data_append(&list, new);
-		new = lexer_data_new((t_token){strings[i], UNQUOTED_WORD, NULL});
-		lexer_data_append(&list, new);
-		i++;
-	}
-	lexer_data_insert(token, list);
-}
-
 int	is_nameless_variable(t_token *token)
 {
 	return (ft_strcmp(token->value, "$") == 0
diff --git a/src/handler.c b/src/handler.c
@@ -12,6 +12,8 @@
 
 #include "../includes/minishell.h"
 #include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
 
 int	*get_exit_status(void)
 {
@@ -39,14 +41,16 @@ void	handle_input(char *input)
 		return (handle_invalid_input(lexer_data));
 	if (is_empty(lexer_data))
 		return ;
+	if (are_quotes_valid(lexer_data) == 0)
+		return (handle_invalid_input(lexer_data));
+	unquote(lexer_data);
 	expand(&lexer_data);
 	if (!is_valid(lexer_data))
 		return (handle_invalid_input(lexer_data));
-	unquote(lexer_data);
 	parser_data = parse(lexer_data);
 	handle_file_redirections(parser_data);
 	g_signal_type = RUNNING_COMMANDS;
+	uninit_tokens(lexer_data);
 	execute(parser_data);
 	g_signal_type = PROMPT;
-	uninit_tokens(lexer_data);
 }
diff --git a/src/lexer/is_valid.c b/src/lexer/is_valid.c
@@ -59,7 +59,6 @@ int	are_tokens_valid(t_token *lexer_data)
+/*
+ * is_valid - run the token-level checks on a lexed token list.
+ *
+ * Returns non-zero only when are_tokens_valid(), is_there_lack_of_word() and
+ * validate_pipes() all succeed. Quote validation is no longer part of this
+ * check.
+ */
 int	is_valid(t_token *lexer_data)
 {
 	return (are_tokens_valid(lexer_data)
-		&& are_quotes_valid(lexer_data)
 		&& is_there_lack_of_word(lexer_data)
 		&& validate_pipes(lexer_data));
 }
diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c
@@ -55,9 +55,15 @@ t_lexer_state	word_state(t_token **lexer_data, char *input, int *const i)
 	}
 	else
 	{
-		token.type = UNQUOTED_WORD;
-		while (is_unquoted_word_char(input[*i]) || is_escaped(input, *i))
+		if (input[*i] == '$'){
 			(*i)++;
+			while (is_name_char(input[*i]))
+				(*i)++;
+		} else {
+			while ((is_unquoted_word_char(input[*i]) || is_escaped(input, *i)) && input[*i] != '$')
+				(*i)++;
+		}
+		token.type = UNQUOTED_WORD;
 		(*i)--;
 	}
 	token.value = ft_substr(input, start_i, *i - start_i + 1);
diff --git a/src/lexer/lexer_utils.c b/src/lexer/lexer_utils.c
@@ -72,3 +72,15 @@ void	uninit_tokens(t_token *lexical_data)
 		lexical_data = next;
 	}
 }
+
+/*
+ * find_token_ptr_before - locate the link that precedes `tofind` in a list.
+ *
+ * @head:   address of the pointer to the first token of the list.
+ * @tofind: token to look for (compared by address).
+ *
+ * Returns the address of the pointer that refers to the token immediately
+ * before `tofind`, so `*result` is the previous token. Returns NULL when
+ * `head` is NULL, when `tofind` is the first token, or when `tofind` is not
+ * in the list.
+ */
+t_token **find_token_ptr_before(t_token **head, t_token *tofind)
+{
+	t_token **prev = NULL;
+	t_token **cur = head;
+
+	if (head == NULL)
+		return NULL;
+	while (*cur && *cur != tofind) {
+		prev = cur;
+		cur = &((*cur)->next);
+	}
+	/* Reaching the end means tofind is absent: do not report a false hit. */
+	if (*cur == NULL)
+		return NULL;
+	return prev;
+}
diff --git a/src/lexer/utils.c b/src/lexer/utils.c
@@ -75,11 +75,12 @@ int	str_arr_size(char **strings)
 	return (i);
 }
 
-int	is_internal_field_sep(char *str, int index)
+/*
+ * is_internal_field_sep - tell whether `c` is listed in the IFS variable.
+ *
+ * @c: character to classify.
+ *
+ * Returns 1 when `c` occurs in the value find_env("IFS") yields, else 0.
+ * Returns 0 when IFS is not found. A NUL `c` is never a separator: it is
+ * rejected up front because a strchr-style lookup matches the terminator of
+ * the IFS string itself (presuming ft_strchr follows strchr semantics).
+ *
+ * NOTE(review): POSIX performs field splitting with the default
+ * <space><tab><newline> when IFS is unset; here an unset IFS disables
+ * splitting - confirm this is intended.
+ */
+int	is_internal_field_sep(char c)
 {
-	if (is_escaped(str, index))
+	char *ifs = find_env("IFS");
+	if (!ifs || c == '\0')
 		return (0);
-	if (is_field_terminator(str[index]))
+	if (ft_strchr(ifs, c))
 		return (1);
 	return (0);
 }
diff --git a/src/main.c b/src/main.c
@@ -37,6 +37,7 @@ int	main(int ac, char **av, char **envp)
 	(void)av;
 	init_env(envp);
 	export_env("OLDPWD=");
+	export_env("IFS= \t\n");
 	register_post_abort_func(handle_memory_error);
 	register_signal_handler();
 	while (1)
diff --git a/src/splitter.c b/src/splitter.c
diff --git a/src/utils/char_classification.c b/src/utils/char_classification.c
diff --git a/tests/end2end_tests.c b/tests/end2end_tests.c
diff --git a/tests/expander_tests.c b/tests/expander_tests.c
diff --git a/tests/ifs_tests.c b/tests/ifs_tests.c
diff --git a/tests/syntax_analyzer_tests.c b/tests/syntax_analyzer_tests.c

Original file line number	Diff line number	Diff line change
`@@ -15,16 +15,17 @@`
`15`	`15`	`#include "../../libft/libft.h"`
`16`	`16`
`17`	`17`	`void expand_token(t_token *token, t_token **head,`
`18`		`- t_token **token_ptr, t_token **prev_ptr)`
	`18`	`+ t_token **token_ptr, t_token **prev_ptr, t_token**next_token_ptr)`
`19`	`19`	`{`
`20`	`20`	`if (is_nameless_variable(token))`
`21`	`21`	`token->value = ft_strdup("");`
`22`	`22`	`else`
`23`	`23`	`{`
	`24`	`+ int isvar = token->value[0] == '$';`
`24`	`25`	`expand_string(&token->value);`
`25`	`26`	`if (is_full_of_spaces(token->value))`
`26`	`27`	`remove_token(prev_ptr, head, token);`
`27`		`- else if (token->type == UNQUOTED_WORD)`
`28`		`- internal_field_split(token_ptr);`
	`28`	`+ else if ((token->type == UNQUOTED_WORD && isvar))`
	`29`	`+ internal_field_split(token_ptr, next_token_ptr);`
`29`	`30`	`}`
`30`	`31`	`}`
Original file line number	Diff line number	Diff line change
`@@ -59,7 +59,6 @@ int are_tokens_valid(t_token *lexer_data)`
`59`	`59`	`int is_valid(t_token *lexer_data)`
`60`	`60`	`{`
`61`	`61`	`return (are_tokens_valid(lexer_data)`
`62`		`- && are_quotes_valid(lexer_data)`
`63`	`62`	`&& is_there_lack_of_word(lexer_data)`
`64`	`63`	`&& validate_pipes(lexer_data));`
`65`	`64`	`}`
Original file line number	Diff line number	Diff line change
`@@ -55,9 +55,15 @@ t_lexer_state word_state(t_token **lexer_data, char *input, int *const i)`
`55`	`55`	`}`
`56`	`56`	`else`
`57`	`57`	`{`
`58`		`- token.type = UNQUOTED_WORD;`
`59`		`- while (is_unquoted_word_char(input[*i]) \|\| is_escaped(input, *i))`
	`58`	`+ if (input[*i] == '$'){`
`60`	`59`	`(*i)++;`
	`60`	`+ while (is_name_char(input[*i]))`
	`61`	`+ (*i)++;`
	`62`	`+ } else {`
	`63`	`+ while ((is_unquoted_word_char(input[*i]) \|\| is_escaped(input, *i)) && input[*i] != '$')`
	`64`	`+ (*i)++;`
	`65`	`+ }`
	`66`	`+ token.type = UNQUOTED_WORD;`
`61`	`67`	`(*i)--;`
`62`	`68`	`}`
`63`	`69`	`token.value = ft_substr(input, start_i, *i - start_i + 1);`