%{ /* * Copyright 2013 Google Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. */ /* * Author: ncardwell@google.com (Neal Cardwell) * * This is the parser for the packetdrill script language. It is * processed by the bison parser generator. * * For full documentation see: http://www.gnu.org/software/bison/manual/ * * Here is a quick and dirty tutorial on bison: * * A bison parser specification is basically a BNF grammar for the * language you are parsing. Each rule specifies a nonterminal symbol * on the left-hand side and a sequence of terminal symbols (lexical * tokens) and/or nonterminal symbols on the right-hand side that can * "reduce" to the symbol on the left hand side. When the parser sees * the sequence of symbols on the right where it "wants" to see a * nonterminal on the left, the rule fires, executing the semantic * action code in curly {} braces as it reduces the right hand side to * the left hand side. * * The semantic action code for a rule produces an output, which it * can reference using the $$ token. The set of possible types * returned in output expressions is given in the %union section of * the .y file. The specific type of the output for a terminal or * nonterminal symbol (corresponding to a field in the %union) is * given by the %type directive in the .y file. 
The action code can * access the outputs of the symbols on the right hand side by using * the notation $1 for the first symbol, $2 for the second symbol, and * so on. * * The lexer (generated by flex from lexer.l) feeds a stream of * terminal symbols up to this parser. Parser semantic actions can * access the lexer output for a terminal symbol with the same * notation they use for nonterminals. * * Here's an example rule with its semantic action in {} braces: * * tcp_option * ... * | MSS INTEGER { * $$ = tcp_option_new(...); * ... * $$->data.mss.bytes = htons($2); * } * * This rule basically says: * * When the parser wants to see a tcp_option, if it sees an MSS from * the lexer followed by an INTEGER from the lexer then run the * action code that (a) stores in the output $$ a pointer to a * struct tcp_option object, and then (b) stores in that object the * value of the INTEGER token (accessed with $2). * */ /* The first part of the .y file consists of C code that bison copies * directly into the top of the .c file it generates. */ #include "types.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "gre_packet.h" #include "ip.h" #include "ip_packet.h" #include "icmp_packet.h" #include "logging.h" #include "mpls.h" #include "mpls_packet.h" #include "tcp_packet.h" #include "udp_packet.h" #include "parse.h" #include "script.h" #include "tcp.h" #include "tcp_options.h" /* This include of the bison-generated .h file must go last so that we * can first include all of the declarations on which it depends. */ #include "parser.h" /* Change this YYDEBUG to 1 to get verbose debug output for parsing: */ #define YYDEBUG 0 #if YYDEBUG extern int yydebug; #endif extern FILE *yyin; extern int yylineno; extern char *yytext; extern int yylex(void); extern int yyparse(void); extern int yywrap(void); /* This mutex guards all parser global variables declared in this file. 
*/ pthread_mutex_t parser_mutex = PTHREAD_MUTEX_INITIALIZER; /* The input to the parser: the path name of the script file to parse. */ static const char* current_script_path = NULL; /* The starting line number of the input script statement that we're * currently parsing. This may be different than yylineno if bison had * to look ahead and lexically scan a token on the following line to * decide that the current statement is done. */ static int current_script_line = -1; /* * We uses this object to look up configuration info needed during * parsing (such as whether packets are IPv4 or IPv6). */ static const struct config *in_config = NULL; /* The output of the parser: an output script containing * 1) a linked list of options * 2) a linked list of events */ static struct script *out_script = NULL; /* The test invocation to pass back to parse_and_finalize_config(). */ struct invocation *invocation; /* Copy the script contents into our single linear buffer. */ void copy_script(const char *script_buffer, struct script *script) { DEBUGP("copy_script\n"); free(script->buffer); script->length = strlen(script_buffer); script->buffer = strdup(script_buffer); assert(script->buffer != NULL); DEBUGP("copy_script: %d bytes\n", script->length); } /* Read the script file into a single linear buffer. */ void read_script(const char *script_path, struct script *script) { int size = 0; DEBUGP("read_script(%s)\n", script_path); while (script->buffer == NULL) { struct stat script_info; int fd = -1; /* Allocate a buffer big enough for the whole file. */ if (stat(script_path, &script_info) != 0) die("parse error: stat() of script file '%s': %s\n", script_path, strerror(errno)); /* Pick a buffer size larger than the file, so we'll * know if the file grew. */ size = max((int)script_info.st_size, size) + 1; script->buffer = malloc(size); assert(script->buffer != NULL); /* Read the file into our buffer. 
*/ fd = open(script_path, O_RDONLY); if (fd < 0) die("parse error opening script file '%s': %s\n", script_path, strerror(errno)); script->length = read(fd, script->buffer, size); if (script->length < 0) die("parse error reading script file '%s': %s\n", script_path, strerror(errno)); /* If we filled the buffer, then probably another * process wrote more to the file since our stat call, * so we should try again. */ if (script->length == size) { free(script->buffer); script->buffer = NULL; script->length = 0; } if (close(fd)) die_perror("close"); } DEBUGP("read_script: %d bytes\n", script->length); } /* The public entry point for the script parser. Parses the * text script file with the given path name and fills in the script * object with the parsed representation. */ int parse_script(const struct config *config, struct script *script, struct invocation *callback_invocation) { /* This bison-generated parser is not multi-thread safe, so we * have a lock to prevent more than one thread using the * parser at the same time. This is useful in the wire server * context, where in general we may have more than one test * thread running at the same time. */ if (pthread_mutex_lock(&parser_mutex) != 0) die_perror("pthread_mutex_lock"); #if YYDEBUG yydebug = 1; #endif /* Now parse the script from our buffer. */ yyin = fmemopen(script->buffer, script->length, "r"); if (yyin == NULL) die_perror("fmemopen: parse error opening script buffer"); current_script_path = config->script_path; in_config = config; out_script = script; invocation = callback_invocation; /* We have to reset the line number here since the wire server * can do more than one yyparse(). */ yylineno = 1; int result = yyparse(); /* invoke bison-generated parser */ current_script_path = NULL; if (fclose(yyin)) die_perror("fclose: error closing script buffer"); /* Unlock parser. */ if (pthread_mutex_unlock(&parser_mutex) != 0) die_perror("pthread_mutex_unlock"); return result ? 
STATUS_ERR : STATUS_OK; } /* Bison emits code to call this method when there's a parse-time error. * We print the line number and the error message. */ static void yyerror(const char *message) { fprintf(stderr, "%s:%d: parse error at '%s': %s\n", current_script_path, yylineno, yytext, message); } /* After we finish parsing each line of a script, we analyze the * semantics of the line. If we encounter an error then we print the * error message to stderr and exit with an error. */ static void semantic_error(const char* message) { assert(current_script_line >= 0); die("%s:%d: semantic error: %s\n", current_script_path, current_script_line, message); } /* This standard callback is invoked by flex when it encounters * the end of a file. We return 1 to tell flex to return EOF. */ int yywrap(void) { return 1; } /* Create and initalize a new expression. */ static struct expression *new_expression(enum expression_t type) { struct expression *expression = calloc(1, sizeof(struct expression)); expression->type = type; return expression; } /* Create and initalize a new integer expression with the given * literal value and format string. */ static struct expression *new_integer_expression(s64 num, const char *format) { struct expression *expression = new_expression(EXPR_INTEGER); expression->value.num = num; expression->format = format; return expression; } /* Create and initalize a new one-element expression_list. */ static struct expression_list *new_expression_list( struct expression *expression) { struct expression_list *list; list = calloc(1, sizeof(struct expression_list)); list->expression = expression; list->next = NULL; return list; } /* Add the expression to the end of the list. */ static void expression_list_append(struct expression_list *list, struct expression *expression) { while (list->next != NULL) { list = list->next; } list->next = new_expression_list(expression); } /* Create and initialize a new option. 
*/ static struct option_list *new_option(char *name, char *value) { struct option_list *opt = calloc(1, sizeof(struct option_list)); opt->name = name; opt->value = value; return opt; } /* Create and initialize a new event. */ static struct event *new_event(enum event_t type) { struct event *e = calloc(1, sizeof(struct event)); e->type = type; e->time_usecs_end = NO_TIME_RANGE; e->offset_usecs = NO_TIME_RANGE; return e; } static int parse_hex_byte(const char *hex, u8 *byte) { if (!isxdigit((int)hex[0]) || !isxdigit((int)hex[1])) { return STATUS_ERR; /* need two hex digits per byte */ } char buf[] = { hex[0], hex[1], '\0' }; char* buf_end = NULL; u32 byte_value = strtoul(buf, &buf_end, 16); assert(byte_value <= 0xff); assert(buf_end == buf + 2); *byte = byte_value; return STATUS_OK; } /* Converts a hex string in 'hex' into bytes and stores them in a * buffer 'buf' of length 'buf_len' bytes; returns number of bytes in * out_len. Works for hex strings of arbitrary size, such as very long * TCP Fast Open cookies. 
*/ static int parse_hex_string(const char *hex, u8 *buf, int buf_len, int *out_len) { u8 *out = buf; u8 *buf_end = buf + buf_len; while (hex[0] != '\0') { if (out >= buf_end) { return STATUS_ERR; /* ran out of output space */ } if (parse_hex_byte(hex, out)) return STATUS_ERR; /* bad character */ hex += 2; out += 1; } *out_len = out - buf; assert(*out_len <= buf_len); return STATUS_OK; } static struct tcp_option *new_tcp_fast_open_option(const char *cookie_string, char **error) { int cookie_string_len = strlen(cookie_string); if (cookie_string_len & 1) { asprintf(error, "TCP fast open cookie has an odd number of digits"); return NULL; } int cookie_bytes = cookie_string_len / 2; /* 2 hex chars per byte */ if (cookie_bytes > MAX_TCP_FAST_OPEN_COOKIE_BYTES) { asprintf(error, "TCP fast open cookie too long"); asprintf(error, "TCP fast open cookie of %d bytes " "exceeds maximum cookie length of %d bytes", cookie_bytes, MAX_TCP_FAST_OPEN_COOKIE_BYTES); return NULL; } u8 option_bytes = TCPOLEN_EXP_FASTOPEN_BASE + cookie_bytes; struct tcp_option *option; option = tcp_option_new(TCPOPT_EXP, option_bytes); option->data.fast_open.magic = htons(TCPOPT_FASTOPEN_MAGIC); int parsed_bytes = 0; /* Parse cookie. This should be an ASCII hex string * representing an even number of bytes (4-16 bytes). But we * do not enforce this, since we want to allow test cases that * supply invalid cookies. */ if (parse_hex_string(cookie_string, option->data.fast_open.cookie, sizeof(option->data.fast_open.cookie), &parsed_bytes)) { free(option); asprintf(error, "TCP fast open cookie is not a valid hex string"); return NULL; } assert(parsed_bytes == cookie_bytes); return option; } %} %locations %expect 1 /* we expect a shift/reduce conflict for the | binary expression */ /* The %union section specifies the set of possible types for values * for all nonterminal and terminal symbols in the grammar. 
*/
%union {
	s64 integer;
	double floating;
	char *string;
	char *reserved;
	s64 time_usecs;
	enum direction_t direction;
	enum ip_ecn_t ip_ecn;
	struct mpls_stack *mpls_stack;
	struct mpls mpls_stack_entry;
	u16 port;
	s32 window;
	u32 sequence_number;
	/* Anonymous aggregate held by value rather than by pointer. */
	struct {
		int protocol;		/* IPPROTO_TCP or IPPROTO_UDP */
		u32 start_sequence;
		u16 payload_bytes;
	} tcp_sequence_info;
	/* The remaining members are pointers to structures declared
	 * outside this section.
	 */
	struct option_list *option;
	struct event *event;
	struct packet *packet;
	struct syscall_spec *syscall;
	struct command_spec *command;
	struct code_spec *code;
	struct tcp_option *tcp_option;
	struct tcp_options *tcp_options;
	struct expression *expression;
	struct expression_list *expression_list;
	struct errno_spec *errno_info;
}

/* The specific type of the output for a symbol is given by the %type
 * directive. By convention terminal symbols returned from the lexer
 * have ALL_CAPS names, and nonterminal symbols have lower_case names.
 */
/* NOTE(review): none of the %token/%type declarations below carries a
 * <member> type tag naming a %union field, although the comment above
 * says %type supplies one. Presumably the tags were lost before this
 * text reached review (e.g. INTEGER looks like it should map to
 * 'integer', FLOAT to 'floating') -- confirm against the original
 * grammar before relying on these lines.
 */
%token ELLIPSIS
%token SA_FAMILY SIN_PORT SIN_ADDR _HTONS_ INET_ADDR
%token MSG_NAME MSG_IOV MSG_FLAGS
%token FD EVENTS REVENTS ONOFF LINGER
%token ACK ECR EOL MSS NOP SACK SACKOK TIMESTAMP VAL WIN WSCALE PRO
%token FAST_OPEN
%token ECT0 ECT1 CE ECT01 NO_ECN
%token IPV4 IPV6 ICMP UDP GRE MTU
%token MPLS LABEL TC TTL
%token OPTION
%token FLOAT
%token INTEGER HEX_INTEGER
%token WORD STRING BACK_QUOTED CODE IPV4_ADDR IPV6_ADDR
%type direction
%type opt_ip_info
%type ip_ecn
%type