-
Michael Tüxen authored
DATA[flgs=0x03 len=1016 tsn=0 ... This fixes https://github.com/nplab/packetdrill/issues/3
Michael Tüxen authoredDATA[flgs=0x03 len=1016 tsn=0 ... This fixes https://github.com/nplab/packetdrill/issues/3
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
lexer.l 7.42 KiB
%{
/*
* Copyright 2013 Google Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/*
* Author: ncardwell@google.com (Neal Cardwell)
*
* This is the specification for the lexical scanner for the packetdrill
* script language. It is processed by the flex lexical scanner
* generator.
*
* For full documentation see: http://flex.sourceforge.net/manual/
*
* Here is a quick and dirty tutorial on flex:
*
* A flex lexical scanner specification is basically a list of rules,
* where each rule is a regular expression for a lexical token to
* match, followed by a C fragment to execute when the scanner sees
* that pattern.
*
* The lexer feeds a stream of terminal symbols up to the parser,
* passing up a FOO token for each "return FOO" in the lexer spec. The
* lexer specifies what value to pass up to the parser by setting a
* yylval.fooval field, where fooval is a field in the %union in the
* .y file.
*
* TODO: detect overflow in numeric literals.
*/
#include "types.h"
#include <netinet/in.h>
#include <stdlib.h>
#include <stdio.h>
#include "script.h"
#include "tcp_options.h"
/* This include of the bison-generated .h file must go last so that we
* can first include all of the declarations on which it depends.
*/
#include "parser.h"
/* Suppress flex's generation of an uncalled static input() function, which
* leads to a compiler warning:
* warning: ‘input’ defined but not used
*/
#define YY_NO_INPUT
/* Return a newly allocated copy of the option name "foo" that follows
 * the leading "--" of a "--foo" option string. The caller owns the
 * returned string.
 */
static char *option(const char *s)
{
	const size_t prefix_len = sizeof("--") - 1;
	const char *name = s + prefix_len;	/* first char after the dashes */
	const size_t name_len = strlen(s) - prefix_len;

	return strndup(name, name_len);
}
/* Return a newly allocated copy of the text between the opening and
 * closing delimiter characters of a quoted string. The text is copied
 * as-is; the caller owns the returned string.
 */
static char *quoted(const char *s)
{
	const size_t total_len = strlen(s);
	const char *inner = s + 1;		/* skip the opening delimiter */

	/* Drop one delimiter character from each end. */
	return strndup(inner, total_len - 2);
}
/* Copy the code inside a code snippet that is enclosed in %{ }% after
 * first stripping the space and tab characters from either end of the
 * snippet. We strip leading and trailing whitespace for Python users
 * to remain sane, since Python is sensitive to whitespace. To summarize,
 * given an input %{<space><code><space>}% we return: <code>
 *
 * An empty or all-whitespace snippet yields an empty string. The result
 * is newly allocated and owned by the caller.
 */
static char *code(const char *s)
{
	const size_t delim_len = sizeof("%{") - 1;
	const size_t len = strlen(s);
	const char *start;
	const char *end;

	/* Nothing to extract unless both delimiters fit in the input. */
	if (len < 2 * delim_len)
		return strndup("", 0);

	start = s + delim_len;
	while ((*start == ' ') || (*start == '\t'))
		++start;

	/* end points one past the last character of the snippet. Walk it
	 * back over trailing blanks, but never past start: otherwise an
	 * all-whitespace snippet would produce a negative length and the
	 * closing delimiter would be copied as if it were code.
	 */
	end = s + len - delim_len;
	while ((end > start) && ((end[-1] == ' ') || (end[-1] == '\t')))
		--end;

	return strndup(start, (end > start) ? (size_t)(end - start) : 0);
}
/* Convert a hex string prefixed by "0x" to an integer value.
 *
 * The digits are read from the string passed in s (not from the
 * scanner's global yytext), and strtoll is used so the result is not
 * limited to the width of long. No error or overflow detection is done
 * here: strtoll saturates at LLONG_MAX on overflow.
 */
static s64 hextol(const char *s)
{
	const size_t prefix_len = sizeof("0x") - 1;

	return strtoll(s + prefix_len, NULL, 16);
}
%}
%{
/* flex expands YY_USER_ACTION before the action of every matched rule.
 * Here it stamps the location of the current match with the line number
 * that flex tracks in yylineno (enabled by "%option yylineno").
 */
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno;
%}
%option yylineno
%option nounput
/* A regexp for C++ comments. The terminating newline is deliberately
* not part of the match: it is consumed by the whitespace rule instead.
* This way a comment on the last line of a script that has no trailing
* newline is still recognized as a comment.
*/
cpp_comment \/\/[^\n]*
/* Here is a summary of the regexp for C comments:
* open-comment
* any number of:
* (non-star) or (one or more stars, then a char that is neither
* star nor slash)
* one or more stars, then slash (close comment)
* Allowing a run of stars in front of the closing slash lets a comment
* that ends in two or more stars followed by a slash terminate at that
* point, instead of failing to match or running on to a later close.
*/
c_comment \/\*([^*]|\*+[^*\/])*\*+\/
/* The regexp for code snippets is analogous to that for C comments.
* Here is a summary of the regexp for code snippets:
* %{
* any number of:
* (non-}) or (one or more }, then a char that is neither } nor %)
* one or more }, then %
* Allowing a run of } in front of the closing % lets a snippet whose
* code itself ends in } (for example "%{ d = {}}%") be recognized.
*/
code \%\{([^}]|\}+[^}\%])*\}+\%
/* IPv4: a regular expression for an IPv4 address in dotted form.
* Only the shape is matched: each of the four parts is any run of
* decimal digits, so the values are not range-checked here.
*/
ipv4_addr [0-9]+[.][0-9]+[.][0-9]+[.][0-9]+
/* IPv6: a regular expression for an IPv6 address. The complexity is
* unfortunate, but we can't use a super-simple approach because TCP
* sequence number ranges like 1:1001 can look like IPv6 addresses if
* we use a naive approach.
*/
/* One group of 1 to 4 hex digits: */
seg [0-9a-fA-F]{1,4}
/* Just "::" on its own: */
v0 [:][:]
/* Full form: eight groups separated by single colons, no "::": */
v1 ({seg}[:]){7,7}{seg}
/* Forms containing "::". v2 ends with "::", v3 through v8 have groups
* on both sides of the "::" (the repeat counts bound how many groups may
* appear on each side), and v9 starts with "::".
*/
v2 ({seg}[:]){1,7}[:]
v3 ({seg}[:]){1,6}[:]{seg}
v4 ({seg}[:]){1,5}([:]{seg}){1,2}
v5 ({seg}[:]){1,4}([:]{seg}){1,3}
v6 ({seg}[:]){1,3}([:]{seg}){1,4}
v7 ({seg}[:]){1,2}([:]{seg}){1,5}
v8 {seg}[:](([:]{seg}){1,6})
v9 [:]([:]{seg}){1,7}
/* IPv4-mapped IPv6 address: */
v10 [:][:]ffff[:]{ipv4_addr}
/* IPv4-translated IPv6 address: */
v11 [:][:]ffff[:](0){1,4}[:]{ipv4_addr}
/* IPv4-embedded IPv6 addresses: */
v12 ({seg}[:]){1,4}[:]{ipv4_addr}
/* An IPv6 address is any one of the forms above: */
ipv6_addr ({v0}|{v1}|{v2}|{v3}|{v4}|{v5}|{v6}|{v7}|{v8}|{v9}|{v10}|{v11}|{v12})
%%
	/* Keyword rules: each literal below is returned to the parser as
	 * its own token. flex prefers the longest match and, for matches
	 * of equal length, the rule listed first, so these keywords must
	 * stay above the generic WORD rule further down.
	 */
sa_family return SA_FAMILY;
sin_port return SIN_PORT;
sin_addr return SIN_ADDR;
msg_name return MSG_NAME;
msg_iov return MSG_IOV;
msg_flags return MSG_FLAGS;
fd return FD;
events return EVENTS;
revents return REVENTS;
onoff return ONOFF;
linger return LINGER;
htons return _HTONS_;
ipv4 return IPV4;
ipv6 return IPV6;
icmp return ICMP;
sctp return SCTP;
udp return UDP;
udplite return UDPLITE;
gre return GRE;
mpls return MPLS;
label return LABEL;
tc return TC;
ttl return TTL;
inet_addr return INET_ADDR;
ack return ACK;
eol return EOL;
ecr return ECR;
mss return MSS;
mtu return MTU;
nop return NOP;
sack return SACK;
sackOK return SACKOK;
TS return TIMESTAMP;
FO return FAST_OPEN;
val return VAL;
win return WIN;
wscale return WSCALE;
ect01 return ECT01;
ect0 return ECT0;
ect1 return ECT1;
noecn return NO_ECN;
ce return CE;
[.][.][.] return ELLIPSIS;
assoc_value return ASSOC_VALUE;
sack_delay return SACK_DELAY;
sack_freq return SACK_FREQ;
srto_initial return SRTO_INITIAL;
srto_max return SRTO_MAX;
srto_min return SRTO_MIN;
sinit_num_ostreams return SINIT_NUM_OSTREAMS;
sinit_max_instreams return SINIT_MAX_INSTREAMS;
sinit_max_attempts return SINIT_MAX_ATTEMPTS;
sinit_max_init_timeo return SINIT_MAX_INIT_TIMEO;
	/* Upper-case chunk names. Note that both "sack" above and "SACK"
	 * below are returned as the same SACK token.
	 */
DATA return DATA;
INIT return INIT;
INIT_ACK return INIT_ACK;
SACK return SACK;
HEARTBEAT return HEARTBEAT;
HEARTBEAT_ACK return HEARTBEAT_ACK;
ABORT return ABORT;
SHUTDOWN return SHUTDOWN;
SHUTDOWN_ACK return SHUTDOWN_ACK;
ERROR return ERROR;
COOKIE_ECHO return COOKIE_ECHO;
COOKIE_ACK return COOKIE_ACK;
ECNE return ECNE;
CWR return CWR;
SHUTDOWN_COMPLETE return SHUTDOWN_COMPLETE;
flgs return FLAGS;
len return LEN;
tag return TAG;
a_rwnd return A_RWND;
is return IS;
os return OS;
tsn return TSN;
sid return SID;
ssn return SSN;
ppid return PPID;
gaps return GAPS;
dups return DUPS;
	/* Tokens that carry a value in yylval. Strings stored in
	 * yylval.string are heap copies made by strdup/strndup.
	 */
	/* "--name" option: the name without the leading dashes. */
--[a-zA-Z0-9_]+ yylval.string = option(yytext); return OPTION;
	/* Numbers. atof and atoll report no errors, so out-of-range
	 * literals are not detected here (see the TODO at the top).
	 */
[-]?[0-9]*[.][0-9]+ yylval.floating = atof(yytext); return FLOAT;
[-]?[0-9]+ yylval.integer = atoll(yytext); return INTEGER;
0x[0-9a-fA-F]+ yylval.integer = hextol(yytext); return HEX_INTEGER;
	/* Any other run of letters, digits and underscores. This has to
	 * come after the keyword and number rules, which match the same
	 * text at the same length.
	 */
[a-zA-Z0-9_]+ yylval.string = strdup(yytext); return WORD;
	/* Double-quoted and back-quoted strings: the value is the text
	 * between the delimiters, copied without interpreting escapes.
	 */
\"(\\.|[^"])*\" yylval.string = quoted(yytext); return STRING;
\`(\\.|[^`])*\` yylval.string = quoted(yytext); return BACK_QUOTED;
	/* Any other single non-whitespace character is passed to the
	 * parser as a token whose value is that character.
	 */
[^ \t\n] return (int) yytext[0];
[ \t\n]+ /* ignore whitespace */;
{cpp_comment} /* ignore C++-style comment */;
{c_comment} /* ignore C-style comment */;
	/* Code snippet: the value is the code with the delimiters and the
	 * surrounding spaces and tabs removed.
	 */
{code} yylval.string = code(yytext); return CODE;
	/* IP addresses are passed up as their literal text. */
{ipv4_addr} yylval.string = strdup(yytext); return IPV4_ADDR;
{ipv6_addr} yylval.string = strdup(yytext); return IPV6_ADDR;
%%