---------- Forwarded message --------- From: Scott Lorberbaum <ugasco...@gmail.com> Date: Sat, May 8, 2021 at 11:06 AM Subject: Re: c++ bison, flex, having yylex return parser::symbol_type and take in arguments (parser::semantic_type, parser::location_type) To: Akim Demaille <a...@lrde.epita.fr>
Akim, yes, sorry, I was in a rush yesterday and didn't post everything. Basically I'm trying to have yylex return a parser::symbol_type, which is a full token, and I think it's possible. Yet whichever options I put in, I get errors. If I use %define api.token.constructor, then the code outputs yylex() without arguments. ucc.tab.cpp:805:45: error: use of undeclared identifier 'lval' symbol_type yylookahead (yylex (lval)); ^ If I add in %lex-param{ uccParser::semantic_type* const lval } %lex-param{ uccParser::location_type* loc } ucc.tab.cpp:805:45: error: use of undeclared identifier 'lval' symbol_type yylookahead (yylex (lval, loc)); ^ ucc.tab.cpp:805:51: error: use of undeclared identifier 'loc' symbol_type yylookahead (yylex (lval, loc)); then yylex is called with arguments but then lval and loc are undefined. If I use %param instead of %lex-param for those arguments everything compiles, but now the constructor for the parser class takes in those extra parameters of semantic_type* and location_type*, which doesn't seem to be what is wanted. Usually it seems that if I don't have api.token.constructor and I comment out the param values then &yyla.value and &yyla.location are passed into yylex as needed, but the generated code still uses a translate function that expects an integer. ucc.tab.cpp:1061:40: error: no viable conversion from 'uccParser::symbol_type' to 'int' yyla.kind_ = yytranslate_ (yylex (&yyla.value, &yyla.location)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ucc.ypp:62:16: note: expanded from macro 'yylex' #define yylex compiler.lexer.yylex ^ ucc.tab.hpp:1910:47: note: passing argument to parameter 't' here static symbol_kind_type yytranslate_ (int t); And it seems that if the result of yylex is the argument for yytranslate_ and I want yylex to return a symbol_type, then won't that conflict with the parameter/argument of "int t;" for yytranslate_? Here is the "lex.hpp" file that defines the class with the virtual yylex. 
#ifndef _LEX_HPP #define _LEX_HPP 1 #include <string> #include <istream> #include <iostream> #if !defined(yyFlexLexerOnce) #include <FlexLexer.h> #endif //#include "Compiler.hpp" #include "ucc.tab.hpp" #include "location.hh" namespace ucc{ class uccLexer : public yyFlexLexer { public: // uccLexer(); uccLexer(std::istream* in, Compiler& compiler); // using FlexLexer::yylex; virtual uccParser::symbol_type yylex(uccParser::semantic_type* const lval, uccParser::location_type* location); virtual ~uccLexer(); private: uccParser::semantic_type* yylval = nullptr; uccParser::location_type* loc = nullptr; Compiler& compiler; std::string string_buf{""}; // std::string* string_buf_ptr = nullptr; }; } #endif Here is my lexer grammar file again in full. I have commented out the original code, which returned a parser::token:: value and used yylval->emplace(), and replaced it with the make_ functions, which I think are the way to go when returning a parser::symbol_type. %option c++ %{ #include <cstdint> #ifdef DEBUGON #ifndef DEBUG #define DEBUG #endif #endif #include "type.hpp" #include "cpptypes.hpp" #include "lex.hpp" #include "List.hpp" #include "symtab.hpp" #include "data.hpp" #undef YY_DECL #define YY_DECL uccParser::symbol_type uccLexer::yylex(uccParser::semantic_type* const lval, uccParser::location_type* location) #include "ucc.tab.hpp" #include "compiler.hpp" using namespace ucc; using token = uccParser::token; #define YY_USER_ACTION loc->step(); loc->columns(yyleng); %} %option stack %option yylineno %option yyclass="uccLexer" %s initial %x astring %x comment DIGIT [0-9] LETTER [A-Za-z\_] SPACE [ ] EOLN \\n PERCENT \% QUESTION \? NULLS \\0 BEGCOM "\/\*" ENDCOM "\*\/" ELIP "..." 
EQUEQU "==" NEQ "\!=" LEQ "<=" GEQ ">=" ADOF "&" %% %{ yylval = lval; %} <comment>{ [^*\n]* [^*\n]*\n ++compiler.Line_Number; \*+[^*/\n]* \*+[^*/\n]*\n ++compiler.Line_Number; {ENDCOM} { BEGIN(INITIAL);} <<EOF>> { compiler.error("eof terminated comment",""); BEGIN(INITIAL); } } <astring>{ \" { BEGIN(INITIAL); // yylval->emplace<ucc::StrConstant>(string_buf); // return(token::StrConstant); return uccParser::make_StrConstant(string_buf,*loc); } \n { BEGIN(INITIAL); ++compiler.Line_Number; compiler.error("unterminated string constant",""); } \\n { string_buf += "\\n"; } \\\" { string_buf += "\\\""; } \\[^n\\\"] { int c; BEGIN(INITIAL); compiler.error("illegal character:%s in string",yytext); while(((c = yyinput()) != '\"') && (c != EOF)){ ; } if(c == EOF){ compiler.error("eof terminated string",""); } } ((\\\\{SPACE}+)*|[&@$\%^/,.+'-\[\]{}();~`|=#\!:<>]*|{LETTER}|{DIGIT}|{PERCENT}|{SPACE}|{BEGCOM}|{ENDCOM}|{QUESTION})* { char *yptr = yytext; while((*yptr != (int)NULL) && *yptr){ string_buf += *yptr++; } } <<EOF>> { compiler.error("eof terminated string",""); BEGIN(INITIAL); } } void {/*keyword void*/ // return token::voidt; return uccParser::make_voidt(0,*loc); } if {/* keyword if*/ // return token::ift; return uccParser::make_ift(0,*loc); } return {/* keyword return*/ // return token::returnt; return uccParser::make_returnt(*loc); } else {/* keyword else*/ // return token::elset; return uccParser::make_elset(0,*loc); } while {/*keyword while*/ // return token::whilet; return uccParser::make_whilet(0,*loc); } int { /*keyword int*/ // yylval.ttype = type::INT; // return token::intt; return uccParser::make_intt(0,*loc); } float {/* keyword float*/ // yylval.ttype = type::FLOAT; // return token::floatt; return uccParser::make_floatt(0,*loc); } char { /* keyword char*/ // yylval.ttype = type::CHAR; // return token::chart; return uccParser::make_chart(0,*loc); } \( { // return token::lpar; return uccParser::make_lpar(0,*loc); } \) { // return token::rpar; return 
uccParser::make_rpar(0,*loc); } \{ { // return token::lcbra; return uccParser::make_lcbra(0,*loc); } \} { // return token::rcbra; return uccParser::make_rcbra(0,*loc); } \; { // return token::semi; return uccParser::make_semi(0,*loc); } \, { // return token::comma; return uccParser::make_comma(0,*loc); } \< { // yylval.value.relopvalue = reltype::LES; // return token::lesst; return uccParser::make_lesst(reltype::LES,*loc); } \> { // yylval.value.relopvalue = reltype::GRE; // return token::greatt; return uccParser::make_greatt(reltype::GRE,*loc); } \= { // return token::equalt; return uccParser::make_equalt(0,*loc); } \+ { // yylval.value.addopvalue = addtype::PLS; // return token::plus; return uccParser::make_plus(addtype::PLS,*loc); } \- { // yylval.value.addopvalue = addtype::MIN; // return token::minus; return uccParser::make_minus(addtype::MIN,*loc); } \/ { // yylval.value.multopvalue = multype::DIV; // return token::divide; return uccParser::make_divide(multype::DIV,*loc); } \* { // yylval.value.multopvalue = multype::MULT; // return token::star; return uccParser::make_star(multype::MULT,*loc); } {GEQ} { // yylval.value.relopvalue = reltype::GEQ; // return token::geq; return uccParser::make_geq(reltype::GEQ,*loc); } {LEQ} { // yylval.value.relopvalue = reltype::LEQ; // return token::leq; return uccParser::make_leq(reltype::LEQ,*loc); } {NEQ} { // yylval.value.eqopvalue = eqtype::NEQ; // return token::neq; return uccParser::make_neq(eqtype::NEQ,*loc); } {EQUEQU} { // yylval.value.eqopvalue = eqtype::EQEQ; // return token::equequ; return uccParser::make_equequ(eqtype::EQEQ,*loc); } {ADOF} { // return token::adof; return uccParser::make_adof(0,*loc); } {BEGCOM} { BEGIN(comment); } {LETTER}({LETTER}|{DIGIT})* {/* identifier*/ // yylval.ttype = type::STR; std::string tempstr(yytext); // yylval->emplace<std::string>(tempstr); return uccParser::make_Ident(tempstr,*loc); // return(token::Ident); } 
((0|([1-9]{DIGIT}*)))(while|if|void|return|char|int|float|else|for|extern) { compiler.error("illegal token: %s", yytext); } ((0|([1-9]{DIGIT}*))(\.{DIGIT}+))(while|if|void|return|char|int|float|else|for|extern) { compiler.error("illegal token: %s",yytext); } (0|([1-9]{DIGIT}*))({LETTER}({LETTER}|{DIGIT})*) { /* bad form*/ compiler.error("illegal token: %s", yytext); } ((0|([1-9]{DIGIT}*))(\.{DIGIT}{DIGIT}*))({LETTER}({LETTER}|{DIGIT})*) {/*bad form*/ compiler.error("illegal token: %s",yytext); } (0|([1-9]{DIGIT}*))\.{DIGIT}{DIGIT}* {/*float*/ // yylval.ttype = type::FLOAT; // yylval->emplace<float>(std::atof(yytext)); // yylval.value.fvalue=(float)std::atof(yytext); return uccParser::make_FloatConstant((float)std::atof(yytext),*loc); // return(token::FloatConstant); } 0{DIGIT}+\.{DIGIT}+ {/* bad float*/ compiler.error("illegal float: %s",yytext); } 0{DIGIT}+ {/* bad integer*/ compiler.error("illegal integer: %s", yytext); } (0|([1-9]{DIGIT}*)) {/*integer constant*/ // yylval.ttype = type::INT; // yylval->emplace<int>(std::atoi(yytext)); // yylval.value.ivalue =(int)std::atoi(yytext); return uccParser::make_IntConstant((int)std::atoi(yytext),*loc); // return(token::IntConstant); } {ELIP} { // return token::elip; return uccParser::make_elip(0,*loc); } \" { string_buf.clear(); BEGIN(astring); } \n { ++compiler.Line_Number; } [ \t\r]+ { /*eat up tabs, whitespace */ } <<EOF>> { return uccParser::make_YYEOF(*loc); // return '\0'; } . { compiler.error("illegal character: %s",yytext); } %% I hope this helps. Let me know what else you may need. I added the lex class file via the lex.hpp, I showed use of the make_ in the lex grammar, and I've added some compiler errors that show up when using the various options. On Fri, May 7, 2021 at 10:56 PM Akim Demaille <a...@lrde.epita.fr> wrote: > Hi Scott, > > > Le 7 mai 2021 à 21:26, Scott Lorberbaum <ugasco...@gmail.com> a écrit : > > > > [...] 
> > i've played around with having the param turned on and either full > %param, > > or %lex %parse specific. I've turned on and off token.raw and > > token.constructor but it doesn't seem to work. > > the translate_ function still seems to want to accept an (int) from yylex > > but should it be used if api.token.raw is turned on? Maybe i'm thinking > the > > translate_ function is used differently. Let me know if you need more > code. > > Thank you in advance. > > AFAICT, you did not exactly state what your problem is. > > The fact that yytranslate still exists with token.raw is not > (expected to be) a problem: its implementation is then trivial. > > inline > parser::symbol_kind_type > parser::yytranslate_ (int t) > { > return static_cast<symbol_kind_type> (t); > } > > > Please show exactly your problem, for instance show the compiler > errors. And if that's something relating to the communication > between the scanner and the parser, it would be most useful to > include (i) bits of your scanner where you actually return a token, > and (ii) details about "#define yylex compiler.lexer.yylex". > > Cheers!