Programme pour détecter les jetons dans un code C

Comme on le sait, l'analyse lexicale est la première phase du compilateur ; elle est également connue sous le nom de « scanner ». Elle convertit le programme d'entrée en une séquence de jetons.
Un programme C se compose de divers jetons et un jeton est soit un mot-clé, un identifiant, une constante, une chaîne littérale ou un symbole.
Par exemple: 

  • Mots-clés : for, while, if, etc.
  • Identifiant : nom de la variable, nom de la fonction, etc.
  • Opérateurs : '+', '++', '-', etc.
  • Séparateurs : ',', ';', etc.

Exemple : pour l'entrée 'int a = b + 1c;', il identifiera 'int' comme mot-clé, 'a' comme identifiant, '=' comme opérateur, etc.

Approche :

  • L'idée est de diviser la chaîne d'entrée (un code C) en jetons tels que des mots-clés, des identifiants, des opérateurs, des nombres entiers et des nombres réels.
  • Le programme vérifie chaque jeton à l'aide de fonctions auxiliaires (isKeyword, isInteger, validIdentifier, etc.) et affiche sa catégorie.

Ci-dessous se trouve un programme qui affiche tous les mots-clés, littéraux, identifiants valides, identifiants invalides, nombres entiers et nombres réels d'un code C donné :

C++
   #include         #include         #include         #include         // Returns 'true' if the character is a DELIMITER.   bool     isDelimiter  (  char     ch  )   {      if     (  ch     ==     ' '     ||     ch     ==     '+'     ||     ch     ==     '-'     ||     ch     ==     '*'     ||         ch     ==     '/'     ||     ch     ==     ''     ||     ch     ==     ';'     ||     ch     ==     '>'     ||         ch     ==     ' <'     ||     ch     ==     '='     ||     ch     ==     '('     ||     ch     ==     ')'     ||         ch     ==     '['     ||     ch     ==     ']'     ||     ch     ==     '{'     ||     ch     ==     '}'  )      return     (  true  );      return     (  false  );   }   // Returns 'true' if the character is an OPERATOR.   bool     isOperator  (  char     ch  )   {      if     (  ch     ==     '+'     ||     ch     ==     '-'     ||     ch     ==     '*'     ||         ch     ==     '/'     ||     ch     ==     '>'     ||     ch     ==     ' <'     ||         ch     ==     '='  )      return     (  true  );      return     (  false  );   }   // Returns 'true' if the string is a VALID IDENTIFIER.   bool     validIdentifier  (  char  *     str  )   {      if     (  str  [  0  ]     ==     '0'     ||     str  [  0  ]     ==     '1'     ||     str  [  0  ]     ==     '2'     ||      str  [  0  ]     ==     '3'     ||     str  [  0  ]     ==     '4'     ||     str  [  0  ]     ==     '5'     ||         str  [  0  ]     ==     '6'     ||     str  [  0  ]     ==     '7'     ||     str  [  0  ]     ==     '8'     ||         str  [  0  ]     ==     '9'     ||     isDelimiter  (  str  [  0  ])     ==     true  )      return     (  false  );      return     (  true  );   }   // Returns 'true' if the string is a KEYWORD.   bool     isKeyword  (  char  *     str  )   {      if     (  !  strcmp  (  str       'if'  )     ||     !  strcmp  (  str       'else'  )     ||      !  strcmp  (  str       'while'  )     ||     !  
strcmp  (  str       'do'  )     ||         !  strcmp  (  str       'break'  )     ||         !  strcmp  (  str       'continue'  )     ||     !  strcmp  (  str       'int'  )      ||     !  strcmp  (  str       'double'  )     ||     !  strcmp  (  str       'float'  )      ||     !  strcmp  (  str       'return'  )     ||     !  strcmp  (  str       'char'  )      ||     !  strcmp  (  str       'case'  )     ||     !  strcmp  (  str       'char'  )      ||     !  strcmp  (  str       'sizeof'  )     ||     !  strcmp  (  str       'long'  )      ||     !  strcmp  (  str       'short'  )     ||     !  strcmp  (  str       'typedef'  )      ||     !  strcmp  (  str       'switch'  )     ||     !  strcmp  (  str       'unsigned'  )      ||     !  strcmp  (  str       'void'  )     ||     !  strcmp  (  str       'static'  )      ||     !  strcmp  (  str       'struct'  )     ||     !  strcmp  (  str       'goto'  ))      return     (  true  );      return     (  false  );   }   // Returns 'true' if the string is an INTEGER.   bool     isInteger  (  char  *     str  )   {      int     i       len     =     strlen  (  str  );      if     (  len     ==     0  )      return     (  false  );      for     (  i     =     0  ;     i      <     len  ;     i  ++  )     {      if     (  str  [  i  ]     !=     '0'     &&     str  [  i  ]     !=     '1'     &&     str  [  i  ]     !=     '2'      &&     str  [  i  ]     !=     '3'     &&     str  [  i  ]     !=     '4'     &&     str  [  i  ]     !=     '5'      &&     str  [  i  ]     !=     '6'     &&     str  [  i  ]     !=     '7'     &&     str  [  i  ]     !=     '8'      &&     str  [  i  ]     !=     '9'     ||     (  str  [  i  ]     ==     '-'     &&     i     >     0  ))      return     (  false  );      }      return     (  true  );   }   // Returns 'true' if the string is a REAL NUMBER.   
// A real number is an optional leading '-', then decimal digits containing
// exactly one '.', with at least one digit overall ("3.14", ".5", "2.").
// Strings such as "", ".", "42" or "1.2.3" are rejected.
bool isRealNumber(const char *str)
{
    size_t i = (str[0] == '-') ? 1 : 0;
    bool hasDecimal = false;
    bool hasDigit = false;

    for (; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal)
                return false; // a second '.' cannot belong to one number
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return false;
        }
    }
    return hasDecimal && hasDigit;
}

// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
// newly allocated, NUL-terminated buffer. An empty range (right < left)
// yields an empty string. Returns NULL if the allocation fails.
// The caller owns the result and must free() it.
char *subString(const char *str, int left, int right)
{
    size_t len = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char *subStr = (char *)malloc(len + 1);

    if (subStr == NULL)
        return NULL;

    memcpy(subStr, str + left, len);
    subStr[len] = '\0';
    return subStr;
}

// Parsing the input STRING.
void     parse  (  char  *     str  )   {      int     left     =     0       right     =     0  ;      int     len     =     strlen  (  str  );      while     (  right      <=     len     &&     left      <=     right  )     {      if     (  isDelimiter  (  str  [  right  ])     ==     false  )      right  ++  ;      if     (  isDelimiter  (  str  [  right  ])     ==     true     &&     left     ==     right  )     {      if     (  isOperator  (  str  [  right  ])     ==     true  )      printf  (  ''%c' IS AN OPERATOR  n  '       str  [  right  ]);      right  ++  ;      left     =     right  ;      }     else     if     (  isDelimiter  (  str  [  right  ])     ==     true     &&     left     !=     right      ||     (  right     ==     len     &&     left     !=     right  ))     {      char  *     subStr     =     subString  (  str       left       right     -     1  );      if     (  isKeyword  (  subStr  )     ==     true  )      printf  (  ''%s' IS A KEYWORD  n  '       subStr  );      else     if     (  isInteger  (  subStr  )     ==     true  )      printf  (  ''%s' IS AN INTEGER  n  '       subStr  );      else     if     (  isRealNumber  (  subStr  )     ==     true  )      printf  (  ''%s' IS A REAL NUMBER  n  '       subStr  );      else     if     (  validIdentifier  (  subStr  )     ==     true      &&     isDelimiter  (  str  [  right     -     1  ])     ==     false  )      printf  (  ''%s' IS A VALID IDENTIFIER  n  '       subStr  );      else     if     (  validIdentifier  (  subStr  )     ==     false      &&     isDelimiter  (  str  [  right     -     1  ])     ==     false  )      printf  (  ''%s' IS NOT A VALID IDENTIFIER  n  '       subStr  );      left     =     right  ;      }      }      return  ;   }   // DRIVER FUNCTION   int     main  ()   {      // maximum length of string is 100 here       char     str  [  100  ]     =     'int a = b + 1c; '  ;      parse  (  str  );     // calling the parse function      return     (  0  );   }   
C
   #include         #include         #include         #include         // Returns 'true' if the character is a DELIMITER.   bool     isDelimiter  (  char     ch  )   {      if     (  ch     ==     ' '     ||     ch     ==     '+'     ||     ch     ==     '-'     ||     ch     ==     '*'     ||         ch     ==     '/'     ||     ch     ==     ''     ||     ch     ==     ';'     ||     ch     ==     '>'     ||         ch     ==     ' <'     ||     ch     ==     '='     ||     ch     ==     '('     ||     ch     ==     ')'     ||         ch     ==     '['     ||     ch     ==     ']'     ||     ch     ==     '{'     ||     ch     ==     '}'  )      return     (  true  );      return     (  false  );   }   // Returns 'true' if the character is an OPERATOR.   bool     isOperator  (  char     ch  )   {      if     (  ch     ==     '+'     ||     ch     ==     '-'     ||     ch     ==     '*'     ||         ch     ==     '/'     ||     ch     ==     '>'     ||     ch     ==     ' <'     ||         ch     ==     '='  )      return     (  true  );      return     (  false  );   }   // Returns 'true' if the string is a VALID IDENTIFIER.   bool     validIdentifier  (  char  *     str  )   {      if     (  str  [  0  ]     ==     '0'     ||     str  [  0  ]     ==     '1'     ||     str  [  0  ]     ==     '2'     ||      str  [  0  ]     ==     '3'     ||     str  [  0  ]     ==     '4'     ||     str  [  0  ]     ==     '5'     ||         str  [  0  ]     ==     '6'     ||     str  [  0  ]     ==     '7'     ||     str  [  0  ]     ==     '8'     ||         str  [  0  ]     ==     '9'     ||     isDelimiter  (  str  [  0  ])     ==     true  )      return     (  false  );      return     (  true  );   }   // Returns 'true' if the string is a KEYWORD.   bool     isKeyword  (  char  *     str  )   {      if     (  !  strcmp  (  str       'if'  )     ||     !  strcmp  (  str       'else'  )     ||      !  strcmp  (  str       'while'  )     ||     !  
strcmp  (  str       'do'  )     ||         !  strcmp  (  str       'break'  )     ||         !  strcmp  (  str       'continue'  )     ||     !  strcmp  (  str       'int'  )      ||     !  strcmp  (  str       'double'  )     ||     !  strcmp  (  str       'float'  )      ||     !  strcmp  (  str       'return'  )     ||     !  strcmp  (  str       'char'  )      ||     !  strcmp  (  str       'case'  )     ||     !  strcmp  (  str       'char'  )      ||     !  strcmp  (  str       'sizeof'  )     ||     !  strcmp  (  str       'long'  )      ||     !  strcmp  (  str       'short'  )     ||     !  strcmp  (  str       'typedef'  )      ||     !  strcmp  (  str       'switch'  )     ||     !  strcmp  (  str       'unsigned'  )      ||     !  strcmp  (  str       'void'  )     ||     !  strcmp  (  str       'static'  )      ||     !  strcmp  (  str       'struct'  )     ||     !  strcmp  (  str       'goto'  ))      return     (  true  );      return     (  false  );   }   // Returns 'true' if the string is an INTEGER.   bool     isInteger  (  char  *     str  )   {      int     i       len     =     strlen  (  str  );      if     (  len     ==     0  )      return     (  false  );      for     (  i     =     0  ;     i      <     len  ;     i  ++  )     {      if     (  str  [  i  ]     !=     '0'     &&     str  [  i  ]     !=     '1'     &&     str  [  i  ]     !=     '2'      &&     str  [  i  ]     !=     '3'     &&     str  [  i  ]     !=     '4'     &&     str  [  i  ]     !=     '5'      &&     str  [  i  ]     !=     '6'     &&     str  [  i  ]     !=     '7'     &&     str  [  i  ]     !=     '8'      &&     str  [  i  ]     !=     '9'     ||     (  str  [  i  ]     ==     '-'     &&     i     >     0  ))      return     (  false  );      }      return     (  true  );   }   // Returns 'true' if the string is a REAL NUMBER.   
// A real number is an optional leading '-', then decimal digits containing
// exactly one '.', with at least one digit overall ("3.14", ".5", "2.").
// Strings such as "", ".", "42" or "1.2.3" are rejected.
bool isRealNumber(const char *str)
{
    size_t i = (str[0] == '-') ? 1 : 0;
    bool hasDecimal = false;
    bool hasDigit = false;

    for (; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal)
                return false; // a second '.' cannot belong to one number
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return false;
        }
    }
    return hasDecimal && hasDigit;
}

// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
// newly allocated, NUL-terminated buffer. An empty range (right < left)
// yields an empty string. Returns NULL if the allocation fails.
// The caller owns the result and must free() it.
char *subString(const char *str, int left, int right)
{
    size_t len = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char *subStr = malloc(len + 1);

    if (subStr == NULL)
        return NULL;

    memcpy(subStr, str + left, len);
    subStr[len] = '\0';
    return subStr;
}

// Parsing the input STRING.
// Splits 'str' into tokens and prints the category of each one.
// Delimiters separate tokens; a delimiter that is also an operator is
// reported, the others (space, ',', ';', brackets) are skipped silently.
// Every other token is classified, in this order, as keyword, integer,
// real number, valid identifier or invalid identifier.
void parse(char *str)
{
    int len = (int)strlen(str);
    int left = 0;

    // Invariant: 'left' never exceeds 'len', so no read goes past the
    // terminating '\0'.
    while (left < len) {
        if (isDelimiter(str[left])) {
            if (isOperator(str[left]))
                printf("'%c' IS AN OPERATOR\n", str[left]);
            left++;
            continue;
        }

        // Advance 'right' to one past the last character of the token.
        int right = left;
        while (right < len && !isDelimiter(str[right]))
            right++;

        char *subStr = subString(str, left, right - 1);
        if (subStr == NULL) {
            fprintf(stderr, "parse: out of memory\n");
            return;
        }

        if (isKeyword(subStr))
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr))
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr))
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr))
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);

        free(subStr); // subString() hands ownership of the buffer to us
        left = right;
    }
}

// DRIVER FUNCTION
int main(void)
{
    // maximum length of string is 100 here
    char str[100] = "int a = b + 1c; ";

    parse(str); // calling the parse function
    return 0;
}
Java
   import     java.util.Arrays  ;   public     class   Parser     {      public     static     boolean     isDelimiter  (  char     ch  )     {      return     ' +-*/ ;> <=()[]{}'  .  indexOf  (  ch  )     !=     -  1  ;      }      public     static     boolean     isOperator  (  char     ch  )     {      return     '+-*/> <='  .  indexOf  (  ch  )     !=     -  1  ;      }      public     static     boolean     validIdentifier  (  String     str  )     {      if     (  str  .  isEmpty  ()     ||     Character  .  isDigit  (  str  .  charAt  (  0  ))     ||     isDelimiter  (  str  .  charAt  (  0  )))      return     false  ;      return     true  ;      }      public     static     boolean     isKeyword  (  String     str  )     {      String  []     keywords     =     {     'if'       'else'       'while'       'do'       'break'       'continue'       'int'       'double'       'float'       'return'       'char'       'case'       'sizeof'       'long'       'short'       'typedef'       'switch'       'unsigned'       'void'       'static'       'struct'       'goto'     };      return     Arrays  .  asList  (  keywords  ).  contains  (  str  );      }      public     static     boolean     isInteger  (  String     str  )     {      if     (  str  .  isEmpty  ())      return     false  ;      for     (  int     i     =     0  ;     i      <     str  .  length  ();     i  ++  )     {      if     (  !  Character  .  isDigit  (  str  .  charAt  (  i  ))     &&     !  (  str  .  charAt  (  i  )     ==     '-'     &&     i     ==     0  ))      return     false  ;      }      return     true  ;      }      public     static     boolean     isRealNumber  (  String     str  )     {      if     (  str  .  isEmpty  ())      return     false  ;      boolean     hasDecimal     =     false  ;      for     (  int     i     =     0  ;     i      <     str  .  length  ();     i  ++  )     {      if     (  !  Character  .  isDigit  (  str  .  
charAt  (  i  ))     &&     str  .  charAt  (  i  )     !=     '.'     &&     !  (  str  .  charAt  (  i  )     ==     '-'     &&     i     ==     0  ))      return     false  ;      if     (  str  .  charAt  (  i  )     ==     '.'  )      hasDecimal     =     true  ;      }      return     hasDecimal  ;      }      public     static     String     subString  (  String     str       int     left       int     right  )     {      return     str  .  substring  (  left       right     +     1  );      }      public     static     void     parse  (  String     str  )     {      int     left     =     0       right     =     0  ;      int     len     =     str  .  length  ();      while     (  right      <=     len     &&     left      <=     right  )     {      if     (  !  isDelimiter  (  str  .  charAt  (  right  )))      right  ++  ;      if     (  isDelimiter  (  str  .  charAt  (  right  ))     &&     left     ==     right  )     {      if     (  isOperator  (  str  .  charAt  (  right  )))      System  .  out  .  println  (  '''     +     str  .  charAt  (  right  )     +     '' IS AN OPERATOR'  );      right  ++  ;      left     =     right  ;      }     else     if     (  isDelimiter  (  str  .  charAt  (  right  ))     &&     left     !=     right     ||     (  right     ==     len     &&     left     !=     right  ))     {      String     subStr     =     subString  (  str       left       right     -     1  );      if     (  isKeyword  (  subStr  ))      System  .  out  .  println  (  '''     +     subStr     +     '' IS A KEYWORD'  );      else     if     (  isInteger  (  subStr  ))      System  .  out  .  println  (  '''     +     subStr     +     '' IS AN INTEGER'  );      else     if     (  isRealNumber  (  subStr  ))      System  .  out  .  println  (  '''     +     subStr     +     '' IS A REAL NUMBER'  );      else     if     (  validIdentifier  (  subStr  )     &&     !  isDelimiter  (  str  .  charAt  (  right     -     1  )))      System  .  out  . 
 println  (  '''     +     subStr     +     '' IS A VALID IDENTIFIER'  );      else     if     (  !  validIdentifier  (  subStr  )     &&     !  isDelimiter  (  str  .  charAt  (  right     -     1  )))      System  .  out  .  println  (  '''     +     subStr     +     '' IS NOT A VALID IDENTIFIER'  );      left     =     right  ;      }      }      }      public     static     void     main  (  String  []     args  )     {      String     str     =     'int a = b + 1c; '  ;      parse  (  str  );      }   }   
Python
   def   is_delimiter  (  ch  ):   return   ch   in   ' +-*/ ;> <=()[]  {}  '   def   is_operator  (  ch  ):   return   ch   in   '+-*/> <='   def   valid_identifier  (  str  ):   if   not   str   or   str  [  0  ]  .  isdigit  ()   or   is_delimiter  (  str  [  0  ]):   return   False   return   True   def   is_keyword  (  str  ):   keywords   =   [  'if'     'else'     'while'     'do'     'break'     'continue'     'int'     'double'     'float'     'return'     'char'     'case'     'sizeof'     'long'     'short'     'typedef'     'switch'     'unsigned'     'void'     'static'     'struct'     'goto'  ]   return   str   in   keywords   def   is_integer  (  str  ):   if   not   str  :   return   False   for   i   in   range  (  len  (  str  )):   if   not   str  [  i  ]  .  isdigit  ()   and   not   (  str  [  i  ]   ==   '-'   and   i   ==   0  ):   return   False   return   True   def   is_real_number  (  str  ):   if   not   str  :   return   False   has_decimal   =   False   for   i   in   range  (  len  (  str  )):   if   not   str  [  i  ]  .  isdigit  ()   and   str  [  i  ]   !=   '.'   and   not   (  str  [  i  ]   ==   '-'   and   i   ==   0  ):   return   False   if   str  [  i  ]   ==   '.'  
:   has_decimal   =   True   return   has_decimal   def   sub_string  (  str     left     right  ):   return   str  [  left  :  right   +   1  ]   def   parse  (  str  ):   left   =   0   right   =   0   len_str   =   len  (  str  )   while   right    <=   len_str   and   left    <=   right  :   if   not   is_delimiter  (  str  [  right  ]):   right   +=   1   if   is_delimiter  (  str  [  right  ])   and   left   ==   right  :   if   is_operator  (  str  [  right  ]):   print  (  f  ''  {  str  [  right  ]  }  ' IS AN OPERATOR'  )   right   +=   1   left   =   right   elif   is_delimiter  (  str  [  right  ])   and   left   !=   right   or   (  right   ==   len_str   and   left   !=   right  ):   sub_str   =   sub_string  (  str     left     right   -   1  )   if   is_keyword  (  sub_str  ):   print  (  f  ''  {  sub_str  }  ' IS A KEYWORD'  )   elif   is_integer  (  sub_str  ):   print  (  f  ''  {  sub_str  }  ' IS AN INTEGER'  )   elif   is_real_number  (  sub_str  ):   print  (  f  ''  {  sub_str  }  ' IS A REAL NUMBER'  )   elif   valid_identifier  (  sub_str  )   and   not   is_delimiter  (  str  [  right   -   1  ]):   print  (  f  ''  {  sub_str  }  ' IS A VALID IDENTIFIER'  )   elif   not   valid_identifier  (  sub_str  )   and   not   is_delimiter  (  str  [  right   -   1  ]):   print  (  f  ''  {  sub_str  }  ' IS NOT A VALID IDENTIFIER'  )   left   =   right   if   __name__   ==   '__main__'  :   str   =   'int a = b + 1c; '   parse  (  str  )   

Sortie :

 'int' IS A KEYWORD
 'a' IS A VALID IDENTIFIER
 '=' IS AN OPERATOR
 'b' IS A VALID IDENTIFIER
 '+' IS AN OPERATOR
 '1c' IS NOT A VALID IDENTIFIER


 

Créer un quiz