C 코드에서 토큰을 감지하는 프로그램
잘 알려져 있듯이 어휘 분석(lexical analysis)은 컴파일러의 첫 번째 단계이며 스캐너(scanner)라고도 합니다. 어휘 분석은 입력 프로그램을 토큰 시퀀스로 변환합니다.
C 프로그램은 다양한 토큰으로 구성되며 토큰은 키워드, 식별자, 상수, 문자열 리터럴 또는 기호입니다.
예를 들어:
- 키워드: for, while, if 등
- 식별자: 변수명, 함수명 등
- 연산자: '+', '++', '-' 등
- 구분 기호: ',', ';' 등
예: 입력 'int a = b + 1c;'에 대해 'int'는 키워드로, 'a'는 식별자로, '='는 연산자로 식별하는 식입니다.
접근 방식:
- 아이디어는 입력 문자열(C 코드)을 키워드, 식별자, 연산자, 정수, 실수와 같은 토큰으로 나누는 것입니다.
- 도우미 함수(isKeyword, isInteger, validIdentifier 등)를 사용하여 각 토큰을 검사하고 해당 카테고리를 출력합니다.
다음은 주어진 C 코드에서 모든 키워드, 리터럴, 유효한 식별자, 잘못된 식별자, 정수, 실수를 출력하는 프로그램입니다.
// C++
// The program only uses the C library, so the C headers are included directly.
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Returns true if ch is a DELIMITER, i.e. a character that ends the token
 * being scanned: a space, an arithmetic/relational operator, ',', ';' or a
 * bracket. The NUL terminator is not a delimiter.
 */
bool isDelimiter(char ch)
{
    /* strchr() also matches the terminating NUL, so exclude it explicitly. */
    return ch != '\0' && strchr(" +-*/,;><=()[]{}", ch) != NULL;
}

/*
 * Returns true if ch is a single-character OPERATOR
 * ('+', '-', '*', '/', '>', '<' or '=').
 */
bool isOperator(char ch)
{
    return ch != '\0' && strchr("+-*/><=", ch) != NULL;
}

/*
 * Returns true if str can be an identifier as far as this lexer checks:
 * it is non-empty and its first character is neither a digit nor a
 * delimiter. Only the first character is inspected.
 */
bool validIdentifier(const char *str)
{
    if (str == NULL || str[0] == '\0') {
        return false;
    }
    if (str[0] >= '0' && str[0] <= '9') {
        return false;
    }
    return !isDelimiter(str[0]);
}

/*
 * Returns true if str is one of the C keywords recognised by this lexer.
 */
bool isKeyword(const char *str)
{
    static const char *const keywords[] = {
        "if",     "else",   "while",  "do",      "break",  "continue",
        "int",    "double", "float",  "return",  "char",   "case",
        "sizeof", "long",   "short",  "typedef", "switch", "unsigned",
        "void",   "static", "struct", "goto"
    };
    size_t i;

    if (str == NULL) {
        return false;
    }
    for (i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0) {
            return true;
        }
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.
/*
 * Returns true if str is a decimal INTEGER: an optional leading '-'
 * followed by one or more digits. Empty strings and a lone "-" are rejected.
 */
bool isInteger(const char *str)
{
    size_t i = 0;

    if (str == NULL) {
        return false;
    }
    if (str[0] == '-') {
        i = 1; /* a sign is only allowed in the first position */
    }
    if (str[i] == '\0') {
        return false; /* no digits at all */
    }
    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9') {
            return false;
        }
    }
    return true;
}

/*
 * Returns true if str is a REAL NUMBER: an optional leading '-', then
 * digits containing exactly one '.', with at least one digit overall
 * (so "3.14", ".5" and "1." match; "42", "." and "1.2.3" do not).
 */
bool isRealNumber(const char *str)
{
    size_t i = 0;
    size_t digits = 0;
    size_t dots = 0;

    if (str == NULL) {
        return false;
    }
    if (str[0] == '-') {
        i = 1;
    }
    for (; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            dots++;
        } else if (str[i] >= '0' && str[i] <= '9') {
            digits++;
        } else {
            return false;
        }
    }
    return dots == 1 && digits > 0;
}

/*
 * Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
 * newly allocated, NUL-terminated buffer.
 *
 * The caller owns the result and must free() it. Returns an empty string
 * when right < left, and NULL when str is NULL, left is negative or the
 * allocation fails.
 */
char *subString(const char *str, int left, int right)
{
    size_t count;
    char *subStr;

    if (str == NULL || left < 0) {
        return NULL;
    }
    count = (right < left) ? 0 : (size_t)(right - left) + 1;

    subStr = (char *)malloc(count + 1);
    if (subStr == NULL) {
        return NULL;
    }
    if (count > 0) {
        memcpy(subStr, str + left, count);
    }
    subStr[count] = '\0';
    return subStr;
}

// Parsing the input STRING.
void parse ( char * str ) { int left = 0 right = 0 ; int len = strlen ( str ); while ( right <= len && left <= right ) { if ( isDelimiter ( str [ right ]) == false ) right ++ ; if ( isDelimiter ( str [ right ]) == true && left == right ) { if ( isOperator ( str [ right ]) == true ) printf ( ''%c' IS AN OPERATOR n ' str [ right ]); right ++ ; left = right ; } else if ( isDelimiter ( str [ right ]) == true && left != right || ( right == len && left != right )) { char * subStr = subString ( str left right - 1 ); if ( isKeyword ( subStr ) == true ) printf ( ''%s' IS A KEYWORD n ' subStr ); else if ( isInteger ( subStr ) == true ) printf ( ''%s' IS AN INTEGER n ' subStr ); else if ( isRealNumber ( subStr ) == true ) printf ( ''%s' IS A REAL NUMBER n ' subStr ); else if ( validIdentifier ( subStr ) == true && isDelimiter ( str [ right - 1 ]) == false ) printf ( ''%s' IS A VALID IDENTIFIER n ' subStr ); else if ( validIdentifier ( subStr ) == false && isDelimiter ( str [ right - 1 ]) == false ) printf ( ''%s' IS NOT A VALID IDENTIFIER n ' subStr ); left = right ; } } return ; } // DRIVER FUNCTION int main () { // maximum length of string is 100 here char str [ 100 ] = 'int a = b + 1c; ' ; parse ( str ); // calling the parse function return ( 0 ); }
// C
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Returns true if ch is a DELIMITER, i.e. a character that ends the token
 * being scanned: a space, an arithmetic/relational operator, ',', ';' or a
 * bracket. The NUL terminator is not a delimiter.
 */
bool isDelimiter(char ch)
{
    /* strchr() also matches the terminating NUL, so exclude it explicitly. */
    return ch != '\0' && strchr(" +-*/,;><=()[]{}", ch) != NULL;
}

/*
 * Returns true if ch is a single-character OPERATOR
 * ('+', '-', '*', '/', '>', '<' or '=').
 */
bool isOperator(char ch)
{
    return ch != '\0' && strchr("+-*/><=", ch) != NULL;
}

/*
 * Returns true if str can be an identifier as far as this lexer checks:
 * it is non-empty and its first character is neither a digit nor a
 * delimiter. Only the first character is inspected.
 */
bool validIdentifier(const char *str)
{
    if (str == NULL || str[0] == '\0') {
        return false;
    }
    if (str[0] >= '0' && str[0] <= '9') {
        return false;
    }
    return !isDelimiter(str[0]);
}

/*
 * Returns true if str is one of the C keywords recognised by this lexer.
 */
bool isKeyword(const char *str)
{
    static const char *const keywords[] = {
        "if",     "else",   "while",  "do",      "break",  "continue",
        "int",    "double", "float",  "return",  "char",   "case",
        "sizeof", "long",   "short",  "typedef", "switch", "unsigned",
        "void",   "static", "struct", "goto"
    };

    if (str == NULL) {
        return false;
    }
    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0) {
            return true;
        }
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.
/*
 * Returns true if str is a decimal INTEGER: an optional leading '-'
 * followed by one or more digits. Empty strings and a lone "-" are rejected.
 */
bool isInteger(const char *str)
{
    size_t i = 0;

    if (str == NULL) {
        return false;
    }
    if (str[0] == '-') {
        i = 1; /* a sign is only allowed in the first position */
    }
    if (str[i] == '\0') {
        return false; /* no digits at all */
    }
    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9') {
            return false;
        }
    }
    return true;
}

/*
 * Returns true if str is a REAL NUMBER: an optional leading '-', then
 * digits containing exactly one '.', with at least one digit overall
 * (so "3.14", ".5" and "1." match; "42", "." and "1.2.3" do not).
 */
bool isRealNumber(const char *str)
{
    size_t i = 0;
    size_t digits = 0;
    size_t dots = 0;

    if (str == NULL) {
        return false;
    }
    if (str[0] == '-') {
        i = 1;
    }
    for (; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            dots++;
        } else if (str[i] >= '0' && str[i] <= '9') {
            digits++;
        } else {
            return false;
        }
    }
    return dots == 1 && digits > 0;
}

/*
 * Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
 * newly allocated, NUL-terminated buffer.
 *
 * The caller owns the result and must free() it. Returns an empty string
 * when right < left, and NULL when str is NULL, left is negative or the
 * allocation fails.
 */
char *subString(const char *str, int left, int right)
{
    if (str == NULL || left < 0) {
        return NULL;
    }

    size_t count = (right < left) ? 0 : (size_t)(right - left) + 1;
    char *subStr = malloc(count + 1);

    if (subStr == NULL) {
        return NULL;
    }
    if (count > 0) {
        memcpy(subStr, str + left, count);
    }
    subStr[count] = '\0';
    return subStr;
}

// Parsing the input STRING.
/*
 * Splits str into tokens and prints the category of each one.
 *
 * Delimiters are consumed one character at a time; those that are also
 * operators are reported, the rest (spaces, ';', brackets, ...) are skipped
 * silently. Every maximal run of non-delimiter characters is one token and
 * is reported as a keyword, integer, real number, valid identifier or
 * invalid identifier, tested in that order.
 *
 * Scanning never reads past the terminating NUL, and each extracted token
 * is freed after it has been classified.
 */
void parse(char *str)
{
    if (str == NULL) {
        return;
    }

    int len = (int)strlen(str);
    int left = 0;

    while (left < len) {
        if (isDelimiter(str[left])) {
            if (isOperator(str[left])) {
                printf("'%c' IS AN OPERATOR\n", str[left]);
            }
            left++;
            continue;
        }

        /* Advance to the first delimiter (or the end) after the token. */
        int right = left;
        while (right < len && !isDelimiter(str[right])) {
            right++;
        }

        char *subStr = subString(str, left, right - 1);
        if (subStr == NULL) {
            fprintf(stderr, "parse: out of memory\n");
            return;
        }

        if (isKeyword(subStr)) {
            printf("'%s' IS A KEYWORD\n", subStr);
        } else if (isInteger(subStr)) {
            printf("'%s' IS AN INTEGER\n", subStr);
        } else if (isRealNumber(subStr)) {
            printf("'%s' IS A REAL NUMBER\n", subStr);
        } else if (validIdentifier(subStr)) {
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        } else {
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        }

        free(subStr);
        left = right;
    }
}

// DRIVER FUNCTION
int main(void)
{
    // maximum length of string is 100 here
    char str[100] = "int a = b + 1c; ";

    parse(str); // calling the parse function
    return 0;
}
// Java
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * A minimal lexical analyser for C source text: it splits the input into
 * tokens and prints the category of each one.
 */
public class Parser {

    /** Characters that end the token being scanned. */
    private static final String DELIMITERS = " +-*/,;><=()[]{}";

    /** Delimiters that are reported as operators. */
    private static final String OPERATORS = "+-*/><=";

    /** The C keywords recognised by this lexer. */
    private static final Set<String> KEYWORDS = new HashSet<>(Arrays.asList(
            "if", "else", "while", "do", "break", "continue", "int", "double",
            "float", "return", "char", "case", "sizeof", "long", "short",
            "typedef", "switch", "unsigned", "void", "static", "struct", "goto"));

    /** Returns true if {@code ch} is a delimiter. */
    public static boolean isDelimiter(char ch) {
        return DELIMITERS.indexOf(ch) != -1;
    }

    /** Returns true if {@code ch} is a single-character operator. */
    public static boolean isOperator(char ch) {
        return OPERATORS.indexOf(ch) != -1;
    }

    private static boolean isAsciiDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Returns true if {@code str} is non-empty and its first character is
     * neither a digit nor a delimiter. Only the first character is inspected.
     */
    public static boolean validIdentifier(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        char first = str.charAt(0);
        return !isAsciiDigit(first) && !isDelimiter(first);
    }

    /** Returns true if {@code str} is one of the recognised C keywords. */
    public static boolean isKeyword(String str) {
        return str != null && KEYWORDS.contains(str);
    }

    /**
     * Returns true if {@code str} is an optional leading '-' followed by one
     * or more digits. Empty strings and a lone "-" are rejected.
     */
    public static boolean isInteger(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        int start = str.charAt(0) == '-' ? 1 : 0;
        if (start == str.length()) {
            return false; // no digits at all
        }
        for (int i = start; i < str.length(); i++) {
            if (!isAsciiDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if {@code str} is an optional leading '-' followed by
     * digits containing exactly one '.', with at least one digit overall.
     */
    public static boolean isRealNumber(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        int digits = 0;
        int dots = 0;
        for (int i = str.charAt(0) == '-' ? 1 : 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (ch == '.') {
                dots++;
            } else if (isAsciiDigit(ch)) {
                digits++;
            } else {
                return false;
            }
        }
        return dots == 1 && digits > 0;
    }

    /** Returns the characters of {@code str} from left to right, both inclusive. */
    public static String subString(String str, int left, int right) {
        return str.substring(left, right + 1);
    }

    /**
     * Splits {@code str} into tokens and prints the category of each one.
     *
     * Delimiters are consumed one character at a time; those that are also
     * operators are reported and the rest are skipped. Every maximal run of
     * non-delimiter characters is one token. Indexing never reaches
     * {@code str.length()}, so no StringIndexOutOfBoundsException is thrown
     * at the end of the input.
     */
    public static void parse(String str) {
        if (str == null) {
            return;
        }
        int len = str.length();
        int left = 0;

        while (left < len) {
            char ch = str.charAt(left);
            if (isDelimiter(ch)) {
                if (isOperator(ch)) {
                    System.out.println("'" + ch + "' IS AN OPERATOR");
                }
                left++;
                continue;
            }

            // Advance to the first delimiter (or the end) after the token.
            int right = left;
            while (right < len && !isDelimiter(str.charAt(right))) {
                right++;
            }

            String subStr = subString(str, left, right - 1);
            if (isKeyword(subStr)) {
                System.out.println("'" + subStr + "' IS A KEYWORD");
            } else if (isInteger(subStr)) {
                System.out.println("'" + subStr + "' IS AN INTEGER");
            } else if (isRealNumber(subStr)) {
                System.out.println("'" + subStr + "' IS A REAL NUMBER");
            } else if (validIdentifier(subStr)) {
                System.out.println("'" + subStr + "' IS A VALID IDENTIFIER");
            } else {
                System.out.println("'" + subStr + "' IS NOT A VALID IDENTIFIER");
            }
            left = right;
        }
    }

    public static void main(String[] args) {
        String str = "int a = b + 1c; ";
        parse(str);
    }
}
# Python
# A minimal lexical analyser for C source text: it splits the input into
# tokens and prints the category of each one.
#
# The parameter name `str` shadows the builtin inside these functions; it is
# kept so that existing keyword-argument callers continue to work.

_DELIMITERS = " +-*/,;><=()[]{}"
_OPERATORS = "+-*/><="
_DIGITS = "0123456789"
_KEYWORDS = frozenset([
    "if", "else", "while", "do", "break", "continue", "int", "double",
    "float", "return", "char", "case", "sizeof", "long", "short",
    "typedef", "switch", "unsigned", "void", "static", "struct", "goto",
])


def is_delimiter(ch):
    """Return True if ch is a character that ends the token being scanned."""
    return ch in _DELIMITERS


def is_operator(ch):
    """Return True if ch is a single-character operator."""
    return ch in _OPERATORS


def valid_identifier(str):
    """Return True if str is non-empty and does not start with a digit or delimiter.

    Only the first character is inspected.
    """
    if not str:
        return False
    return str[0] not in _DIGITS and not is_delimiter(str[0])


def is_keyword(str):
    """Return True if str is one of the recognised C keywords."""
    return str in _KEYWORDS


def is_integer(str):
    """Return True if str is an optional leading '-' followed by one or more digits."""
    if not str:
        return False
    digits = str[1:] if str[0] == "-" else str
    return bool(digits) and all(ch in _DIGITS for ch in digits)


def is_real_number(str):
    """Return True if str is an optional '-' then digits with exactly one '.'.

    At least one digit is required, so "." and "-." are rejected.
    """
    if not str:
        return False
    body = str[1:] if str[0] == "-" else str
    if body.count(".") != 1:
        return False
    digits = body.replace(".", "", 1)
    return bool(digits) and all(ch in _DIGITS for ch in digits)


def sub_string(str, left, right):
    """Return the characters of str from index left to index right, both inclusive."""
    return str[left:right + 1]


def parse(str):
    """Split str into tokens and print the category of each one.

    Delimiters are consumed one character at a time; those that are also
    operators are reported and the rest are skipped. Every maximal run of
    non-delimiter characters is one token. Indexing never reaches len(str),
    so no IndexError is raised at the end of the input.
    """
    len_str = len(str)
    left = 0
    while left < len_str:
        if is_delimiter(str[left]):
            if is_operator(str[left]):
                print(f"'{str[left]}' IS AN OPERATOR")
            left += 1
            continue

        # Advance to the first delimiter (or the end) after the token.
        right = left
        while right < len_str and not is_delimiter(str[right]):
            right += 1

        sub_str = sub_string(str, left, right - 1)
        if is_keyword(sub_str):
            print(f"'{sub_str}' IS A KEYWORD")
        elif is_integer(sub_str):
            print(f"'{sub_str}' IS AN INTEGER")
        elif is_real_number(sub_str):
            print(f"'{sub_str}' IS A REAL NUMBER")
        elif valid_identifier(sub_str):
            print(f"'{sub_str}' IS A VALID IDENTIFIER")
        else:
            print(f"'{sub_str}' IS NOT A VALID IDENTIFIER")
        left = right


if __name__ == "__main__":
    parse("int a = b + 1c; ")
출력:
'int' IS A KEYWORD
'a' IS A VALID IDENTIFIER
'=' IS AN OPERATOR
'b' IS A VALID IDENTIFIER
'+' IS AN OPERATOR
'1c' IS NOT A VALID IDENTIFIER
퀴즈 만들기