Ohjelma, joka tunnistaa C-koodin tokenit
Kuten tiedetään, leksikaalinen analyysi on kääntäjän ensimmäinen vaihe, ja sen suorittavaa osaa kutsutaan myös skanneriksi. Se muuntaa syöteohjelman tokenien jonoksi.
C-ohjelma koostuu erilaisista tokeneista, ja jokainen token on joko avainsana, tunniste, vakio, merkkijonoliteraali tai symboli.
Esimerkiksi:
- Avainsanat: for, while, if jne.
- Tunnisteet: muuttujan nimi, funktion nimi jne.
- Operaattorit: '+', '++', '-' jne.
- Erottimet: ',', ';' jne.
Esimerkki: Syötteestä 'int a = b + 1c;' ohjelma tunnistaa 'int' avainsanaksi, 'a' tunnisteeksi, '=' operaattoriksi jne.
Lähestymistapa:
- Ideana on pilkkoa syötemerkkijono (C-koodi) tokeneiksi, kuten avainsanoiksi, tunnisteiksi, operaattoreiksi, kokonaisluvuiksi ja reaaliluvuiksi.
- Ohjelma tarkistaa jokaisen tokenin apufunktioiden avulla (isKeyword, isInteger, validIdentifier jne.) ja tulostaa sen kategorian.
Alla on ohjelma, joka tulostaa kaikki annetun C-koodin avainsanat, literaalit, kelvolliset tunnisteet, virheelliset tunnisteet, kokonaisluvut ja reaaliluvut:
// C++
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/**
 * Returns 'true' if the character is a DELIMITER.
 *
 * Delimiters are the space, the arithmetic/relational operator characters,
 * the comma, the semicolon and all bracket characters.
 *
 * @param ch character to classify
 * @return true when ch terminates a token, false otherwise
 */
bool isDelimiter(char ch)
{
    // strchr() also matches the terminating NUL of the set, so the
    // end-of-string character has to be excluded explicitly.
    return ch != '\0' && strchr(" +-*/,;><=()[]{}", ch) != NULL;
}

/**
 * Returns 'true' if the character is an OPERATOR.
 *
 * @param ch character to classify
 * @return true for one of + - * / > < =, false otherwise
 */
bool isOperator(char ch)
{
    return ch != '\0' && strchr("+-*/><=", ch) != NULL;
}

/**
 * Returns 'true' if the string is a VALID IDENTIFIER.
 *
 * Only the first character is inspected: it must not be a decimal digit
 * or a delimiter. A NULL or empty string is never a valid identifier.
 *
 * @param str NUL-terminated token
 * @return true when the token may be used as an identifier
 */
bool validIdentifier(const char *str)
{
    if (str == NULL || str[0] == '\0') {
        return false;
    }
    if (str[0] >= '0' && str[0] <= '9') {
        return false;
    }
    return !isDelimiter(str[0]);
}

/**
 * Returns 'true' if the string is a KEYWORD.
 *
 * @param str NUL-terminated token
 * @return true when the token equals one of the recognised C keywords
 */
bool isKeyword(const char *str)
{
    static const char *const keywords[] = {
        "if", "else", "while", "do", "break", "continue", "int", "double",
        "float", "return", "char", "case", "sizeof", "long", "short",
        "typedef", "switch", "unsigned", "void", "static", "struct", "goto"
    };

    if (str == NULL) {
        return false;
    }
    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0) {
            return true;
        }
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.
/**
 * Returns 'true' if the string is an INTEGER.
 *
 * Accepts an optional leading '-' followed by one or more decimal digits.
 *
 * @param str NUL-terminated token
 * @return true when the whole token is a decimal integer
 */
bool isInteger(const char *str)
{
    if (str == NULL) {
        return false;
    }

    size_t i = (str[0] == '-') ? 1 : 0;
    if (str[i] == '\0') {
        return false; // empty string or a lone sign
    }
    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9') {
            return false;
        }
    }
    return true;
}

/**
 * Returns 'true' if the string is a REAL NUMBER.
 *
 * Accepts an optional leading '-', exactly one decimal point and at least
 * one decimal digit.
 *
 * @param str NUL-terminated token
 * @return true when the whole token is a decimal number with a fraction point
 */
bool isRealNumber(const char *str)
{
    bool hasDecimal = false;
    bool hasDigit = false;

    if (str == NULL) {
        return false;
    }

    for (size_t i = (str[0] == '-') ? 1 : 0; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal) {
                return false; // a second decimal point
            }
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return false;
        }
    }
    return hasDecimal && hasDigit;
}

/**
 * Extracts the SUBSTRING str[left..right] (both bounds inclusive).
 *
 * The caller must make sure that left and right are valid indices of str;
 * when right < left an empty string is produced.
 *
 * @param str   source string
 * @param left  index of the first character to copy
 * @param right index of the last character to copy
 * @return newly allocated NUL-terminated copy that the caller must free(),
 *         or NULL when str is NULL or the allocation fails
 */
char *subString(const char *str, int left, int right)
{
    if (str == NULL) {
        return NULL;
    }

    size_t count = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char *subStr = (char *)malloc(count + 1); // cast is required in C++
    if (subStr == NULL) {
        return NULL;
    }
    if (count > 0) {
        memcpy(subStr, str + left, count);
    }
    subStr[count] = '\0';
    return subStr;
}

// Parsing the input STRING.
void parse ( char * str ) { int left = 0 right = 0 ; int len = strlen ( str ); while ( right <= len && left <= right ) { if ( isDelimiter ( str [ right ]) == false ) right ++ ; if ( isDelimiter ( str [ right ]) == true && left == right ) { if ( isOperator ( str [ right ]) == true ) printf ( ''%c' IS AN OPERATOR n ' str [ right ]); right ++ ; left = right ; } else if ( isDelimiter ( str [ right ]) == true && left != right || ( right == len && left != right )) { char * subStr = subString ( str left right - 1 ); if ( isKeyword ( subStr ) == true ) printf ( ''%s' IS A KEYWORD n ' subStr ); else if ( isInteger ( subStr ) == true ) printf ( ''%s' IS AN INTEGER n ' subStr ); else if ( isRealNumber ( subStr ) == true ) printf ( ''%s' IS A REAL NUMBER n ' subStr ); else if ( validIdentifier ( subStr ) == true && isDelimiter ( str [ right - 1 ]) == false ) printf ( ''%s' IS A VALID IDENTIFIER n ' subStr ); else if ( validIdentifier ( subStr ) == false && isDelimiter ( str [ right - 1 ]) == false ) printf ( ''%s' IS NOT A VALID IDENTIFIER n ' subStr ); left = right ; } } return ; } // DRIVER FUNCTION int main () { // maximum length of string is 100 here char str [ 100 ] = 'int a = b + 1c; ' ; parse ( str ); // calling the parse function return ( 0 ); }
// C
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/**
 * Returns 'true' if the character is a DELIMITER.
 *
 * Delimiters are the space, the arithmetic/relational operator characters,
 * the comma, the semicolon and all bracket characters.
 *
 * @param ch character to classify
 * @return true when ch terminates a token, false otherwise
 */
bool isDelimiter(char ch)
{
    /* strchr() also matches the terminating NUL of the set, so the
     * end-of-string character has to be excluded explicitly. */
    return ch != '\0' && strchr(" +-*/,;><=()[]{}", ch) != NULL;
}

/**
 * Returns 'true' if the character is an OPERATOR.
 *
 * @param ch character to classify
 * @return true for one of + - * / > < =, false otherwise
 */
bool isOperator(char ch)
{
    return ch != '\0' && strchr("+-*/><=", ch) != NULL;
}

/**
 * Returns 'true' if the string is a VALID IDENTIFIER.
 *
 * Only the first character is inspected: it must not be a decimal digit
 * or a delimiter. A NULL or empty string is never a valid identifier.
 *
 * @param str NUL-terminated token
 * @return true when the token may be used as an identifier
 */
bool validIdentifier(const char *str)
{
    if (str == NULL || str[0] == '\0') {
        return false;
    }
    if (str[0] >= '0' && str[0] <= '9') {
        return false;
    }
    return !isDelimiter(str[0]);
}

/**
 * Returns 'true' if the string is a KEYWORD.
 *
 * @param str NUL-terminated token
 * @return true when the token equals one of the recognised C keywords
 */
bool isKeyword(const char *str)
{
    static const char *const keywords[] = {
        "if", "else", "while", "do", "break", "continue", "int", "double",
        "float", "return", "char", "case", "sizeof", "long", "short",
        "typedef", "switch", "unsigned", "void", "static", "struct", "goto"
    };

    if (str == NULL) {
        return false;
    }
    for (size_t i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0) {
            return true;
        }
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.
/**
 * Returns 'true' if the string is an INTEGER.
 *
 * Accepts an optional leading '-' followed by one or more decimal digits.
 *
 * @param str NUL-terminated token
 * @return true when the whole token is a decimal integer
 */
bool isInteger(const char *str)
{
    if (str == NULL) {
        return false;
    }

    size_t i = (str[0] == '-') ? 1 : 0;
    if (str[i] == '\0') {
        return false; /* empty string or a lone sign */
    }
    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9') {
            return false;
        }
    }
    return true;
}

/**
 * Returns 'true' if the string is a REAL NUMBER.
 *
 * Accepts an optional leading '-', exactly one decimal point and at least
 * one decimal digit.
 *
 * @param str NUL-terminated token
 * @return true when the whole token is a decimal number with a fraction point
 */
bool isRealNumber(const char *str)
{
    bool hasDecimal = false;
    bool hasDigit = false;

    if (str == NULL) {
        return false;
    }

    for (size_t i = (str[0] == '-') ? 1 : 0; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal) {
                return false; /* a second decimal point */
            }
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return false;
        }
    }
    return hasDecimal && hasDigit;
}

/**
 * Extracts the SUBSTRING str[left..right] (both bounds inclusive).
 *
 * The caller must make sure that left and right are valid indices of str;
 * when right < left an empty string is produced.
 *
 * @param str   source string
 * @param left  index of the first character to copy
 * @param right index of the last character to copy
 * @return newly allocated NUL-terminated copy that the caller must free(),
 *         or NULL when str is NULL or the allocation fails
 */
char *subString(const char *str, int left, int right)
{
    if (str == NULL) {
        return NULL;
    }

    size_t count = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char *subStr = malloc(count + 1);
    if (subStr == NULL) {
        return NULL;
    }
    if (count > 0) {
        memcpy(subStr, str + left, count);
    }
    subStr[count] = '\0';
    return subStr;
}

// Parsing the input STRING.
/**
 * Parses the input STRING and prints the category of every token.
 *
 * Tokens are maximal runs of non-delimiter characters. Delimiters that are
 * also operators are reported; all other delimiters are skipped silently.
 * Every index is checked against the string length before it is read, so
 * nothing past the terminating NUL is accessed.
 *
 * @param str NUL-terminated source text; it is only read
 */
void parse(char *str)
{
    if (str == NULL) {
        return;
    }

    int len = (int)strlen(str);
    int left = 0;

    while (left < len) {
        if (isDelimiter(str[left])) {
            if (isOperator(str[left])) {
                printf("'%c' IS AN OPERATOR\n", str[left]);
            }
            left++;
            continue;
        }

        /* Extend the token up to the next delimiter or the end of input. */
        int right = left;
        while (right < len && !isDelimiter(str[right])) {
            right++;
        }

        char *subStr = subString(str, left, right - 1);
        if (subStr == NULL) {
            fputs("parse: out of memory\n", stderr);
            return;
        }

        if (isKeyword(subStr)) {
            printf("'%s' IS A KEYWORD\n", subStr);
        } else if (isInteger(subStr)) {
            printf("'%s' IS AN INTEGER\n", subStr);
        } else if (isRealNumber(subStr)) {
            printf("'%s' IS A REAL NUMBER\n", subStr);
        } else if (validIdentifier(subStr)) {
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        } else {
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        }

        free(subStr); /* the token copy is owned by this function */
        left = right;
    }
}

// DRIVER FUNCTION
int main(void)
{
    // maximum length of string is 100 here
    char str[100] = "int a = b + 1c; ";

    parse(str); // calling the parse function

    return (0);
}
// Java
import java.util.Arrays;

/**
 * Minimal lexical scanner that splits a line of C code into tokens and
 * prints the category of each one.
 */
public class Parser {

    /** Characters that terminate a token. */
    private static final String DELIMITERS = " +-*/,;><=()[]{}";

    /** Delimiters that are reported as operators. */
    private static final String OPERATORS = "+-*/><=";

    /** C keywords recognised by {@link #isKeyword(String)}. */
    private static final String[] KEYWORDS = {
        "if", "else", "while", "do", "break", "continue", "int", "double",
        "float", "return", "char", "case", "sizeof", "long", "short",
        "typedef", "switch", "unsigned", "void", "static", "struct", "goto"
    };

    /** Returns true if the character is a delimiter. */
    public static boolean isDelimiter(char ch) {
        return DELIMITERS.indexOf(ch) != -1;
    }

    /** Returns true if the character is an operator. */
    public static boolean isOperator(char ch) {
        return OPERATORS.indexOf(ch) != -1;
    }

    /**
     * Returns true if the string may be used as an identifier. Only the
     * first character is inspected: it must not be a digit or a delimiter.
     */
    public static boolean validIdentifier(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        char first = str.charAt(0);
        return !Character.isDigit(first) && !isDelimiter(first);
    }

    /** Returns true if the string is one of the recognised keywords. */
    public static boolean isKeyword(String str) {
        return Arrays.asList(KEYWORDS).contains(str);
    }

    /**
     * Returns true if the string is an integer: an optional leading '-'
     * followed by at least one digit.
     */
    public static boolean isInteger(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        int start = str.charAt(0) == '-' ? 1 : 0;
        if (start == str.length()) {
            return false; // a lone sign is not a number
        }
        for (int i = start; i < str.length(); i++) {
            if (!Character.isDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the string is a real number: an optional leading '-',
     * exactly one decimal point and at least one digit.
     */
    public static boolean isRealNumber(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        boolean hasDecimal = false;
        boolean hasDigit = false;
        for (int i = str.charAt(0) == '-' ? 1 : 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c == '.') {
                if (hasDecimal) {
                    return false; // a second decimal point
                }
                hasDecimal = true;
            } else if (Character.isDigit(c)) {
                hasDigit = true;
            } else {
                return false;
            }
        }
        return hasDecimal && hasDigit;
    }

    /** Returns str[left..right], both bounds inclusive. */
    public static String subString(String str, int left, int right) {
        return str.substring(left, right + 1);
    }

    /**
     * Splits the input into tokens and prints the category of each one.
     * Every index is checked against the length before charAt() is called,
     * so the scan cannot run past the end of the string.
     */
    public static void parse(String str) {
        int len = str.length();
        int left = 0;

        while (left < len) {
            char ch = str.charAt(left);
            if (isDelimiter(ch)) {
                if (isOperator(ch)) {
                    System.out.println("'" + ch + "' IS AN OPERATOR");
                }
                left++;
                continue;
            }

            // Extend the token up to the next delimiter or the end of input.
            int right = left;
            while (right < len && !isDelimiter(str.charAt(right))) {
                right++;
            }

            String subStr = subString(str, left, right - 1);
            if (isKeyword(subStr)) {
                System.out.println("'" + subStr + "' IS A KEYWORD");
            } else if (isInteger(subStr)) {
                System.out.println("'" + subStr + "' IS AN INTEGER");
            } else if (isRealNumber(subStr)) {
                System.out.println("'" + subStr + "' IS A REAL NUMBER");
            } else if (validIdentifier(subStr)) {
                System.out.println("'" + subStr + "' IS A VALID IDENTIFIER");
            } else {
                System.out.println("'" + subStr + "' IS NOT A VALID IDENTIFIER");
            }
            left = right;
        }
    }

    public static void main(String[] args) {
        String str = "int a = b + 1c; ";
        parse(str);
    }
}
# Python
DELIMITERS = " +-*/,;><=()[]{}"
OPERATORS = "+-*/><="
KEYWORDS = [
    'if', 'else', 'while', 'do', 'break', 'continue', 'int', 'double',
    'float', 'return', 'char', 'case', 'sizeof', 'long', 'short',
    'typedef', 'switch', 'unsigned', 'void', 'static', 'struct', 'goto',
]


def is_delimiter(ch):
    """Return True if ``ch`` is a single delimiter character."""
    # ``'' in s`` is always True, so require exactly one character.
    return len(ch) == 1 and ch in DELIMITERS


def is_operator(ch):
    """Return True if ``ch`` is a single operator character."""
    return len(ch) == 1 and ch in OPERATORS


def valid_identifier(str):
    """Return True if the token may be used as an identifier.

    Only the first character is inspected: it must not be a digit or a
    delimiter. An empty token is never valid.
    """
    if not str or str[0].isdigit() or is_delimiter(str[0]):
        return False
    return True


def is_keyword(str):
    """Return True if the token is one of the recognised C keywords."""
    return str in KEYWORDS


def is_integer(str):
    """Return True for an optional leading '-' followed by digits only."""
    if not str:
        return False
    digits = str[1:] if str[0] == '-' else str
    # An empty remainder means the token was a lone sign.
    return bool(digits) and all(c.isdigit() for c in digits)


def is_real_number(str):
    """Return True for an optional '-', exactly one '.', and >= 1 digit."""
    if not str:
        return False
    body = str[1:] if str[0] == '-' else str
    if body.count('.') != 1:
        return False
    digits = body.replace('.', '')
    return bool(digits) and all(c.isdigit() for c in digits)


def sub_string(str, left, right):
    """Return ``str[left..right]`` with both bounds inclusive."""
    return str[left:right + 1]


def parse(str):
    """Split ``str`` into tokens and print the category of each one.

    Every index is checked against the length before it is used, so the
    scan cannot run past the end of the input.
    """
    len_str = len(str)
    left = 0
    while left < len_str:
        ch = str[left]
        if is_delimiter(ch):
            if is_operator(ch):
                print(f"'{ch}' IS AN OPERATOR")
            left += 1
            continue

        # Extend the token up to the next delimiter or the end of input.
        right = left
        while right < len_str and not is_delimiter(str[right]):
            right += 1

        sub_str = sub_string(str, left, right - 1)
        if is_keyword(sub_str):
            print(f"'{sub_str}' IS A KEYWORD")
        elif is_integer(sub_str):
            print(f"'{sub_str}' IS AN INTEGER")
        elif is_real_number(sub_str):
            print(f"'{sub_str}' IS A REAL NUMBER")
        elif valid_identifier(sub_str):
            print(f"'{sub_str}' IS A VALID IDENTIFIER")
        else:
            print(f"'{sub_str}' IS NOT A VALID IDENTIFIER")
        left = right


if __name__ == '__main__':
    code = 'int a = b + 1c; '
    parse(code)
Tuloste:
'int' IS A KEYWORD
'a' IS A VALID IDENTIFIER
'=' IS AN OPERATOR
'b' IS A VALID IDENTIFIER
'+' IS AN OPERATOR
'1c' IS NOT A VALID IDENTIFIER

Luo tietokilpailu