#!/usr/bin/env python3 # Wireshark - Network traffic analyzer # By Gerald Combs # Copyright 1998 Gerald Combs # # SPDX-License-Identifier: GPL-2.0-or-later # Scan dissectors for calls to col_[set|add|append]_[f]str # to check that most appropriate API is being used import os import re import subprocess import argparse import signal # Try to exit soon after Ctrl-C is pressed. should_exit = False def signal_handler(sig, frame): global should_exit should_exit = True print('You pressed Ctrl+C - exiting') signal.signal(signal.SIGINT, signal_handler) # Test for whether the given file was automatically generated. def isGeneratedFile(filename): # Check file exists - e.g. may have been deleted in a recent commit. if not os.path.exists(filename): return False # Open file f_read = open(os.path.join(filename), 'r', encoding="utf8") lines_tested = 0 for line in f_read: # The comment to say that its generated is near the top, so give up once # get a few lines down. if lines_tested > 10: f_read.close() return False if (line.find('Generated automatically') != -1 or line.find('Generated Automatically') != -1 or line.find('Autogenerated from') != -1 or line.find('is autogenerated') != -1 or line.find('automatically generated by Pidl') != -1 or line.find('Created by: The Qt Meta Object Compiler') != -1 or line.find('This file was generated') != -1 or line.find('This filter was automatically generated') != -1 or line.find('This file is auto generated, do not edit!') != -1 or line.find('This file is auto generated') != -1): f_read.close() return True lines_tested = lines_tested + 1 # OK, looks like a hand-written file! f_read.close() return False def removeComments(code_string): code_string = re.sub(re.compile(r"/\*.*?\*/",re.DOTALL ) ,"" ,code_string) # C-style comment code_string = re.sub(re.compile(r"//.*?\n" ) ,"" ,code_string) # C++-style comment return code_string def is_dissector_file(filename): p = re.compile(r'.*(packet|file)-.*\.c') return p.match(filename) def findDissectorFilesInFolder(folder, recursive=False): dissector_files = [] if recursive: for root, subfolders, files in os.walk(folder): for f in files: if should_exit: return f = os.path.join(root, f) dissector_files.append(f) else: for f in sorted(os.listdir(folder)): if should_exit: return filename = os.path.join(folder, f) dissector_files.append(filename) return [x for x in filter(is_dissector_file, dissector_files)] warnings_found = 0 errors_found = 0 class ColCall: def __init__(self, file, line_number, name, last_args, generated, verbose): self.filename = file self.line_number = line_number self.name = name self.last_args = last_args self.generated = generated self.verbose = verbose def issue_prefix(self): generated = '(GENERATED) ' if self.generated else '' return self.filename + ':' + generated + str(self.line_number) + ' : called ' + self.name + ' with ' + self.last_args def check(self): global warnings_found self.last_args = self.last_args.replace('\\\"', "'") self.last_args = self.last_args.strip() # Empty string never a good idea if self.last_args == r'""': if self.name.find('append') == -1: print('Warning:', self.issue_prefix(), '- if want to clear column, use col_clear() instead') warnings_found += 1 else: # TODO: pointless if appending, but unlikely to see pass # This is never a good idea.. if self.last_args.startswith(r'"%s"'): print('Warning:', self.issue_prefix(), " - don't need fstr API?") warnings_found += 1 # String should be static, or at least persist if self.name == 'col_set_str': # Literal strings are safe, as well as some other patterns.. if self.last_args.startswith('"'): return elif self.last_args.startswith('val_to_str_const') or self.last_args.startswith('val_to_str_ext_const'): return # TODO: substitute macros to avoid some special cases.. elif self.last_args.startswith('PSNAME') or self.last_args.startswith('PNAME') or self.last_args.startswith('PROTO_SHORT_NAME'): return # TODO; match ternary test with both outcomes being literal strings? else: if self.verbose: # Not easy/possible to judge lifetime of string.. print('Note:', self.issue_prefix(), '- is this OK??') if self.name == 'col_add_str': # If literal string, could have used col_set_str instead? self.last_args = self.last_args.replace('\\\"', "'") self.last_args = self.last_args.strip() if self.last_args.startswith('"'): print('Warning:', self.issue_prefix(), '- could call col_set_str() instead') warnings_found += 1 elif self.last_args.startswith('val_to_str_const'): print('Warning:', self.issue_prefix(), '- const so could use col_set_str() instead') warnings_found += 1 elif self.last_args.startswith('val_to_str_ext_const'): print('Warning:', self.issue_prefix(), '- const so could use col_set_str() instead') warnings_found += 1 if self.name == 'col_append_str': pass if self.name == 'col_add_fstr' or self.name == 'col_append_fstr': # Look at format string self.last_args = self.last_args.replace('\\\"', "'") m = re.search(r'"(.*?)"', self.last_args) if m: # Should contain at least one format specifier! format_string = m.group(1) if format_string.find('%') == -1: print('Warning:', self.issue_prefix(), 'with no format specifiers - "' + format_string + '" - use str() version instead') warnings_found += 1 # Check the given dissector file. def checkFile(filename, generated, verbose=False): global warnings_found global errors_found # Check file exists - e.g. may have been deleted in a recent commit. if not os.path.exists(filename): print(filename, 'does not exist!') return with open(filename, 'r', encoding="utf8") as f: full_contents = f.read() # Remove comments so as not to trip up RE. contents = removeComments(full_contents) # Look for all calls in this file matches = re.finditer(r'(col_set_str|col_add_str|col_add_fstr|col_append_str|col_append_fstr)\((.*?)\)\s*\;', contents, re.MULTILINE|re.DOTALL) col_calls = [] last_line_number = 1 last_char_offset = 0 for m in matches: args = m.group(2) line_number = -1 # May fail to find there were comments inside call... # Make search partial to: # - avoid finding an earlier identical call # - speed up searching by making it shorter remaining_lines_text = full_contents[last_char_offset:] match_offset = remaining_lines_text.find(m.group(0)) if match_offset != -1: match_in_lines = len(remaining_lines_text[0:match_offset].splitlines()) line_number = last_line_number + match_in_lines-1 last_line_number = line_number last_char_offset += match_offset + 1 # enough to not match again # Match first 2 args plus remainer args_m = re.match(r'(.*?),\s*(.*?),\s*(.*)', args) if args_m: col_calls.append(ColCall(filename, line_number, m.group(1), last_args=args_m.group(3), generated=generated, verbose=verbose)) # Check them all for call in col_calls: call.check() ################################################################# # Main logic. # command-line args. Controls which dissector files should be checked. # If no args given, will scan all dissectors. parser = argparse.ArgumentParser(description='Check calls in dissectors') parser.add_argument('--file', action='append', help='specify individual dissector file to test') parser.add_argument('--commits', action='store', help='last N commits to check') parser.add_argument('--open', action='store_true', help='check open files') parser.add_argument('--verbose', action='store_true', help='show extra info') args = parser.parse_args() # Get files from wherever command-line args indicate. files = [] if args.file: # Add specified file(s) for f in args.file: if not os.path.isfile(f) and not f.startswith('epan'): f = os.path.join('epan', 'dissectors', f) if not os.path.isfile(f): print('Chosen file', f, 'does not exist.') exit(1) else: files.append(f) elif args.commits: # Get files affected by specified number of commits. command = ['git', 'diff', '--name-only', 'HEAD~' + args.commits] files = [f.decode('utf-8') for f in subprocess.check_output(command).splitlines()] # Will examine dissector files only files = list(filter(lambda f : is_dissector_file(f), files)) elif args.open: # Unstaged changes. command = ['git', 'diff', '--name-only'] files = [f.decode('utf-8') for f in subprocess.check_output(command).splitlines()] # Only interested in dissector files. files = list(filter(lambda f : is_dissector_file(f), files)) # Staged changes. command = ['git', 'diff', '--staged', '--name-only'] files_staged = [f.decode('utf-8') for f in subprocess.check_output(command).splitlines()] # Only interested in dissector files. files_staged = list(filter(lambda f : is_dissector_file(f), files_staged)) for f in files_staged: if f not in files: files.append(f) else: # Find all dissector files from folder. files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors')) files += findDissectorFilesInFolder(os.path.join('plugins', 'epan'), recursive=True) files += findDissectorFilesInFolder(os.path.join('epan', 'dissectors', 'asn1'), recursive=True) # If scanning a subset of files, list them here. print('Examining:') if args.file or args.commits or args.open: if files: print(' '.join(files), '\n') else: print('No files to check.\n') else: print('All dissectors\n') # Now check the chosen files for f in files: if should_exit: exit(1) checkFile(f, isGeneratedFile(f), verbose=args.verbose) # Show summary. print(warnings_found, 'warnings found') if errors_found: print(errors_found, 'errors found') exit(1)