Created
March 29, 2024 23:07
-
-
Save paulresdat/c1c7ab8662965f83fcbbfe39874878ad to your computer and use it in GitHub Desktop.
A simple line counter that makes it easy to add support for new languages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from enum import Enum | |
from os.path import abspath | |
from glob import glob | |
from io import TextIOWrapper | |
import pathlib | |
from typing import List, Pattern, AnyStr | |
import inspect | |
import re | |
from abc import ABCMeta, abstractmethod | |
# python3 line-count.py -h to get the list of commands | |
# Example use, default c# | |
# python3 line-count.py -d ./VSIProject/VirtualSwitchIndication | |
# Exclude some assemblies to just get the VSI project and related nugets | |
# specific to the VSI project (including VsiSignalR and VSICommon). | |
# -i added to include bracket characters on their own line: counts single "{", "}", "(", and ")" lines. | |
# python3 line-count.py -d ./VSIProject/VirtualSwitchIndication --lang c# --exclude AKRRCommon,AKRRItcm,AKRRAmqp -i | |
# To find directories worth excluding before counting, run with --show-dir first: it only lists the directories that contain matching files and does not count any lines.
# python3 line-count.py -d ./VSIProject/LINNTestEngine --lang python --show-dir | |
# then add the exclusions | |
# python3 line-count.py -d ./VSIProject/LINNTestEngine --lang python --exclude "site-packages,node_modules" | |
def main():
    """Count lines of code under a directory for the selected language.

    Reads the command-line options, collects the files whose extension
    matches each selected language (skipping paths that contain an excluded
    fragment), and then either prints the directories holding those files
    (``--show-dir``) or prints the line total per file extension.
    """
    args = parse_arguments()
    langs = get_langs([args.lang])
    directory = pathlib.Path(abspath(args.directory))
    include_all = args.include_all_lines

    # User exclusions arrive as one comma separated string. Empty fragments
    # (e.g. from a trailing comma) are dropped: an empty string is contained
    # in every path and would exclude every file.
    exclude = []
    if args.exclude is not None:
        exclude.extend(
            part.strip() for part in str(args.exclude).split(",") if part.strip()
        )

    all_counts = {}
    for lang in langs:
        print("\nProcessing " + str(lang.lang_type()))

        # Build a new list so the language object's own list is not mutated.
        lang_exclusion = list(lang.excluded_directories()) + exclude
        file_match = [
            f for f in directory.rglob('*.' + lang.glob_file)
            # is_file() skips directories whose name happens to match the glob.
            if matches_inclusion(f, lang_exclusion) and f.is_file()
        ]

        if args.show_dir:
            # Debugging aid: only show the directories that contain files we
            # would count, then skip the counting step for this language.
            match_dir = sorted({str(f.parent) for f in file_match})
            print("\n".join(match_dir))
            continue

        count = 0
        for fcount, f in enumerate(file_match, start=1):
            # "\r" keeps the running file counter on a single console line.
            print("Files: " + str(fcount), end="\r")
            # The context manager closes each file as soon as it is counted.
            with open(f, "r") as source:
                count += lang.line_count(source, include_all)
        all_counts[lang.glob_file] = count

    if not args.show_dir:
        print("\n")
        print("Total Counts")
        for extension, total in all_counts.items():
            print("Lines in ." + extension + " files: " + str(total))
def matches_inclusion(file: "str | pathlib.PurePath", exclude: List[str]) -> bool:
    """Return True when *file* is not hit by any exclusion fragment.

    Args:
        file: Path of the candidate file, as a string or a ``pathlib`` path.
        exclude: Substrings (such as ``"/bin/"``) that disqualify a path.

    Returns:
        False if any non-empty fragment occurs in the path, True otherwise.
    """
    # Compare against the forward-slash form so fragments written as "/bin/"
    # also match paths whose native separator is a backslash.
    if isinstance(file, pathlib.PurePath):
        path_text = file.as_posix()
    else:
        path_text = str(file)
    # Empty fragments are ignored: "" is a substring of every path and would
    # otherwise reject everything.
    return not any(fragment and fragment in path_text for fragment in exclude)
def parse_arguments():
    """Build the command-line parser and parse the process arguments.

    Returns:
        The ``argparse.Namespace`` holding ``directory``, ``lang``,
        ``exclude``, ``include_all_lines`` and ``show_dir``.
    """
    parser = argparse.ArgumentParser(description="Line Counter")
    # Required: main() hands this value straight to abspath(), so leaving it
    # out used to end in a TypeError instead of a usage message.
    parser.add_argument("--directory", "-d", action="store", required=True,
                        help="Directory to point to")
    # type=CodeTypes converts the raw token (e.g. "c#") to the enum member.
    parser.add_argument("--lang", "-l", action="store", type=CodeTypes, default=CodeTypes.csharp,
                        help="Parse the lines of a specific language: default c#")
    parser.add_argument("--exclude", "-e", action="store",
                        help="comma separated list of assembly names to exclude")
    parser.add_argument("--include-all-lines", "-i", action="store_true", default=False,
                        help="include all lines except empty newlines")
    parser.add_argument("--show-dir", "-sd", action="store_true", default=False,
                        help="Show the current list of directories that the files will be counted from")
    return parser.parse_args()
class CodeTypes(str, Enum):
    """Languages known to the counter; each value is the ``--lang`` token."""

    csharp = "c#"
    javascript = "js"
    typescript = "ts"
    python = "python"
class ILanguage(metaclass=ABCMeta):
    """Interface that every language line counter has to provide."""

    @classmethod
    def __subclasshook__(cls, subclass: type) -> bool:
        """Accept any class exposing a callable ``line_count`` as a subclass.

        Returns ``NotImplemented`` otherwise so the regular ABC subclass
        check decides.
        """
        if callable(getattr(subclass, 'line_count', None)):
            return True
        return NotImplemented

    @abstractmethod
    def line_count(self, file: TextIOWrapper, include_all: bool = False) -> int:
        """Return the number of counted lines in the open text *file*."""
        raise NotImplementedError

    @abstractmethod
    def excluded_directories(self) -> List[str]:
        """Return the path fragments this language excludes by default."""
        raise NotImplementedError

    @abstractmethod
    def lang_type(self) -> 'CodeTypes':
        """Return the ``CodeTypes`` member this implementation handles."""
        raise NotImplementedError

    @abstractmethod
    def glob_file(self) -> str:
        """Return the file extension used to find this language's files."""
        raise NotImplementedError
class Language(ILanguage):
    """Shared line-counting behaviour for the concrete language classes.

    A subclass assigns ``glob_file``, ``_type``, ``_excluded_dir`` and
    ``_lines_ignore`` in its constructor and then calls :meth:`initialize`
    to compile the ignore patterns.
    """

    def __init__(self):
        self._glob_file = None      # file extension without the dot, e.g. 'cs'
        self._excluded_dir = []     # path fragments excluded by default
        self._lines_ignore = []     # regex sources for lines that are not counted
        self._ignore_regexes: List[Pattern[str]] = []
        self._type = None           # CodeTypes member, set by the subclass

    # ``glob_file`` is a property rather than a method plus a same-named
    # instance attribute: the attribute used to shadow the method, leaving
    # the method unreachable. Reads and assignments keep working unchanged.
    @property
    def glob_file(self) -> str:
        """File extension (without the dot) used to glob for source files."""
        return self._glob_file

    @glob_file.setter
    def glob_file(self, value: str) -> None:
        self._glob_file = value

    def initialize(self):
        """Validate the configuration and compile the ignore patterns.

        Raises:
            RuntimeError: if ``glob_file`` has not been set.
        """
        if self.glob_file is None:
            raise RuntimeError("Language does not have glob file variable set which is required to know what kind of files it should be searching")
        self._ignore_regexes = [re.compile(r) for r in self._lines_ignore]

    def __matches_exclusion(self, line: str):
        """Return True when *line* matches one of the ignore patterns."""
        return any(r.match(line) for r in self._ignore_regexes)

    def line_count(self, file: TextIOWrapper, include_all: bool = False) -> int:
        """Count the non-blank lines of *file*.

        Args:
            file: Open text file to read; it is not closed here.
            include_all: When True, lines matching an ignore pattern are
                counted as well. Blank lines are never counted.

        Returns:
            The number of counted lines.
        """
        count = 0
        # Stream the file instead of loading every line into memory first.
        for raw in file:
            line = raw.strip()
            if not line:
                continue
            if include_all or not self.__matches_exclusion(line):
                count += 1
        return count

    def excluded_directories(self) -> List[str]:
        """Return a copy of the default exclusions so callers may extend it."""
        return list(self._excluded_dir)

    def lang_type(self) -> 'CodeTypes':
        """Return the ``CodeTypes`` member assigned by the subclass."""
        return self._type
def get_langs(filter: List[CodeTypes] = None) -> List[ILanguage]:
    """Instantiate the ``Language`` subclasses defined at module level.

    Args:
        filter: Language types to keep; ``None`` keeps every subclass.

    Returns:
        One instance per matching subclass, in module definition order.
    """
    found: List[ILanguage] = []
    module_names = globals()
    for name in module_names:
        candidate = module_names[name]
        if not inspect.isclass(candidate):
            continue
        # Only strict descendants of Language qualify, not Language itself.
        if candidate is Language or Language not in inspect.getmro(candidate):
            continue
        instance: ILanguage = candidate()
        if filter is None or instance.lang_type() in filter:
            found.append(instance)
    return found
class JavaScript(Language):
    """Line counter configuration for ``.js`` files."""

    def __init__(self):
        super().__init__()
        self._type = CodeTypes.javascript
        self.glob_file = 'js'
        self._excluded_dir = ['/bin/']
        # Lines made up only of braces/parentheses, optionally followed by
        # ';', are ignored. Raw string: "\)" and "\(" are invalid escape
        # sequences in a normal string literal and trigger a warning.
        self._lines_ignore = [r'^({|}|\)|\()*;?$']
        self.initialize()
class Csharp(Language):
    """Line counter configuration for ``.cs`` files."""

    def __init__(self):
        super().__init__()
        self._type = CodeTypes.csharp
        self.glob_file = 'cs'
        self._excluded_dir = ['/obj/', '/bin/']
        # Lines made up only of braces/parentheses, optionally followed by
        # ';', are ignored. Raw string: "\)" and "\(" are invalid escape
        # sequences in a normal string literal and trigger a warning.
        self._lines_ignore = [r'^({|}|\)|\()*;?$']
        self.initialize()
class Python(Language):
    """Line counter configuration for ``.py`` files.

    No directories are excluded by default and no line patterns are ignored.
    """

    def __init__(self):
        super().__init__()
        self.glob_file = 'py'
        self._type = CodeTypes.python
        self._lines_ignore = []
        self._excluded_dir = []
        self.initialize()
# Run the counter only when this file is executed as a script.
if '__main__' == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment