import re  # regex

# Extract a DC voltage that is embedded in a filename string.
# A regular expression captures a group () within a larger string so that the
# group can be saved to a variable.
#
# The Regular Expression
#     DC=([-]?(?:\d+\.?\d*|\.\d+))V?
# reads as follows:
#     DC=       exact text match
#     (         start of the 1st (and only) capture group; this is the part of
#               the string we extract
#     [-]?      0 or 1 instances (the '?') of '-'.  The sign is inside the
#               group so that a negative voltage keeps its sign.
#     (?:       group for the alternation below, but don't capture it
#     \d+       one or more digits,
#     \.?       maybe (?) a period -- the \ escape IS needed, an unescaped '.'
#               would match any character,
#     \d*       and any number of further digits
#     |\.\d+    ...or a bare leading period followed by digits (eg. ".5")
#     ))        end of the alternation and of the capture group
#     V?        maybe (?) has a trailing V
# At least one digit is required, so the captured text is always something
# float() can convert ('DC=' followed by no number is simply not a match).
#
# The case is ignored in the whole expression (re.IGNORECASE); we could have
# also specified [Dd][Cc] and [Vv] (sets of characters that include both upper
# & lower case) to accomplish the same thing.
#
# The pattern is a 'raw' python string, to prevent interpretation/escaping of
# the backslashes by python itself.
dcpattern = re.compile(
    r'DC=([-]?(?:\d+\.?\d*|\.\d+))V?',
    flags=re.IGNORECASE,
)

# Perform the search: use the regex pattern to extract the DC value from the
# filename `f1` (`f1` must already be defined before this point).
# `m` is a match object holding the groups () defined in the pattern, or None
# if the pattern was not found anywhere in `f1`.
m = dcpattern.search(f1)

if m:
    # Grab the 1st group from the RegEx & convert to float.
    Vdc = float(m.group(1))
    # groups() shows all captured groups.  A single formatted string is used
    # so the output is the same under both Python 2 and Python 3.
    print('DC value found: %s  -->  %s (V)' % (m.groups(), Vdc))

# For example, if
#     f1 = 'Iinj=1.0mA, Vdc=2.220V - 08 Oct 2013, 1130_28- Optical Spectrum.jpg'
# then after the search we'd get:
#     Vdc = 2.22

# Other useful RegEx tokens
#     .     - any single character (except a newline, by default)
#     .*    - any single character, any number of times
#             (eg. any number of characters)
#     +     - like *, but can't be zero characters (only one or more)
#     \d    - a single decimal digit
#     ?     - may or may not have the preceding char,
#             eg. 0? means maybe has a 0
#     \s    - any whitespace (tab, space, newline etc.)
#
# Capture number with possible decimal point:
#     (\d+\.?\d*)
#     - One or more digits (must include the left-most 0 then, ie. ".045"
#       won't match, only "0.045"), followed by
#       maybe a ".", followed by
#       any number of (including none) digits
#
# Match either of two words:
#     (?:wordone|wordtwo)
#     (?: means group but don't capture (ie. the group is not saved, so it
#     does not show up in m.groups())