Created
April 9, 2014 07:13
-
-
Save leftspin/10234722 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'uri'

# Returns a copy of +text+ in which every URL reported by URI.extract is
# wrapped in an HTML anchor: <a href="URL">URL</a>.
#
# The text is walked once from left to right. Each URL is searched for
# starting at the end of the previous match, so identical URLs that occur
# several times are each located at their own position, and the output is
# assembled from slices of the untouched input (no offsets to re-adjust).
#
# URLs are inserted verbatim; they are not HTML-escaped.
# NOTE(review): Ruby's documentation marks URI.extract as obsolete; it is kept
# here so URL detection stays the same as before.
#
# @param text [String] the string to scan; it is not modified
# @return [String] a new string with the detected URLs turned into links
def linkify_urls(text)
  pieces = []
  cursor = 0 # index of the first character not yet copied to the output

  URI.extract(text).each do |url|
    location = text.index(url, cursor)
    # copy the plain text in front of the URL, then the URL as a link
    pieces << text[cursor...location] << "<a href=\"#{url}\">#{url}</a>"
    cursor = location + url.length
  end

  # whatever follows the last URL (the whole text when no URL was found)
  pieces << text[cursor..-1]
  pieces.join
end

# string to be parsed
raw_string = 'This is the http://sameurl.com and a http://sameurl.com and yet another http://sameurl.com. Regardless, here\'s https://google.com and how much the Amazon Fire (http://arstechnica.com/gadgets/2014/04/amazon-fire-tv-misses-the-same-marks-as-ouya-other-media-boxes/) sucks'
puts "-- ORIGINAL --\n#{raw_string}"

# replace every URL with a link and show the result
modified_string = linkify_urls(raw_string)
puts "-- MODIFIED --\n#{modified_string}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment