Last active
          September 25, 2020 05:10 
        
      - 
      
 - 
        
Save flovv/63e79a3149729b57d0397bb22a589856 to your computer and use it in GitHub Desktop.  
    scrapeGoogleImages_file1
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  var url ='https://www.google.de/search?q=Yahoo+logo&source=lnms&tbm=isch&sa=X';
// NOTE(review): keep the `var url` statement above as the very first line.
// The R helper in this gist overwrites line 1 of imageScrape.js with the
// search URL (presumably this script -- confirm the file name).
var page = new WebPage();
var fs = require('fs');

// Viewport dimensions applied to the page.
var vWidth = 1080;
var vHeight = 1920;
page.viewportSize = { width: vWidth, height: vHeight };

// Scroll through the page.
// `s` is the scroll offset applied by sc(); `sBase` holds the document's
// scrollHeight as last measured.
var s = 0;
var sBase = page.evaluate(function () {
  return document.body.scrollHeight;
});
page.scrollPosition = { top: sBase, left: 0 };
/**
 * Scrolls the page down by one step and re-schedules itself every 110 ms
 * until the scroll offset has passed the bottom of the document.
 *
 * Reads and updates the script-level variables `s` (current offset) and
 * `sBase` (last measured document height), and uses `page`, `vWidth` and
 * `vHeight`. When the offset exceeds the document height, the viewport is
 * reset to vWidth x vHeight and the loop stops.
 */
function sc() {
  // Re-measure on every step: the document height may differ between calls.
  sBase = page.evaluate(function () {
    return document.body.scrollHeight;
  });

  if (s > sBase) {
    page.viewportSize = { width: vWidth, height: vHeight };
    return;
  }

  page.scrollPosition = { top: s, left: 0 };
  page.viewportSize = { width: vWidth, height: s };

  // Step by 1/20 of the document height, capped at 400, but never by less
  // than 1: with a height of 0 the step would be 0, `s` could never exceed
  // `sBase`, and this function would re-schedule itself indefinitely.
  s += Math.max(1, Math.min(sBase / 20, 400));
  setTimeout(sc, 110);
}
/**
 * Waits 2500 ms, then writes the page's current HTML to `1.html` and
 * terminates PhantomJS.
 */
function just_wait() {
  var dumpAndExit = function () {
    fs.write('1.html', page.content, 'w');
    phantom.exit();
  };
  setTimeout(dumpAndExit, 2500);
}
// Load the search page. On success start the scroll loop; in every case
// schedule the HTML dump, which also terminates PhantomJS.
page.open(url, function (status) {
  if (status !== 'success') {
    // Scrolling a page that failed to load is pointless. Report the failure
    // and still fall through to just_wait() so the run terminates and
    // 1.html is rewritten rather than left over from an earlier run.
    console.log('Failed to load ' + url + ' (status: ' + status + ')');
  } else {
    sc();
  }
  just_wait();
});
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | library(plyr) | |
| library(reshape2) | |
| require(rvest) | |
# Runs a Google image search for `searchTerm` through PhantomJS and returns
# the image sources found on the rendered page.
#
# searchTerm: query string inserted verbatim into the search URL (it is not
#             URL-encoded here, so use "+" between words, e.g. "Adidas+logo").
# Returns a data.frame with the columns `images` (the src attribute of every
# <img> element that has one) and `search` (the search term).
# Stops with an error if the phantomjs command exits with a non-zero status.
#
# Side effects: rewrites the first line of imageScrape.js and reads 1.html
# from the working directory.
scrapeJSSite <- function(searchTerm){
  url <- paste0("https://www.google.de/search?q=", searchTerm, "&source=lnms&tbm=isch&sa=X")

  # Point the PhantomJS script at the new URL by replacing its first line.
  lines <- readLines("imageScrape.js")
  lines[1] <- paste0("var url ='", url, "';")
  writeLines(lines, "imageScrape.js")

  ## Download website
  status <- system("phantomjs imageScrape.js")
  if (status != 0) {
    stop("phantomjs failed with exit status ", status)
  }

  pg <- read_html("1.html")
  files <- pg %>% html_nodes("img") %>% html_attr("src")
  # html_attr() yields NA for <img> elements without a src attribute.
  files <- files[!is.na(files)]

  # rep() keeps both columns the same length, so an empty result gives a
  # zero-row data.frame instead of a "differing number of rows" error.
  df <- data.frame(images = files, search = rep(searchTerm, length(files)))
  return(df)
}
# Downloads every URL in `files` into `outPath`.
#
# files:   character vector of image URLs.
# brand:   label used as the file-name prefix.
# outPath: target directory; created if it does not exist yet.
# Files are saved as "<outPath>/<brand>_<index>.jpg".
downloadImages <- function(files, brand, outPath="images"){
  # download.file() does not create the destination directory itself.
  dir.create(outPath, showWarnings = FALSE, recursive = TRUE)
  # seq_along() is empty for empty input, whereas 1:length(files) would
  # iterate over c(1, 0).
  for(i in seq_along(files)){
    download.file(files[i], destfile = paste0(outPath, "/", brand, "_", i, ".jpg"), mode = 'wb')
  }
}
### exchange the search terms here!
# The second argument is the brand label used as the file-name prefix; it
# must be a value that exists at this point (the original passed an
# undefined `i`, which fails with "object 'i' not found").
gg <- scrapeJSSite(searchTerm = "Adidas+logo")
downloadImages(as.character(gg$images), "Adidas")
I run your code but it returns this error:
Error in paste0(outPath, "/", brand, "_", i, ".jpg") :
object 'i' not found
@andreaangeli, it worked for me like this; I hope it helps:
Lines 25 to 34 in scrapeGoogleImages.r:
`
# "outPath" has to be adapted!
# Downloads every URL in `files` to "<outPath>/<brand>_<index>.jpg".
downloadImages <- function(files, brand, outPath="D://scrape_images//brand"){
  # seq_along() is empty for empty input, whereas 1:length(files) would
  # iterate over c(1, 0).
  for(i in seq_along(files)){
    download.file(files[i], destfile = paste0(outPath, "/", brand, "_", i, ".jpg"), mode = 'wb')
  }
}
### exchange the search terms here!
gg <- scrapeJSSite(searchTerm = "Hermes+logo")
downloadImages(as.character(gg$images), 'Hermes')
`
How can I download more than 20 images?
How to Download more than 20 images? Please help
Have you seen that Link: http://flovv.github.io/scrape_images_google/
I assume that you have to set the scroll parameter `var s = 0;` such that
PhantomJS scrolls for a couple of pages.
Note that this might increase page load time.
…On Thu, Dec 26, 2019 at 9:31 AM ArindamRouth ***@***.***> wrote:
 How to Download more than 20 images? Please help
 —
 You are receiving this because you authored the thread.
 Reply to this email directly, view it on GitHub
 <https://gist.github.com/63e79a3149729b57d0397bb22a589856?email_source=notifications&email_token=AASCD4TTT5HXJYXNLGPXCM3Q2RTU5A5CNFSM4J7JN2M2YY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAF6PK6#gistcomment-3120815>,
 or unsubscribe
 <https://github.com/notifications/unsubscribe-auth/AASCD4TVPGZONEXMNH5KYJTQ2RTU5ANCNFSM4J7JN2MQ>
 .
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment
  
            
34:
downloadImages(as.character(gg$images), 'yahoo')