misner · October 4, 2021 09:30
diff --git a/Baqend-file.js b/Baqend-file.js
 //current code
 import { 
  ErrorHandlingHOF,
  log,
  stringExtractedIsInValid,
  organicExtractData_LogIfValueIsInvalid } from "./utils/index.js";
 import {
  slugifyStr,
  getFirstNWordsInStr,
  longestWord } from "./utils/stringManipulations.js";
 import {
  COUNTRY_CODE_ISO3166_MAPPING } from "./utils/countryNameToCountryIso3166Code.js";
 import {
  addWebsiteNameTrackingParams,
  getLwnRootDomain } from "./utils/urlManipulations.js";
 import { WEBSITE_NAME_INCLUDING_DEV_MODE } from "./utils/fnConstants.js"; 

 /* Database
    Baqend SDK handle and the name of the entity class this function writes to.
 */
 // The ES-module form below is kept for reference only; the SDK is loaded via require().
 //import DB from "baqend";
 var DB = require("baqend");
 // Key looked up on DB (DB[DB_NAME]) by the handler to build the record it inserts.
 const DB_NAME = 'Job';

 // Memoized connect+login promise. It is kept at module level so that later
 // calls in the same process reuse it instead of connecting again.
 let connecting;

 /**
  * Connect to the given Baqend app (once) and log in as the "aws-to-baqend"
  * user when no user is logged in yet.
  *
  * The first call starts the connection and caches the resulting promise;
  * every later call returns that same promise.
  *
  * @param {string} app - Baqend app name handed to `DB.connect`.
  * @returns {Promise<void>} Settles once the connection and the login attempt
  *   are done. A failed login is logged and swallowed (best effort); a failed
  *   `DB.connect` rejects the returned promise.
  */
 const connect_to_baqend = (app) => {
   if (typeof connecting === 'object') {
     log("on this lambda execution, the baqend database is already connected thanks to shared-memory ...")
     return connecting;
   }

   connecting = DB.connect(app, true).then(() => {

     console.log("Baqend Connected. Awaiting login..");

     // Only log in when there is no current user: this avoids the
     // "user is already logged in" issue from baqend.
     const loginIfNecessaryPromise = !DB.User.me
       ? DB.User.login(
           "aws-to-baqend",
           process.env.AWS_TO_BAQEND
         )
       : Promise.resolve();

     return loginIfNecessaryPromise
       .then(function() {
         console.log('Logged into Baqend. Connection is ready to be used...')
       })
       .catch(function(loginIfNecessaryError) {
         // Log the actual failure reason; the original logged the promise
         // object, which hid why the login failed.
         console.log("loginIfNecessaryPromise", loginIfNecessaryError);
       });
   });

   return connecting;
 };


 /**
  * Lambda handler that validates and normalizes one extracted job posting
  * (`event`) and inserts it into Baqend as a `DB[DB_NAME]` record.
  *
  * When a required field fails validation, the problem is logged (through
  * `organicExtractData_LogIfValueIsInvalid` or `log`) and the handler returns
  * `undefined` without touching the database.
  *
  * @param {object} event - Extracted job fields: position, link, companyName,
  *   jobLocationCity, base_url, extract_data_type, base_url_country,
  *   description, cleanFinalApplyDestinationUrl, tagsMatched, plus optional
  *   company domain / logo hints.
  * @param {object} context - Lambda context; only `awsRequestId` is read (for logging).
  * @param {Function} callback - Unused; kept so the signature stays unchanged.
  * @returns {Promise<{statusCode: number, body: string}|undefined>} The 200
  *   response object once the insert has been attempted, or `undefined` when
  *   the input was rejected.
  */
 const handler = ErrorHandlingHOF(async function(event, context, callback) {

   const {
     position,
     link,
     pubDate,
     companyName,
     jobLocationCity,
     base_url,
     extract_data_type,
     base_url_country,
     jkNumber,
     description,
     cleanFinalApplyDestinationUrl,
     tagsMatched,
     companyCertainDomain,
     companyClearbitAutocompApproxDomain,
     companyCertainLogoUrl,
     companyClearbitAutocomppeApproxLogoUrl
   } = event;

   log("Got", {
     position,
     link,
     pubDate,
     companyName,
     jobLocationCity,
     base_url,
     extract_data_type,
     base_url_country,
     jkNumber,
     description,
     companyCertainDomain,
     cleanFinalApplyDestinationUrl,
     tagsMatched,
     companyClearbitAutocompApproxDomain,
     companyCertainLogoUrl,
     companyClearbitAutocomppeApproxLogoUrl
   });

   // Trim, then squeeze every run of whitespace into a single space.
   const collapseWhitespace = (text) => text.trim().replace(/\s+/g, ' ');

   /* Standardization & create the inputs for the database
      Note: keep same order as columns in the database for easier work on db<->code.
      Columns not computed here:
      - listing_id and listing_url: set on db level (Baqend "Modules"), the only
        way to avoid concurrency issues and never give 2 jobs the same value.
      - listing_duration: n/a, no limit on organic listings.
      - paid_listing_* columns: n/a for organic listings.
   */

   /*************  standardize extract_data_type  *************************/
   const jobDataSourceType = extract_data_type;
   const possibleDataExtractionType = ['scraping', 'api'];
   // must be a non-empty string AND one of the authorized values
   if (
     stringExtractedIsInValid(jobDataSourceType) ||
     !possibleDataExtractionType.includes(jobDataSourceType)
   ) {
     organicExtractData_LogIfValueIsInvalid("extracttion data type");
     return;
   }
   log("jobDataSourceType is : " + jobDataSourceType);

   /*************  standardize listing_master_scraping_query  *************************/
   const jobMasterScrapingQuery = base_url;
   if (stringExtractedIsInValid(jobMasterScrapingQuery)) {
     organicExtractData_LogIfValueIsInvalid("jobMasterScrapingQuery");
     return;
   }
   log("jobMasterScrapingQuery is : " + jobMasterScrapingQuery);

   /*************  standardize listing_data_source_url  *************************/
   const jobDataSourceUrl = link;
   if (stringExtractedIsInValid(jobDataSourceUrl)) {
     organicExtractData_LogIfValueIsInvalid("job details url on source website");
     return;
   }
   log("jobDataSourceUrl is : " + jobDataSourceUrl);

   /*************  standardize listing_title  *************************/
   let jobPosition = position;
   if (stringExtractedIsInValid(jobPosition)) {
     // report the field that is actually invalid (was a copy-pasted url label)
     organicExtractData_LogIfValueIsInvalid("job position");
     return;
   }
   // fix issue #558: a huge string without any white space was breaking feed
   // rendering, so give very long words break opportunities around each "/"
   const longestWordInPosition = longestWord(jobPosition);
   if (longestWordInPosition.length > 30) {
     const spacedLongestWord = longestWordInPosition.replace(/\//g, ' / ');
     // replacer function => the replacement is inserted literally, even when
     // the scraped title contains "$&"-style sequences
     jobPosition = jobPosition.replace(longestWordInPosition, () => spacedLongestWord);
   }

   // Drop everything from a "neutral" keyword onwards: such keywords indicate
   // that the position also embeds a company name or a city.
   // Not super modular: keywords for all languages live in this one pattern.
   const excludeCity = /basé à|based in/gi;
   jobPosition = jobPosition.split(excludeCity)[0];

   // no blanket toLowerCase() here: it would turn acronyms like "IT" into "it"
   let normalizedJobPosition = collapseWhitespace(jobPosition);

   const jobPositionTest = getFirstNWordsInStr(normalizedJobPosition, 3);
   if (jobPositionTest === jobPositionTest.toUpperCase()) {
     // The first 3 words are 100% uppercase,
     // ex: https://www.indeed.fr/voir-emploi?jk=f5472bfe2b5a5f00 => "COMMERCIAL H/F".
     // Lowercase the whole title for consistent formatting on the listing feed.
     // Known loophole: a legit acronym ("IT") becomes "it" (then "It" via css
     // capitalize), but it's the less bad option.
     // Only 3 words are tested because checking the whole string left
     // "BUSINESS DEVELOPMENT INTERN PARIS (M/F/D) GmbH" unhandled.
     normalizedJobPosition = normalizedJobPosition.toLowerCase();
   }
   log("normalizedJobPosition is : " + normalizedJobPosition);

   /*************  set value for listing_location_restricted  *************************/
   const jobLocationRestricted = false;

   /*************  standardize base_url_country  *************************/
   const jobLocationCountry = base_url_country;
   if (stringExtractedIsInValid(jobLocationCountry)) {
     organicExtractData_LogIfValueIsInvalid("base_url_country");
     return;
   }
   const normalizedjobLocationCountry = collapseWhitespace(jobLocationCountry.toLowerCase());
   log("normalizedjobLocationCountry is : " + normalizedjobLocationCountry);

   /*************  set value for listing_location_country_iso3166  *************************/
   const jobLocationCountryIso3166 = COUNTRY_CODE_ISO3166_MAPPING[normalizedjobLocationCountry];
   if (typeof jobLocationCountryIso3166 === 'undefined') {
     log("there was a problem : we couldn't find the iso 3166 country code for " +
         base_url_country + " inside functions-src/utils/countryNameToCountryIso3166Code.js");
     return;
   }
   log("jobLocationCountryIso3166 is : " + jobLocationCountryIso3166);

   /*************  standardize listing_location_city  *************************/
   if (stringExtractedIsInValid(jobLocationCity)) {
     organicExtractData_LogIfValueIsInvalid("jobLocationCity");
     return;
   }
   const normalizedjobLocationCity = collapseWhitespace(jobLocationCity.toLowerCase());
   log("normalizedjobLocationCity is : " + normalizedjobLocationCity);

   /*************  set the value of listing_location_city_slug  *************************/
   const jobLocationCitySlug = slugifyStr(normalizedjobLocationCity);
   log("jobLocationCitySlug is : " + jobLocationCitySlug);

   /*************  standardize tagsMatched  *************************/
   // a missing / non-array tag list used to throw on .filter(); reject it like
   // any other invalid field instead
   if (!Array.isArray(tagsMatched)) {
     organicExtractData_LogIfValueIsInvalid("tagsMatched");
     return;
   }
   const jobsTagArr = tagsMatched;
   const normalizedJobsTagArr = jobsTagArr
     .filter((tag) => !stringExtractedIsInValid(tag))
     .map((tag) => collapseWhitespace(tag.toLowerCase()));
   if (normalizedJobsTagArr.length < jobsTagArr.length) {
     // at least one tag was dropped as invalid; the remaining ones are kept
     console.log('This is invalid: ', jobsTagArr);
   }
   log("normalizedJobsTagArr is:");
   log(normalizedJobsTagArr);

   /*************  set the value of listing_tags_slug  *************************/
   const jobTagsSlugArr = normalizedJobsTagArr.map((tag) => slugifyStr(tag));
   log("jobTagsSlugArr is:");
   log(jobTagsSlugArr);

   /*************  set the value of listing_description  *************************/
   // a non-string description used to throw on .replace(); reject it instead.
   // An empty string is still accepted, as before.
   if (typeof description !== 'string') {
     organicExtractData_LogIfValueIsInvalid("description");
     return;
   }
   // remove any occurence of the word "null" (fixes issue #560)
   const jobDescription = description.replace(/ null /g, ' ');
   log("jobDescription is : " + jobDescription);

   /*************  standardize cleanFinalApplyDestinationUrl  *************************/
   const jobApplyUrl = cleanFinalApplyDestinationUrl;
   if (stringExtractedIsInValid(jobApplyUrl)) {
     organicExtractData_LogIfValueIsInvalid("final destination url");
     return;
   }
   // note on process.env.URL below:
   // theoretically, we should have used a conditional assignment like we do on /sitemaps-gen.js
   // but as we'll click often on links even while working, we'd rather have the
   // destination website see a clean referrer (for example aijobs.tech) than a
   // https://deploy-preview-296--zen-colden-2b17b5.netlify.com one.
   const targetWebsiteRootUrl = getLwnRootDomain(process.env.URL);
   const normalizedJobApplyUrl = addWebsiteNameTrackingParams(jobApplyUrl, targetWebsiteRootUrl);
   log("normalizedJobApplyUrl is : " + normalizedJobApplyUrl);

   /*************  standardize content_entity_name  *************************/
   if (stringExtractedIsInValid(companyName)) {
     organicExtractData_LogIfValueIsInvalid("companyName");
     return;
   }
   const normalizedCompanyName = collapseWhitespace(companyName.toLowerCase());
   log("normalizedCompanyName is : " + normalizedCompanyName);

   /*************  set the value of content_entity_name_slug  *************************/
   const companyNameSlug = slugifyStr(normalizedCompanyName);
   log("companyNameSlug is : " + companyNameSlug);

   /*************  set the value of content_entity_initials  *************************/
   // Built from companyNameSlug rather than normalizedCompanyName: we don't
   // want accents on initials, and special characters / apostrophes (l'oreal)
   // would otherwise need heavy lifting to get right.
   const companyInitials = companyNameSlug
     .split("-")              // words are separated by "-"
     .map((word) => word[0])  // first letter of each word
     .join("")
     .slice(0, 2);            // keep at most 2 initials
   log("companyInitials is : " + companyInitials);

   /*************  set value for content_entity_domain   *************************/
   let companyDomain;
   if (companyCertainDomain) {
     companyDomain = companyCertainDomain;
   } else if (!stringExtractedIsInValid(companyClearbitAutocompApproxDomain)) {
     // best option after companyCertainDomain; no sanitizing needed as the value
     // is brought by the Clearbit API, which already cleans wonky cases for us
     companyDomain = companyClearbitAutocompApproxDomain;
   } else {
     companyDomain = '';
     log("unfortunately we could not find the company domain for this job post (no biggie)");
   }
   log("companyDomain is : " + companyDomain);

   /*************  set value for content_entity_logo_url   *************************/
   let companyLogoUrl;
   if (companyCertainLogoUrl) {
     companyLogoUrl = companyCertainLogoUrl;
   } else if (companyClearbitAutocomppeApproxLogoUrl) {
     // best option after companyCertainLogoUrl (same Clearbit remark as above)
     companyLogoUrl = companyClearbitAutocomppeApproxLogoUrl;
   } else {
     companyLogoUrl = '';
     log("unfortunately we could not find the company logo url for this job post (no biggie)");
   }
   log("companyLogoUrl is : " + companyLogoUrl);

   /*************  set value for listing_type  *************************/
   const listingType = "organic";
   log("listingType is : " + listingType);

   /*************  value for paid_listing_contact_email  *************************/
   // Safeguard: must stay empty so the front-end "report broken link/empty link"
   // button can appear when the destination url is empty too. Should be
   // extremely rare as 99% of listings will anyway have a destination_url.
   const listingApplyEmail = "";
   log("listingApplyEmail is : " + listingApplyEmail);

   // TODO: consider checking early (before the normalization above) whether a
   // recent (<2 weeks) job with the same destination url already exists.

   /* Connect to Db and inject the above calculated values
     Note: rejection of listings with a normalizedJobApplyUrl already present in an existing record
     is only performed at db level by a Baqend module in order to stay as close as possible as db server
     (for efficiency and concurrency aspects)
   */
   try {
     await connect_to_baqend("listings-network");
     await DB[DB_NAME]({
       target_website: WEBSITE_NAME_INCLUDING_DEV_MODE,
       listing_category: 'j',
       listing_master_scraping_query: jobMasterScrapingQuery,
       listing_data_source_type: jobDataSourceType,
       listing_data_source_url: jobDataSourceUrl,
       listing_title: normalizedJobPosition,
       listing_location_restricted: jobLocationRestricted,
       listing_location_country: normalizedjobLocationCountry,
       listing_location_country_iso3166: jobLocationCountryIso3166,
       listing_location_city: normalizedjobLocationCity,
       listing_location_city_slug: jobLocationCitySlug,
       listing_tags: normalizedJobsTagArr,
       listing_tags_slug: jobTagsSlugArr,
       listing_description: jobDescription,
       listing_destination_url: normalizedJobApplyUrl,
       listing_apply_email: listingApplyEmail,
       content_entity_name: normalizedCompanyName,
       content_entity_name_slug: companyNameSlug,
       content_entity_initials: companyInitials,
       content_entity_domain: companyDomain,
       content_entity_logo_url: companyLogoUrl,
       listing_type: listingType
     }).insert();
     log("we injected into the db the data of " + context.awsRequestId);
   } catch (e) {
     // An error raised inside Baqend handlers (ex: onInsert) is conveyed here too.
     // Deliberately logged and not rethrown (open question in the original
     // notes: do we really want this reported on dashbird?).
     console.log('Something went wrong connecting to baqend', e)
   }

   // NOTE(review): this response is returned even when the insert above failed;
   // kept as-is because callers may depend on it - confirm before changing.
   return {
     statusCode: 200,
     body: 'Baqend: Inserted'
   }
 });

 export {handler};
	//current code
	import {
	ErrorHandlingHOF,
	log,
	stringExtractedIsInValid,
	organicExtractData_LogIfValueIsInvalid } from "./utils/index.js";
	import {
	slugifyStr,
	getFirstNWordsInStr,
	longestWord } from "./utils/stringManipulations.js";
	import {
	COUNTRY_CODE_ISO3166_MAPPING } from "./utils/countryNameToCountryIso3166Code.js";
	import {
	addWebsiteNameTrackingParams,
	getLwnRootDomain } from "./utils/urlManipulations.js";
	import { WEBSITE_NAME_INCLUDING_DEV_MODE } from "./utils/fnConstants.js";

	/* Database
	   Baqend SDK handle and the name of the entity class this function writes to.
	*/
	// The ES-module form below is kept for reference only; the SDK is loaded via require().
	//import DB from "baqend";
	var DB = require("baqend");
	// Key looked up on DB (DB[DB_NAME]) by the handler to build the record it inserts.
	const DB_NAME = 'Job';

	// Memoized connect+login promise. It is kept at module level so that later
	// calls in the same process reuse it instead of connecting again.
	let connecting;

	/**
	 * Connect to the given Baqend app (once) and log in as the "aws-to-baqend"
	 * user when no user is logged in yet.
	 *
	 * The first call starts the connection and caches the resulting promise;
	 * every later call returns that same promise.
	 *
	 * @param {string} app - Baqend app name handed to `DB.connect`.
	 * @returns {Promise<void>} Settles once the connection and the login attempt
	 *   are done. A failed login is logged and swallowed (best effort); a failed
	 *   `DB.connect` rejects the returned promise.
	 */
	const connect_to_baqend = (app) => {
	  if (typeof connecting === 'object') {
	    log("on this lambda execution, the baqend database is already connected thanks to shared-memory ...")
	    return connecting;
	  }

	  connecting = DB.connect(app, true).then(() => {

	    console.log("Baqend Connected. Awaiting login..");

	    // Only log in when there is no current user: this avoids the
	    // "user is already logged in" issue from baqend.
	    const loginIfNecessaryPromise = !DB.User.me
	      ? DB.User.login(
	          "aws-to-baqend",
	          process.env.AWS_TO_BAQEND
	        )
	      : Promise.resolve();

	    return loginIfNecessaryPromise
	      .then(function() {
	        console.log('Logged into Baqend. Connection is ready to be used...')
	      })
	      .catch(function(loginIfNecessaryError) {
	        // Log the actual failure reason; the original logged the promise
	        // object, which hid why the login failed.
	        console.log("loginIfNecessaryPromise", loginIfNecessaryError);
	      });
	  });

	  return connecting;
	};


	/**
	 * Lambda handler that validates and normalizes one extracted job posting
	 * (`event`) and inserts it into Baqend as a `DB[DB_NAME]` record.
	 *
	 * When a required field fails validation, the problem is logged (through
	 * `organicExtractData_LogIfValueIsInvalid` or `log`) and the handler returns
	 * `undefined` without touching the database.
	 *
	 * @param {object} event - Extracted job fields: position, link, companyName,
	 *   jobLocationCity, base_url, extract_data_type, base_url_country,
	 *   description, cleanFinalApplyDestinationUrl, tagsMatched, plus optional
	 *   company domain / logo hints.
	 * @param {object} context - Lambda context; only `awsRequestId` is read (for logging).
	 * @param {Function} callback - Unused; kept so the signature stays unchanged.
	 * @returns {Promise<{statusCode: number, body: string}|undefined>} The 200
	 *   response object once the insert has been attempted, or `undefined` when
	 *   the input was rejected.
	 */
	const handler = ErrorHandlingHOF(async function(event, context, callback) {

	  const {
	    position,
	    link,
	    pubDate,
	    companyName,
	    jobLocationCity,
	    base_url,
	    extract_data_type,
	    base_url_country,
	    jkNumber,
	    description,
	    cleanFinalApplyDestinationUrl,
	    tagsMatched,
	    companyCertainDomain,
	    companyClearbitAutocompApproxDomain,
	    companyCertainLogoUrl,
	    companyClearbitAutocomppeApproxLogoUrl
	  } = event;

	  log("Got", {
	    position,
	    link,
	    pubDate,
	    companyName,
	    jobLocationCity,
	    base_url,
	    extract_data_type,
	    base_url_country,
	    jkNumber,
	    description,
	    companyCertainDomain,
	    cleanFinalApplyDestinationUrl,
	    tagsMatched,
	    companyClearbitAutocompApproxDomain,
	    companyCertainLogoUrl,
	    companyClearbitAutocomppeApproxLogoUrl
	  });

	  // Trim, then squeeze every run of whitespace into a single space.
	  const collapseWhitespace = (text) => text.trim().replace(/\s+/g, ' ');

	  /* Standardization & create the inputs for the database
	     Note: keep same order as columns in the database for easier work on db<->code.
	     Columns not computed here:
	     - listing_id and listing_url: set on db level (Baqend "Modules"), the only
	       way to avoid concurrency issues and never give 2 jobs the same value.
	     - listing_duration: n/a, no limit on organic listings.
	     - paid_listing_* columns: n/a for organic listings.
	  */

	  /*********** standardize extract_data_type ***********************/
	  const jobDataSourceType = extract_data_type;
	  const possibleDataExtractionType = ['scraping', 'api'];
	  // must be a non-empty string AND one of the authorized values
	  if (
	    stringExtractedIsInValid(jobDataSourceType) ||
	    !possibleDataExtractionType.includes(jobDataSourceType)
	  ) {
	    organicExtractData_LogIfValueIsInvalid("extracttion data type");
	    return;
	  }
	  log("jobDataSourceType is : " + jobDataSourceType);

	  /*********** standardize listing_master_scraping_query ***********************/
	  const jobMasterScrapingQuery = base_url;
	  if (stringExtractedIsInValid(jobMasterScrapingQuery)) {
	    organicExtractData_LogIfValueIsInvalid("jobMasterScrapingQuery");
	    return;
	  }
	  log("jobMasterScrapingQuery is : " + jobMasterScrapingQuery);

	  /*********** standardize listing_data_source_url ***********************/
	  const jobDataSourceUrl = link;
	  if (stringExtractedIsInValid(jobDataSourceUrl)) {
	    organicExtractData_LogIfValueIsInvalid("job details url on source website");
	    return;
	  }
	  log("jobDataSourceUrl is : " + jobDataSourceUrl);

	  /*********** standardize listing_title ***********************/
	  let jobPosition = position;
	  if (stringExtractedIsInValid(jobPosition)) {
	    // report the field that is actually invalid (was a copy-pasted url label)
	    organicExtractData_LogIfValueIsInvalid("job position");
	    return;
	  }
	  // fix issue #558: a huge string without any white space was breaking feed
	  // rendering, so give very long words break opportunities around each "/"
	  const longestWordInPosition = longestWord(jobPosition);
	  if (longestWordInPosition.length > 30) {
	    const spacedLongestWord = longestWordInPosition.replace(/\//g, ' / ');
	    // replacer function => the replacement is inserted literally, even when
	    // the scraped title contains "$&"-style sequences
	    jobPosition = jobPosition.replace(longestWordInPosition, () => spacedLongestWord);
	  }

	  // Drop everything from a "neutral" keyword onwards: such keywords indicate
	  // that the position also embeds a company name or a city.
	  // Not super modular: keywords for all languages live in this one pattern.
	  const excludeCity = /basé à|based in/gi;
	  jobPosition = jobPosition.split(excludeCity)[0];

	  // no blanket toLowerCase() here: it would turn acronyms like "IT" into "it"
	  let normalizedJobPosition = collapseWhitespace(jobPosition);

	  const jobPositionTest = getFirstNWordsInStr(normalizedJobPosition, 3);
	  if (jobPositionTest === jobPositionTest.toUpperCase()) {
	    // The first 3 words are 100% uppercase,
	    // ex: https://www.indeed.fr/voir-emploi?jk=f5472bfe2b5a5f00 => "COMMERCIAL H/F".
	    // Lowercase the whole title for consistent formatting on the listing feed.
	    // Known loophole: a legit acronym ("IT") becomes "it" (then "It" via css
	    // capitalize), but it's the less bad option.
	    // Only 3 words are tested because checking the whole string left
	    // "BUSINESS DEVELOPMENT INTERN PARIS (M/F/D) GmbH" unhandled.
	    normalizedJobPosition = normalizedJobPosition.toLowerCase();
	  }
	  log("normalizedJobPosition is : " + normalizedJobPosition);

	  /*********** set value for listing_location_restricted ***********************/
	  const jobLocationRestricted = false;

	  /*********** standardize base_url_country ***********************/
	  const jobLocationCountry = base_url_country;
	  if (stringExtractedIsInValid(jobLocationCountry)) {
	    organicExtractData_LogIfValueIsInvalid("base_url_country");
	    return;
	  }
	  const normalizedjobLocationCountry = collapseWhitespace(jobLocationCountry.toLowerCase());
	  log("normalizedjobLocationCountry is : " + normalizedjobLocationCountry);

	  /*********** set value for listing_location_country_iso3166 ***********************/
	  const jobLocationCountryIso3166 = COUNTRY_CODE_ISO3166_MAPPING[normalizedjobLocationCountry];
	  if (typeof jobLocationCountryIso3166 === 'undefined') {
	    log("there was a problem : we couldn't find the iso 3166 country code for " +
	        base_url_country + " inside functions-src/utils/countryNameToCountryIso3166Code.js");
	    return;
	  }
	  log("jobLocationCountryIso3166 is : " + jobLocationCountryIso3166);

	  /*********** standardize listing_location_city ***********************/
	  if (stringExtractedIsInValid(jobLocationCity)) {
	    organicExtractData_LogIfValueIsInvalid("jobLocationCity");
	    return;
	  }
	  const normalizedjobLocationCity = collapseWhitespace(jobLocationCity.toLowerCase());
	  log("normalizedjobLocationCity is : " + normalizedjobLocationCity);

	  /*********** set the value of listing_location_city_slug ***********************/
	  const jobLocationCitySlug = slugifyStr(normalizedjobLocationCity);
	  log("jobLocationCitySlug is : " + jobLocationCitySlug);

	  /*********** standardize tagsMatched ***********************/
	  // a missing / non-array tag list used to throw on .filter(); reject it like
	  // any other invalid field instead
	  if (!Array.isArray(tagsMatched)) {
	    organicExtractData_LogIfValueIsInvalid("tagsMatched");
	    return;
	  }
	  const jobsTagArr = tagsMatched;
	  const normalizedJobsTagArr = jobsTagArr
	    .filter((tag) => !stringExtractedIsInValid(tag))
	    .map((tag) => collapseWhitespace(tag.toLowerCase()));
	  if (normalizedJobsTagArr.length < jobsTagArr.length) {
	    // at least one tag was dropped as invalid; the remaining ones are kept
	    console.log('This is invalid: ', jobsTagArr);
	  }
	  log("normalizedJobsTagArr is:");
	  log(normalizedJobsTagArr);

	  /*********** set the value of listing_tags_slug ***********************/
	  const jobTagsSlugArr = normalizedJobsTagArr.map((tag) => slugifyStr(tag));
	  log("jobTagsSlugArr is:");
	  log(jobTagsSlugArr);

	  /*********** set the value of listing_description ***********************/
	  // a non-string description used to throw on .replace(); reject it instead.
	  // An empty string is still accepted, as before.
	  if (typeof description !== 'string') {
	    organicExtractData_LogIfValueIsInvalid("description");
	    return;
	  }
	  // remove any occurence of the word "null" (fixes issue #560)
	  const jobDescription = description.replace(/ null /g, ' ');
	  log("jobDescription is : " + jobDescription);

	  /*********** standardize cleanFinalApplyDestinationUrl ***********************/
	  const jobApplyUrl = cleanFinalApplyDestinationUrl;
	  if (stringExtractedIsInValid(jobApplyUrl)) {
	    organicExtractData_LogIfValueIsInvalid("final destination url");
	    return;
	  }
	  // note on process.env.URL below:
	  // theoretically, we should have used a conditional assignment like we do on /sitemaps-gen.js
	  // but as we'll click often on links even while working, we'd rather have the
	  // destination website see a clean referrer (for example aijobs.tech) than a
	  // https://deploy-preview-296--zen-colden-2b17b5.netlify.com one.
	  const targetWebsiteRootUrl = getLwnRootDomain(process.env.URL);
	  const normalizedJobApplyUrl = addWebsiteNameTrackingParams(jobApplyUrl, targetWebsiteRootUrl);
	  log("normalizedJobApplyUrl is : " + normalizedJobApplyUrl);

	  /*********** standardize content_entity_name ***********************/
	  if (stringExtractedIsInValid(companyName)) {
	    organicExtractData_LogIfValueIsInvalid("companyName");
	    return;
	  }
	  const normalizedCompanyName = collapseWhitespace(companyName.toLowerCase());
	  log("normalizedCompanyName is : " + normalizedCompanyName);

	  /*********** set the value of content_entity_name_slug ***********************/
	  const companyNameSlug = slugifyStr(normalizedCompanyName);
	  log("companyNameSlug is : " + companyNameSlug);

	  /*********** set the value of content_entity_initials ***********************/
	  // Built from companyNameSlug rather than normalizedCompanyName: we don't
	  // want accents on initials, and special characters / apostrophes (l'oreal)
	  // would otherwise need heavy lifting to get right.
	  const companyInitials = companyNameSlug
	    .split("-")              // words are separated by "-"
	    .map((word) => word[0])  // first letter of each word
	    .join("")
	    .slice(0, 2);            // keep at most 2 initials
	  log("companyInitials is : " + companyInitials);

	  /*********** set value for content_entity_domain ***********************/
	  let companyDomain;
	  if (companyCertainDomain) {
	    companyDomain = companyCertainDomain;
	  } else if (!stringExtractedIsInValid(companyClearbitAutocompApproxDomain)) {
	    // best option after companyCertainDomain; no sanitizing needed as the value
	    // is brought by the Clearbit API, which already cleans wonky cases for us
	    companyDomain = companyClearbitAutocompApproxDomain;
	  } else {
	    companyDomain = '';
	    log("unfortunately we could not find the company domain for this job post (no biggie)");
	  }
	  log("companyDomain is : " + companyDomain);

	  /*********** set value for content_entity_logo_url ***********************/
	  let companyLogoUrl;
	  if (companyCertainLogoUrl) {
	    companyLogoUrl = companyCertainLogoUrl;
	  } else if (companyClearbitAutocomppeApproxLogoUrl) {
	    // best option after companyCertainLogoUrl (same Clearbit remark as above)
	    companyLogoUrl = companyClearbitAutocomppeApproxLogoUrl;
	  } else {
	    companyLogoUrl = '';
	    log("unfortunately we could not find the company logo url for this job post (no biggie)");
	  }
	  log("companyLogoUrl is : " + companyLogoUrl);

	  /*********** set value for listing_type ***********************/
	  const listingType = "organic";
	  log("listingType is : " + listingType);

	  /*********** value for paid_listing_contact_email ***********************/
	  // Safeguard: must stay empty so the front-end "report broken link/empty link"
	  // button can appear when the destination url is empty too. Should be
	  // extremely rare as 99% of listings will anyway have a destination_url.
	  const listingApplyEmail = "";
	  log("listingApplyEmail is : " + listingApplyEmail);

	  // TODO: consider checking early (before the normalization above) whether a
	  // recent (<2 weeks) job with the same destination url already exists.

	  /* Connect to Db and inject the above calculated values
	    Note: rejection of listings with a normalizedJobApplyUrl already present in an existing record
	    is only performed at db level by a Baqend module in order to stay as close as possible as db server
	    (for efficiency and concurrency aspects)
	  */
	  try {
	    await connect_to_baqend("listings-network");
	    await DB[DB_NAME]({
	      target_website: WEBSITE_NAME_INCLUDING_DEV_MODE,
	      listing_category: 'j',
	      listing_master_scraping_query: jobMasterScrapingQuery,
	      listing_data_source_type: jobDataSourceType,
	      listing_data_source_url: jobDataSourceUrl,
	      listing_title: normalizedJobPosition,
	      listing_location_restricted: jobLocationRestricted,
	      listing_location_country: normalizedjobLocationCountry,
	      listing_location_country_iso3166: jobLocationCountryIso3166,
	      listing_location_city: normalizedjobLocationCity,
	      listing_location_city_slug: jobLocationCitySlug,
	      listing_tags: normalizedJobsTagArr,
	      listing_tags_slug: jobTagsSlugArr,
	      listing_description: jobDescription,
	      listing_destination_url: normalizedJobApplyUrl,
	      listing_apply_email: listingApplyEmail,
	      content_entity_name: normalizedCompanyName,
	      content_entity_name_slug: companyNameSlug,
	      content_entity_initials: companyInitials,
	      content_entity_domain: companyDomain,
	      content_entity_logo_url: companyLogoUrl,
	      listing_type: listingType
	    }).insert();
	    log("we injected into the db the data of " + context.awsRequestId);
	  } catch (e) {
	    // An error raised inside Baqend handlers (ex: onInsert) is conveyed here too.
	    // Deliberately logged and not rethrown (open question in the original
	    // notes: do we really want this reported on dashbird?).
	    console.log('Something went wrong connecting to baqend', e)
	  }

	  // NOTE(review): this response is returned even when the insert above failed;
	  // kept as-is because callers may depend on it - confirm before changing.
	  return {
	    statusCode: 200,
	    body: 'Baqend: Inserted'
	  }
	});

	export {handler};