313 lines
9.4 KiB
JavaScript
313 lines
9.4 KiB
JavaScript
'use strict';
|
|
|
|
const Axios = require('axios');
|
|
const Cheerio = require('cheerio');
|
|
const Utils = require('../utils/utils');
|
|
const Constants = require('../utils/constants');
|
|
|
|
const OrganicResults = require('./nodes/OrganicResults');
|
|
const KnowledgeGraph = require('./nodes/KnowledgeGraph');
|
|
const FeaturedSnippet = require('./nodes/FeaturedSnippet');
|
|
|
|
const Location = require('./nodes/Location');
|
|
const Translation = require('./nodes/Translation');
|
|
const Dictionary = require('./nodes/Dictionary');
|
|
const Converters = require('./nodes/Converters');
|
|
const Videos = require('./nodes/Videos');
|
|
const TopStories = require('./nodes/TopStories');
|
|
const Weather = require('./nodes/Weather');
|
|
const Time = require('./nodes/Time');
|
|
const PAA = require('./nodes/PAA');
|
|
const PAS = require('./nodes/PAS');
|
|
|
|
const FormData = require('form-data');
|
|
|
|
/**
 * Searches a given query on Google and parses the result page.
 *
 * When `options.ris` is set and `query` is an object, the object is uploaded
 * through `uploadImage` and the upload response is parsed instead. Errors
 * raised by that upload are not wrapped and propagate as thrown by Axios.
 *
 * @param {string | object} query - The query to search for (or, for a reverse
 *   image search, an image URL string or the image data to upload).
 * @param {object} [options] - Search options.
 * @param {boolean} [options.ris] - Whether this is a reverse image search or not.
 * @param {boolean} [options.safe] - Whether to use safe search or not.
 * @param {number} [options.page] - Page number (multiplied by 10 to build the `start` offset).
 * @param {boolean} [options.parse_ads] - Whether or not to parse ads.
 * @param {boolean} [options.use_mobile_ua] - Whether or not to use a mobile user agent (defaults to true when the key is absent).
 * @param {object} [options.additional_params] - Parameters that will be passed to Google.
 * @returns {Promise.<object>} Object holding the parsed sections: `results`,
 *   `videos`, `knowledge_panel`, `featured_snippet`, `did_you_mean`, `weather`,
 *   `time`, `location`, `dictionary`, `translation`, `top_stories`,
 *   `unit_converter`, `people_also_ask` and `people_also_search`.
 * @throws {Utils.SearchError} If the GET request fails.
 */
async function search(query, options = {}) {
  let response;

  const ris = options.ris || false;
  const safe = options.safe || false;
  const page = options.page ? options.page * 10 : 0;
  const use_mobile_ua = Reflect.has(options, 'use_mobile_ua') ? options.use_mobile_ua : true;
  const parse_ads = options.parse_ads || false;
  const additional_params = options.additional_params || null;

  if (typeof query === 'object' && ris) {
    response = await uploadImage(query);
  } else {
    // Percent-encode every term on its own and join with '+'. encodeURI() must
    // not be used on the query: it leaves '&', '#', '+', '=' and '?' untouched,
    // so a query such as "c++" or "a&b" would be altered or would inject
    // extra URL parameters.
    const _query = query
      .trim()
      .split(/ +/)
      .map((term) => encodeURIComponent(term))
      .join('+');

    // The base URL is still passed through encodeURI(), exactly as before.
    const base = encodeURI(ris ? Constants.URLS.W_GOOGLE : Constants.URLS.GOOGLE);

    const url = ris
      ? `${base}searchbyimage?image_url=${_query}`
      : `${base}search?q=${_query}&ie=UTF-8&aomd=1${safe ? '&safe=active' : ''}&start=${page}`;

    // Failures are turned into a resolved Error value and checked below.
    response = await Axios.get(url, {
      params: additional_params,
      headers: Utils.getHeaders({ mobile: use_mobile_ua })
    }).catch((err) => err);
  }

  if (response instanceof Error) {
    // Axios stores the HTTP status on `error.response`; the top-level `status`
    // is kept as a fallback so previously reported values are not lost.
    throw new Utils.SearchError('Could not execute search', {
      status_code: response.response?.status || response.status || 0,
      message: response.message
    });
  }

  const $ = Cheerio.load(Utils.refineData(response.data, parse_ads, use_mobile_ua));

  const results = {};

  results.results = OrganicResults.parse($, parse_ads, use_mobile_ua);
  results.videos = Videos.parse($);

  results.knowledge_panel = new KnowledgeGraph(response.data, $);
  results.featured_snippet = new FeaturedSnippet($);

  const did_you_mean = $(Constants.SELECTORS.DID_YOU_MEAN).text();
  results.did_you_mean = did_you_mean || null;

  // These use the same selectors, so we have to check before parsing.
  results.weather = new Weather($);
  results.time = !results.weather.location ? new Time($) : null;
  results.location = !results.time?.hours ? new Location($) : null;

  results.dictionary = new Dictionary($);
  results.translation = new Translation($);
  results.top_stories = TopStories.parse($);
  results.unit_converter = new Converters($);
  results.people_also_ask = PAA.parse($, response.data);
  results.people_also_search = PAS.parse($);

  return results;
}
|
|
|
|
/**
 * Posts image data to the reverse image search upload endpoint.
 *
 * The data is sent as the `encoded_image` field of a multipart form, using
 * the form's own headers merged with the mobile request headers.
 *
 * @param {*} buffer - Image data appended to the form as `encoded_image`.
 * @returns {Promise.<object>} The Axios response of the upload request.
 */
async function uploadImage(buffer) {
  const form = new FormData();
  form.append('encoded_image', buffer);

  const endpoint = `${Constants.URLS.GIS}searchbyimage/upload`;
  const headers = {
    ...form.getHeaders(),
    ...Utils.getHeaders({ mobile: true })
  };

  return Axios.post(endpoint, form, { headers });
}
|
|
|
|
/**
 * Decodes a URL string taken from the BatchExecute payload.
 *
 * The value is first read as a JSON string body (which resolves escape
 * sequences such as `\uXXXX`) and then percent-decoded. Unescaped double
 * quotes are escaped beforehand so they cannot terminate the JSON string.
 * If either decoding step throws, the raw value is returned unchanged so a
 * single malformed URL does not abort the whole result set.
 *
 * @param {string} raw - URL as found in the payload.
 * @returns {string} Decoded URL, or `raw` when it cannot be decoded.
 */
function decodeEscapedUrl(raw) {
  try {
    const escaped = raw.replace(/(?<!\\)"/g, '\\"');
    return decodeURIComponent(JSON.parse(`"${escaped}"`));
  } catch {
    return raw;
  }
}

/**
 * Google image search.
 *
 * @param {string} query - The query to search for.
 * @param {object} [options] - Search options.
 * @param {boolean} [options.safe] - Whether to use safe search or not.
 * @param {object} [options.additional_params] - Additional parameters that will be passed to Google.
 * @returns {Promise.<{
 *   id: string;
 *   url: string;
 *   width: number;
 *   height: number;
 *   color: number;
 *   preview: {
 *     url: string;
 *     width: number;
 *     height: number;
 *   },
 *   origin: {
 *     title: string;
 *     website: {
 *       name: string;
 *       domain: string;
 *       url: string;
 *     }
 *   }
 * }[]>}
 * @throws {Utils.SearchError} If the request fails or the response does not
 *   have the expected structure.
 */
async function image(query, options = {}) {
  const safe = options.safe || false;
  const additional_params = options.additional_params || {};

  const form_data = new URLSearchParams();

  // Request envelope for the 'HoAMBc' RPC; the search query sits inside the
  // JSON-encoded inner array.
  const payload = [
    [
      [
        'HoAMBc',
        JSON.stringify([
          null, null, [
            0, null, 2529, 85, 2396,
            [], [9429, 9520], [194, 194],
            false, null, null, 9520
          ],
          null, null, null, null, null, null, null, null,
          null, null, null, null, null, null, null, null,
          null, null, null, null, null, null, null, null,
          null, [
            query,
          ],
          null, null, null,
          null, null, null,
          null, null, [
            null, 'CAE=', 'GGwgAA=='
          ], null, true
        ]),
        null,
        'generic'
      ]
    ]
  ];

  form_data.append('f.req', JSON.stringify(payload));
  form_data.append('at', `${Utils.generateRandomString(29)}:${Date.now()}`);

  const params = {
    ...additional_params
  };

  if (safe) {
    params.safe = 'active';
  }

  // Failures are turned into a resolved Error value and checked below.
  const response = await Axios.post(`${Constants.URLS.W_GOOGLE}_/VisualFrontendUi/data/batchexecute`, form_data, {
    params: {
      'rpcids': 'HoAMBc',
      'source-path': '/search',
      'f.sid': -Utils.getRandomInt(0, 9e10),
      'bl': 'boq_visualfrontendserver_20220505.05_p0',
      'hl': 'en',
      'authuser': 0,
      '_reqid': -Utils.getRandomInt(0, 9e5),
      // Caller-supplied parameters (and `safe`) override the defaults above.
      ...params
    },
    headers: {
      'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
      ...Utils.getHeaders({ mobile: false })
    }
  }).catch((err) => err);

  if (response instanceof Error) {
    throw new Utils.SearchError('Could not execute search', {
      status_code: response?.response?.status || 0, message: response?.message
    });
  }

  // Rebuild the embedded JSON array from the text found between '"[null' and ']"'.
  const res = '[null' + (Utils.getStringBetweenStrings(response.data, '"[null', ']"') || '') + ']';

  // The order of these two replacements matters: the first one unescapes
  // `\"`, which also consumes one backslash of longer escape runs; what is
  // then left as `\\"` is turned into a single quote.
  const data = JSON.parse(res.replace(/\\"/g, '"').replace(/\\\\"/g, '\''));

  if (data.length <= 1)
    throw new Utils.SearchError('Got unexpected response from BatchExecute API', data);

  if (!data[56]?.[1])
    throw new Utils.SearchError(data[53]?.[1] || 'Unexpected response structure', data[53]?.[2] || data);

  const items = data[56]?.[1]?.[0]?.[0]?.[1]?.[0];

  if (!items)
    throw new Utils.SearchError('Unexpected response structure', data);

  const results = items.map((el) => {
    const item = el[0]?.[0]?.['444383007']; // TODO: refactor this

    if (!item?.[1])
      return undefined;

    const image = item[1]?.[3];
    const preview = item[1]?.[2];
    const origin = item[1]?.[9];

    // Entries missing any of the three parts are dropped by the filter below.
    if (!image || !preview || !origin)
      return undefined;

    return {
      id: item[1][1],
      url: decodeEscapedUrl(image[0]),
      width: image[1],
      height: image[2],
      color: item[1][6],
      preview: {
        url: decodeEscapedUrl(preview[0]),
        width: preview[1],
        height: preview[2]
      },
      origin: {
        // Optional chaining: a missing key yields undefined instead of
        // throwing and discarding every other result.
        title: origin['2008']?.[1],
        website: {
          name: origin['2003']?.[12],
          domain: origin['2003']?.[17],
          url: origin['2003']?.[2]
        }
      }
    };
  }).filter((item) => item);

  return results;
}
|
|
|
|
/**
 * Retrieves news from Google.
 *
 * @param {string} [language] - Two-letter language code.
 * @param {string} [region] - Two-letter region code.
 *
 * @returns {Promise.<{
 *   headline_stories: {
 *     title: string;
 *     url: string;
 *     image: string;
 *     published: string;
 *     by: string;
 *   }[]
 * }>}
 * @throws {Error} If the request fails.
 */
async function getTopNews(language = 'en', region = 'US') {
  // Locale-independent case conversion: the toLocale* variants follow the
  // host locale and can alter the codes (e.g. 'i' -> 'İ' under a Turkish locale).
  const lang = language.toLowerCase();
  const country = region.toUpperCase();

  const url = `${Constants.URLS.GOOGLE_NEWS}topstories?tab=in&hl=${lang}-${country}&gl=${country}&ceid=${country}:${lang}`;

  // Failures are turned into a resolved Error value and checked right after.
  const response = await Axios.get(url, {
    headers: Utils.getHeaders({
      mobile: true
    })
  }).catch((err) => err);

  if (response instanceof Error) throw new Error('Could not retrieve top news: ' + response.message);

  const $ = Cheerio.load(response.data);

  const results = {
    headline_stories: []
  };

  // Publisher, image and time are collected as separate lists and matched to
  // each title by its position in the title list.
  const headline_stories_publishers = $(Constants.SELECTORS.PUBLISHER).map((i, el) => $(el).text()).get();
  const headline_stories_imgs = $(Constants.SELECTORS.STORY_IMG).map((i, el) => $(el).attr('src')).get();
  const headline_stories_time = $(Constants.SELECTORS.STORY_TIME).map((i, el) => $(el).text()).get();

  $(Constants.SELECTORS.STORY_TITLE).each((i, el) => {
    const headline_stories_title = $(el).text();
    const headline_stories_url = $(el).attr('href');

    // A title element without an href cannot be linked; skip it instead of
    // throwing on `.slice`. Returning undefined keeps the iteration going.
    if (!headline_stories_url)
      return;

    results.headline_stories.push({
      title: headline_stories_title,
      // The first two characters of the href are dropped before it is
      // appended to the Google News base URL.
      url: `${Constants.URLS.GOOGLE_NEWS}${headline_stories_url.slice(2)}`,
      image: headline_stories_imgs[i],
      published: headline_stories_time[i],
      by: headline_stories_publishers[i]
    });
  });

  return results;
}
|
|
|
|
// Public API of this module.
module.exports = { getTopNews, search, image };
|