odin/node_modules/googlethis/lib/core/main.js
2022-11-19 01:49:12 +01:00

313 lines
9.4 KiB
JavaScript

'use strict';
const Axios = require('axios');
const Cheerio = require('cheerio');
const Utils = require('../utils/utils');
const Constants = require('../utils/constants');
const OrganicResults = require('./nodes/OrganicResults');
const KnowledgeGraph = require('./nodes/KnowledgeGraph');
const FeaturedSnippet = require('./nodes/FeaturedSnippet');
const Location = require('./nodes/Location');
const Translation = require('./nodes/Translation');
const Dictionary = require('./nodes/Dictionary');
const Converters = require('./nodes/Converters');
const Videos = require('./nodes/Videos');
const TopStories = require('./nodes/TopStories');
const Weather = require('./nodes/Weather');
const Time = require('./nodes/Time');
const PAA = require('./nodes/PAA');
const PAS = require('./nodes/PAS');
const FormData = require('form-data');
/**
 * Searches a given query on Google and parses the result page.
 *
 * @param {string | object} query - The query to search for. When `options.ris`
 *  is set this is either an image URL (string) or an object that is handed
 *  unchanged to the image upload request.
 * @param {object} [options] - Search options.
 * @param {boolean} [options.ris] - Whether this is a reverse image search or not.
 * @param {boolean} [options.safe] - Whether to use safe search or not.
 * @param {number} [options.page] - Page number (multiplied by 10 to get the `start` offset).
 * @param {boolean} [options.parse_ads] - Whether or not to parse ads.
 * @param {boolean} [options.use_mobile_ua] - Whether or not to use a mobile user agent (defaults to true).
 * @param {object} [options.additional_params] - parameters that will be passed to Google
 *
 * @returns {Promise<object>} An object with the keys `results`, `videos`,
 *  `knowledge_panel`, `featured_snippet`, `did_you_mean`, `weather`, `time`,
 *  `location`, `dictionary`, `translation`, `top_stories`, `unit_converter`,
 *  `people_also_ask` and `people_also_search`.
 *
 * @throws {TypeError} If a text/URL search is requested with a non-string query.
 * @throws {Utils.SearchError} If the search request fails. Errors raised by the
 *  image upload (object query with `ris`) are not wrapped and propagate as-is.
 */
async function search(query, options = {}) {
  const ris = options.ris || false;
  const safe = options.safe || false;
  const page = options.page ? options.page * 10 : 0;
  const use_mobile_ua = Reflect.has(options, 'use_mobile_ua') ? options.use_mobile_ua : true;
  const parse_ads = options.parse_ads || false;
  const additional_params = options.additional_params || null;

  let response;

  if (typeof query === 'object' && ris) {
    response = await uploadImage(query);
  } else {
    if (typeof query?.trim !== 'function')
      throw new TypeError('Expected query to be a string');

    // Encode every whitespace-separated token on its own and join with '+'.
    // encodeURI() on the whole URL would leave '&', '#', '+', '=' and '?'
    // untouched, letting them leak out of the parameter value.
    const encoded_query = query.trim().split(/ +/).map(encodeURIComponent).join('+');

    const url = ris ?
      `${Constants.URLS.W_GOOGLE}searchbyimage?image_url=${encoded_query}` :
      `${Constants.URLS.GOOGLE}search?q=${encoded_query}&ie=UTF-8&aomd=1${(safe ? '&safe=active' : '')}&start=${page}`;

    // A failed request is captured as a value so it can be wrapped below.
    response = await Axios.get(url, {
      params: additional_params,
      headers: Utils.getHeaders({ mobile: use_mobile_ua })
    }).catch((err) => err);
  }

  if (response instanceof Error)
    throw new Utils.SearchError('Could not execute search', {
      // The HTTP status is looked up on the nested `response` first (same as
      // image() does), falling back to a status set directly on the error.
      status_code: response.response?.status || response.status || 0,
      message: response.message
    });

  const $ = Cheerio.load(Utils.refineData(response.data, parse_ads, use_mobile_ua));

  const results = {};

  results.results = OrganicResults.parse($, parse_ads, use_mobile_ua);
  results.videos = Videos.parse($);

  results.knowledge_panel = new KnowledgeGraph(response.data, $);
  results.featured_snippet = new FeaturedSnippet($);

  const did_you_mean = $(Constants.SELECTORS.DID_YOU_MEAN).text();
  results.did_you_mean = did_you_mean || null;

  // These use the same selectors, so we have to check before parsing.
  results.weather = new Weather($);
  results.time = !results.weather.location ? new Time($) : null;
  results.location = !results.time?.hours ? new Location($) : null;

  results.dictionary = new Dictionary($);
  results.translation = new Translation($);
  results.top_stories = TopStories.parse($);
  results.unit_converter = new Converters($);
  results.people_also_ask = PAA.parse($, response.data);
  results.people_also_search = PAS.parse($);

  return results;
}
/**
 * Posts an image to the search-by-image upload endpoint.
 *
 * @param {*} buffer - Image payload, appended to the form as `encoded_image`.
 * @returns {Promise<object>} Whatever `Axios.post` resolves with.
 */
async function uploadImage(buffer) {
  const form = new FormData();
  form.append('encoded_image', buffer);

  const endpoint = `${Constants.URLS.GIS}searchbyimage/upload`;

  // Later spreads win: the generic request headers override the form's own
  // headers wherever both define the same key.
  const headers = {
    ...form.getHeaders(),
    ...Utils.getHeaders({ mobile: true })
  };

  return await Axios.post(endpoint, form, { headers });
}
/**
 * Decodes a URL taken from the image search payload.
 *
 * The value is first read as the body of a JSON string (resolving escapes
 * such as `\u003d`) and then percent-decoded. Each step is best-effort: if it
 * fails, the value from the previous step is kept so that a single malformed
 * URL cannot abort the whole result set.
 *
 * @param {*} raw - The URL as found in the payload.
 * @returns {*} The decoded URL, or `raw` unchanged when it is not a string.
 */
function decodeImageUrl(raw) {
  if (typeof raw !== 'string')
    return raw;

  let url = raw;

  try {
    url = JSON.parse(`"${raw}"`);
  } catch {
    // Not a valid JSON string body (e.g. it contains a bare quote); keep it.
  }

  try {
    return decodeURIComponent(url);
  } catch {
    // Malformed percent sequence (e.g. a literal '%'); return it undecoded.
    return url;
  }
}

/**
 * Google image search.
 *
 * @param {string} query - The query to search for.
 * @param {object} [options] - Search options.
 * @param {boolean} [options.safe] - Whether to use safe search or not.
 * @param {object} [options.additional_params] - Additional parameters that will be passed to Google.
 *
 * @throws {Utils.SearchError} If the request fails or the response does not
 *  have the expected structure.
 *
 * @returns {Promise.<{
 *   id: string;
 *   url: string;
 *   width: number;
 *   height: number;
 *   color: number;
 *   preview: {
 *     url: string;
 *     width: number;
 *     height: number;
 *   },
 *   origin: {
 *     title: string;
 *     website: {
 *       name: string;
 *       domain: string;
 *       url: string;
 *     }
 *   }
 * }[]>}
 */
async function image(query, options = {}) {
  const safe = options.safe || false;
  const additional_params = options.additional_params || {};

  const form_data = new URLSearchParams();

  // Request template for the 'HoAMBc' RPC; the only variable part is `query`.
  const payload = [
    [
      [
        'HoAMBc',
        JSON.stringify([
          null, null, [
            0, null, 2529, 85, 2396,
            [], [9429, 9520], [194, 194],
            false, null, null, 9520
          ],
          null, null, null, null, null, null, null, null,
          null, null, null, null, null, null, null, null,
          null, null, null, null, null, null, null, null,
          null, [
            query,
          ],
          null, null, null,
          null, null, null,
          null, null, [
            null, 'CAE=', 'GGwgAA=='
          ], null, true
        ]),
        null,
        'generic'
      ]
    ]
  ];

  form_data.append('f.req', JSON.stringify(payload));
  form_data.append('at', `${Utils.generateRandomString(29)}:${Date.now()}`);

  const params = {
    ...additional_params
  };

  if (safe) {
    params.safe = 'active';
  }

  // A failed request is captured as a value so it can be wrapped below.
  const response = await Axios.post(`${Constants.URLS.W_GOOGLE}_/VisualFrontendUi/data/batchexecute`, form_data, {
    params: {
      'rpcids': 'HoAMBc',
      'source-path': '/search',
      'f.sid': -Utils.getRandomInt(0, 9e10),
      'bl': 'boq_visualfrontendserver_20220505.05_p0',
      'hl': 'en',
      'authuser': 0,
      '_reqid': -Utils.getRandomInt(0, 9e5),
      ...params
    },
    headers: {
      'content-type': 'application/x-www-form-urlencoded;charset=UTF-8',
      ...Utils.getHeaders({ mobile: false })
    }
  }).catch((err) => err);

  if (response instanceof Error)
    throw new Utils.SearchError('Could not execute search', {
      status_code: response?.response?.status || 0, message: response?.message
    });

  // The result is a JSON array embedded as an escaped string in the response.
  // Extract it, then unescape it in two passes: `\"` becomes `"`, and any
  // `\\"` left over after that becomes `'`. The order of the passes matters.
  const res = '[null'+(Utils.getStringBetweenStrings(response.data, '"[null', ']"') || '') + ']';
  const data = JSON.parse(res.replace(/\\"/g, '"').replace(/\\\\"/g, '\''));

  if (data.length <= 1)
    throw new Utils.SearchError('Got unexpected response from BatchExecute API', data);

  // NOTE(review): slot 53 appears to carry error details when slot 56 (the
  // results) is missing — confirm against real responses.
  if (!data[56]?.[1])
    throw new Utils.SearchError(data[53]?.[1] || 'Unexpected response structure', data[53]?.[2] || data);

  const items = data[56]?.[1]?.[0]?.[0]?.[1]?.[0];

  if (!items)
    throw new Utils.SearchError('Unexpected response structure', data);

  const results = items.map((el) => {
    const item = el[0]?.[0]?.['444383007']; // TODO: refactor this

    if (!item?.[1])
      return;

    const full_size = item[1]?.[3];
    const preview = item[1]?.[2];
    const origin = item[1]?.[9];

    // Entries lacking any of the three parts are dropped by the filter below.
    if (full_size && preview && origin)
      return {
        id: item[1][1],
        url: decodeImageUrl(full_size[0]),
        width: full_size[1],
        height: full_size[2],
        color: item[1][6],
        preview: {
          url: decodeImageUrl(preview[0]),
          width: preview[1],
          height: preview[2]
        },
        // Origin metadata may be partially missing; absent fields are left
        // undefined instead of failing the whole search.
        origin: {
          title: origin['2008']?.[1],
          website: {
            name: origin['2003']?.[12],
            domain: origin['2003']?.[17],
            url: origin['2003']?.[2]
          }
        }
      };
  }).filter((item) => item);

  return results;
}
/**
 * Retrieves news from Google.
 *
 * @param {string} [language] - two-letter language code (lower-cased before use).
 * @param {string} [region] - two-letter region code (upper-cased before use).
 *
 * @throws {Error} If the request to Google News fails.
 *
 * @returns {Promise.<{
 *   headline_stories: {
 *     title: string;
 *     url: string;
 *     image: string;
 *     published: string;
 *     by: string;
 *   }[]
 * }>}
 */
async function getTopNews(language = 'en', region = 'US') {
  // Locale-independent case conversion: the toLocale* variants depend on the
  // host locale and can produce non-ASCII letters (e.g. Turkish dotted or
  // dotless i), which would corrupt the codes.
  const lang = language.toLowerCase();
  const country = region.toUpperCase();

  const url = `${Constants.URLS.GOOGLE_NEWS}topstories?tab=in&hl=${lang}-${country}&gl=${country}&ceid=${country}:${lang}`;

  // A failed request is captured as a value so it can be wrapped below.
  const response = await Axios.get(url, {
    headers: Utils.getHeaders({
      mobile: true
    })
  }).catch((err) => err);

  if (response instanceof Error) throw new Error('Could not retrieve top news: ' + response.message);

  const $ = Cheerio.load(response.data);

  const results = {
    headline_stories: []
  };

  // Publisher, image and timestamp are collected separately and matched to
  // each title by its position.
  const publishers = $(Constants.SELECTORS.PUBLISHER).map((i, el) => $(el).text()).get();
  const images = $(Constants.SELECTORS.STORY_IMG).map((i, el) => $(el).attr('src')).get();
  const timestamps = $(Constants.SELECTORS.STORY_TIME).map((i, el) => $(el).text()).get();

  $(Constants.SELECTORS.STORY_TITLE).each((i, el) => {
    const href = $(el).attr('href');

    // A title without a link cannot be turned into a story URL; skip it
    // rather than failing the whole call.
    if (typeof href !== 'string')
      return;

    results.headline_stories.push({
      title: $(el).text(),
      // Drops the first two characters of the href before appending it to the
      // news base URL (presumably a leading './' — TODO confirm).
      url: `${Constants.URLS.GOOGLE_NEWS}${href.slice(2)}`,
      image: images[i],
      published: timestamps[i],
      by: publishers[i]
    });
  });

  return results;
}
// Public API of this module.
module.exports = { getTopNews, search, image };