From b54566bfeff37e220bf24145124d7487173a3168 Mon Sep 17 00:00:00 2001 From: seyedjavad rafiee Date: Mon, 22 Sep 2025 15:44:44 +0330 Subject: [PATCH] Added Cutom-search CLI script (#460) --- Custom-search CLI/output.csv | 1 + Custom-search CLI/readme.md | 41 +++++++++++++ Custom-search CLI/scraper.py | 101 +++++++++++++++++++++++++++++++++ Custom-search CLI/setting.json | 4 ++ README.md | 1 + 5 files changed, 148 insertions(+) create mode 100644 Custom-search CLI/output.csv create mode 100644 Custom-search CLI/readme.md create mode 100644 Custom-search CLI/scraper.py create mode 100644 Custom-search CLI/setting.json diff --git a/Custom-search CLI/output.csv b/Custom-search CLI/output.csv new file mode 100644 index 0000000..de7ad94 --- /dev/null +++ b/Custom-search CLI/output.csv @@ -0,0 +1 @@ +#,Title,Link \ No newline at end of file diff --git a/Custom-search CLI/readme.md b/Custom-search CLI/readme.md new file mode 100644 index 0000000..edebe7b --- /dev/null +++ b/Custom-search CLI/readme.md @@ -0,0 +1,41 @@ +# Custom-search CLI +A simple Python script that uses the **Google Custom Search API** to fetch search results and export them into a CSV file. + + +## Requirements +- Python 3.8+ +- A Google API key +- A Google Custom Search Engine (CX) ID +- Install dependencies: + ```bash + pip install requests + pip install beautifulsoup4 + pip install python-csv + pip install argparse + ``` + +## Setup +1. Get a Google API key from [Google Cloud Console](https://console.cloud.google.com/) +2. Create a Custom Search Engine (CX) at [Google CSE](https://cse.google.com/cse/all) +3. 
Run the script with your API key to create setting.json: + + python scraper.py -sq [SEARCH_QUERY] --add_api_key [YOUR_API_KEY] + +## Usage +Search with query: +```bash +python scraper.py -sq "github" +``` +Fetch multiple pages (10 results per page): +```bash +python scraper.py -sq "github" --pages 3 +``` +## Output +- Results are saved in output.csv in the following columns: + +\# , Title , Link + +> [!NOTE]
+> Free quota: 100 queries/day (10 results per query).
# > (readme.md note, continued) If `setting.json` is missing or doesn't have an
# > API key, use `--add_api_key`.
#
# ---- new file in this patch: Custom-search CLI/scraper.py ----
"""Google Custom Search command-line client.

Runs a query against the Google Custom Search JSON API and writes the
numbered titles and links of the results to a CSV file.

The API key and search-engine ID (CX) are read from ``setting.json`` in the
current working directory.  If that file is missing or has no API key, pass
``--add_api_key`` once to create/complete it.
"""
import argparse
import csv
import json
import os
import sys
from typing import Any, Dict, List, Optional, Tuple

try:
    import requests
except ImportError:  # pragma: no cover - depends on the environment
    # `requests` is the only third-party dependency.  Defer the failure to
    # the point of use so the settings/CSV helpers stay importable and the
    # user gets an actionable message instead of a bare import traceback.
    requests = None

SETTING_ROUTE = 'setting.json'
DEFAULT_CX = 'b0264518c3d104eda'

API_URL = "https://www.googleapis.com/customsearch/v1"
RESULTS_PER_PAGE = 10
# The API only serves the first 100 results, so the last valid 1-based
# start index for a 10-result page is 91.
MAX_START_INDEX = 91
REQUEST_TIMEOUT = 10  # seconds


def _save_settings(settings: Dict[str, str]) -> None:
    """Write *settings* to ``SETTING_ROUTE`` as indented JSON."""
    with open(SETTING_ROUTE, 'w', encoding="utf-8") as f:
        json.dump(settings, f, indent=4)


def _mask_secret(secret: str, visible: int = 4) -> str:
    """Return *secret* with all but its last *visible* characters starred."""
    if len(secret) <= visible:
        return "*" * len(secret)
    return "*" * (len(secret) - visible) + secret[-visible:]


def load_settings(api_key: Optional[str] = None) -> Dict[str, str]:
    """Load API settings from setting.json, creating or completing it.

    Args:
        api_key: Key to store when the file is missing or has no key.  A key
            that is already stored is never overwritten.

    Returns:
        A dict with non-empty ``"API_KEY"`` and ``"CX"`` entries.

    Raises:
        FileNotFoundError: setting.json does not exist and no key was given.
        ValueError: setting.json has no API key and none was given, or the
            file does not contain a JSON object.
    """
    try:
        with open(SETTING_ROUTE, 'r', encoding="utf-8") as f:
            settings = json.load(f)
    except FileNotFoundError:
        if not api_key:
            raise FileNotFoundError(
                "No setting.json found. Please run with --add_api_key to create one."
            ) from None
        settings = {"API_KEY": api_key, "CX": DEFAULT_CX}
        _save_settings(settings)
        return settings

    if not isinstance(settings, dict):
        raise ValueError("setting.json must contain a JSON object.")

    changed = False
    if not settings.get("API_KEY"):
        if not api_key:
            raise ValueError("API_KEY is missing in setting.json. Use --add_api_key to add one.")
        settings["API_KEY"] = api_key
        changed = True

    # The template setting.json ships with an empty CX; without a fallback
    # every request would be sent with no search-engine ID and be rejected.
    if not settings.get("CX"):
        settings["CX"] = DEFAULT_CX
        changed = True

    if changed:
        _save_settings(settings)
    return settings


def scrape(search_query: str, api_key: str, cx: str, pages: int = 1) -> Tuple[List[Dict[str, Any]], float]:
    """Perform a Google Custom Search and return the collected results.

    Args:
        search_query: Text to search for (sent URL-encoded).
        api_key: Google API key.
        cx: Custom Search Engine ID.
        pages: Number of 10-result pages to request.

    Returns:
        ``(items, search_time)``: the raw result items of all fetched pages
        and the sum of the search times reported by the API, in seconds.

    Raises:
        RuntimeError: ``requests`` is not installed, or the API answered
            with a non-200 status.
    """
    if requests is None:
        raise RuntimeError("The 'requests' package is required: pip install requests")

    results: List[Dict[str, Any]] = []
    search_time = 0.0

    for page in range(pages):
        start = page * RESULTS_PER_PAGE + 1
        if start > MAX_START_INDEX:
            print("Reached the API limit of 100 results per query; stopping.")
            break

        # Passing `params` lets requests URL-encode the query; interpolating
        # it into the URL breaks on characters such as '&', '#' or spaces.
        response = requests.get(
            API_URL,
            params={"key": api_key, "q": search_query, "cx": cx, "start": start},
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise RuntimeError(f"API request failed: {response.status_code} {response.text}")

        data = response.json()

        if "items" not in data:
            print("No results found or error:", data)
            break

        results.extend(data["items"])
        search_time += float(data.get("searchInformation", {}).get("searchTime", 0.0))

        # No `nextPage` entry means this was the last page; asking for more
        # would only burn daily quota.
        if "nextPage" not in data.get("queries", {}):
            break

    return results, search_time


def export_to_csv(results: List[Dict[str, Any]], filename: str = "output.csv") -> None:
    """Export search results to a CSV file.

    Writes a ``#, Title, Link`` header followed by one 1-based numbered row
    per result; a missing title or link becomes an empty cell.
    """
    with open(filename, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["#", "Title", "Link"])
        writer.writerows(
            [number, item.get("title", ""), item.get("link", "")]
            for number, item in enumerate(results, start=1)
        )

    print(f"Exported {len(results)} results to {filename}")


def main() -> None:
    """Parse the command line, run the search and export the results."""
    parser = argparse.ArgumentParser(description="Google Custom Search scraper")
    parser.add_argument("-sq", "--search_query", required=True, help="Search query to search for")
    parser.add_argument("--add_api_key", type=str, help="Your Google API key")
    parser.add_argument("--pages", type=int, default=1, help="Number of pages of results to fetch")
    args = parser.parse_args()

    if args.pages < 1:
        parser.error("--pages must be at least 1")

    try:
        settings = load_settings(args.add_api_key)
        api_key = settings["API_KEY"]
        cx = settings["CX"]

        # Never echo the full key: terminal output ends up in logs,
        # screenshots and shell history.
        print(f"Using API key: {_mask_secret(api_key)}")

        results, elapsed_time = scrape(args.search_query, api_key, cx, args.pages)
        export_to_csv(results)
    except (OSError, ValueError, RuntimeError) as exc:
        # Covers missing/invalid settings, network failures (requests'
        # exceptions derive from OSError) and API errors.
        sys.exit(f"Error: {exc}")

    print(f"Completed in {elapsed_time:.2f} seconds.")


if __name__ == "__main__":
    main()

# ---- new file in this patch: Custom-search CLI/setting.json (entries follow) ----
# {
"API_KEY": "", + "CX": "" +} \ No newline at end of file diff --git a/README.md b/README.md index 6f9cbe5..99942f8 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ More information on contributing and the general code of conduct for discussion | CSV to Excel | [CSV to Excel](https://github.com/DhanushNehru/Python-Scripts/tree/main/CSV%20to%20Excel) | A Python script to convert a CSV to an Excel file. | | CSV_TO_NDJSON | [CSV to Excel](https://github.com/DhanushNehru/Python-Scripts/tree/main/CSV_TO_NDJSON) | A Python script to convert a CSV to an NDJSON files file. | | Currency Script | [Currency Script](https://github.com/DhanushNehru/Python-Scripts/tree/main/Currency%20Script) | A Python script to convert the currency of one country to that of another. | +| Custom-search CLI | [Custom-search CLI](https://github.com/DhanushNehru/Python-Scripts/tree/main/Custom-search%20CLI) | Python script to search a query through internet and save the results in a .csv file. | | Digital Clock | [Digital Clock](https://github.com/DhanushNehru/Python-Scripts/tree/main/Digital%20Clock) | A Python script to preview a digital clock in the terminal. | | Display Popup Window | [Display Popup Window](https://github.com/DhanushNehru/Python-Scripts/tree/main/Display%20Popup%20Window) | A Python script to preview a GUI interface to the user. | | Distance Calculator | [Distance Calculator](https://github.com/Mathdallas-code/Python-Scripts/tree/main/Distance%20Calculator) | A Python script to calculate the distance between two points.