diff --git a/README.md b/README.md index 63bab55..a9128ca 100644 --- a/README.md +++ b/README.md @@ -193,4 +193,76 @@ Slide deck and technical report Model cards for stakeholders -Documentation for regulators \ No newline at end of file +Documentation for regulators +# Step 1: Clone this repo +git clone https://github.com/sumeyaaaa/Credit-Risk-Probability-Model +cd Credit-Risk-Probability-Model + + +# Step 2: Install dependencies +pip install -r requirements.txt + + +\`\`\` +""" +## ⚙️ CI/CD – GitHub Actions + +- Implemented GitHub Actions workflow to validate notebook execution and maintain reproducibility. + +--- + +## 📈 Key Skills Demonstrated + +- ✅ Git & GitHub Workflow +- ✅ EDA and data storytelling +- ✅ Feature engineering +- ✅ RFM metrics +- ✅ Model training and evaluation + +--- + +## 🧰 Technologies + +- Python 3.10+ +- Pandas, Seaborn, Matplotlib +- Docker +- Git & GitHub + +# Repository structure + +├── .github +│ └── workflows +│ └── ci.yml # Continuous Integration workflow configuration +├── data +│ ├── raw # Raw input data files (unprocessed) +│ └── processed # Cleaned and transformed data ready for analysis +├── notebooks/ +│ ├── task 1 and 2 +│ │ └── load_EDA.ipynb # Data loading and exploratory data analysis notebook +│ ├── task-3 # Feature engineering and web scraping using google-play-scraper +│ │ └── feature-engineering.ipynb +│ ├── task-4 # Customer segmentation and risk labeling (RFM metrics) +│ │ └── RFMmetrics.ipynb +│ ├── task-5 # Model building, evaluation, and final insights +│ │ └── modeling.ipynb +│ ├── task-6 # Sentiment labeling and thematic keyword extraction +│ └── sentiment_analysis.ipynb +├── src/ # Core Python modules containing business logic +│ ├── __init__.py # Oracle database connection and utilities +│ ├── load.py # Data loading and preprocessing functions +│ ├── PreProcessing.py # Text 
preprocessing utilities (e.g., lemmatization) +│ ├── RFMmetrics.py # Customer segmentation and clustering logic +│ ├── saveFile.py # Sentiment classification model and related functions +│ ├── visualization.py # Visualization utilities for model comparison and analysis +│ ├── api/ # API backend code for deployment and data export +│ │ ├── main.py # FastAPI app main entrypoint +│ │ └── pydantic_models.py # Data validation schemas using Pydantic +│ └── models/ # Saved machine learning models and related artifacts +│ └── best_model.pkl # Serialized best-performing model +├── test/ # Tests and SQL schema definitions for Oracle database +├── docker-compose.yml # Docker Compose file for multi-container orchestration +├── dockerfile # Docker image build instructions +├── LICENSE # Project license file +├── README.md # Project overview, methodology, and results summary +├── .gitignore # Specifies files and folders to be excluded from Git +└── venv/ # Python virtual environment (excluded from version control) diff --git a/notebooks/task-5/modeling.ipynb b/notebooks/task-5/modeling.ipynb index 45b819d..a923379 100644 --- a/notebooks/task-5/modeling.ipynb +++ b/notebooks/task-5/modeling.ipynb @@ -24,7 +24,7 @@ "base_uri": "https://localhost:8080/" }, "id": "8GSjqYq-D3RE", - "outputId": "ae4059ba-e0d2-4e77-90ba-72fec6d88851" + "outputId": "ffbfa123-2d8e-44f8-feb0-30c578f491c0" }, "execution_count": 1, "outputs": [ @@ -42,13 +42,233 @@ } ] }, + { + "cell_type": "code", + "source": [ + "from google.colab import files\n", + "\n", + "uploaded = files.upload()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 73 + }, + "id": "RepXyAR_PO5h", + "outputId": "96456f4a-682b-4378-9d7c-a786cee44f6c" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " 
Upload widget is only available when the cell has been executed in the\n", + " current browser session. Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving final_data.csv to final_data.csv\n" + ] + } + ] + }, { "cell_type": "code", "source": [ "import pandas as pd\n", "\n", "# Load the already-existing file\n", - "final_data = pd.read_csv('/content/final_data.csv')\n", + "final_data = pd.read_csv('final_data.csv')\n", "\n", "# Preview it\n", "final_data.head()\n" @@ -59,9 +279,9 @@ "height": 255 }, "id": "Mau_5ANc-jBT", - "outputId": "df814b5f-5fc8-4660-b3fc-76b0a6fc4eb6" + "outputId": "b1517fed-9b84-4fe9-886f-a7b52660abed" }, - "execution_count": 2, + "execution_count": 3, "outputs": [ { "output_type": "execute_result", @@ -106,7 +326,7 @@ ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "