Skip to content
This repository was archived by the owner on Nov 5, 2024. It is now read-only.

Commit 7e85061

Browse files
committed
Initial Commit
0 parents  commit 7e85061

88 files changed

Lines changed: 10266 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
---
# CI workflow: runs the Upgraider benchmark experiment across all target
# libraries in parallel, combines the per-library results, and generates a
# markdown report (optionally compared against a previous run).
name: Run Upgraider Experiment

on:
  workflow_dispatch:
    inputs:
      model:
        description: "Model to use for fixing (gpt-3.5, gpt-4)"
        type: string
        default: "gpt-3.5"
      useModelOnly:
        description: "Run experiment with no external sources"
        type: boolean
        default: false
      useDoc:
        description: "Run experiment with references from Documentation/release notes"
        type: boolean
        default: true
      compareTo:
        description: "Run number of previous run to compare to (leave empty to skip comparison)"
        default: ""
      simthreshold:
        description: "Similarity threshold for retrieval"
        default: "0"  # include all info
      debug_enabled:
        type: boolean
        description: "Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)"
        default: false

jobs:
  # Resolves the library matrix and normalizes the dispatch inputs so all
  # downstream jobs read one source of truth: needs.setup.outputs.*
  setup:
    runs-on: ubuntu-latest
    outputs:
      libraries: "${{ steps.parse_libraries.outputs.libraries }}"
      model: "${{ github.event.inputs.model }}"
      useModelOnly: "${{ github.event.inputs.useModelOnly || false }}"
      useDoc: "${{ github.event.inputs.useDoc || true }}"
      threshold: "${{ github.event.inputs.simthreshold || 0 }}"
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - run: |
          pip install -r requirements.txt
          python setup.py develop

      # Emits a JSON list of libraries consumed by the benchmark matrix below.
      - id: parse_libraries
        run: |
          libraries=$(python ${GITHUB_WORKSPACE}/src/benchmark/list_libraries.py)
          echo "got libraries $libraries"
          echo "libraries=$libraries" >> $GITHUB_OUTPUT

  # One job per library (matrix fan-out); a single library failing does not
  # cancel the others.
  benchmark:
    needs:
      - setup
    runs-on: ubuntu-latest
    continue-on-error: true
    strategy:
      fail-fast: false
      matrix:
        library: ${{ fromJson(needs.setup.outputs.libraries) }}
    steps:
      - name: Checkout github repo (+ download lfs dependencies)
        uses: actions/checkout@v3
        with:
          lfs: true

      - name: Pull LFS objects
        run: git lfs pull

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          python setup.py develop

      # Separate venv where the library under test is installed, so it does
      # not pollute the tool's own environment (see README: SCRATCH_VENV).
      - name: Setup scratch venv
        run: |
          curr_dir=`pwd`
          SCRATCH_VENV="$curr_dir/../scratchvenv"
          echo "SCRATCH_VENV=$SCRATCH_VENV" >> $GITHUB_ENV
          mkdir -p $SCRATCH_VENV
          cd $SCRATCH_VENV
          python -m venv .venv

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Run example update with no sources
        # Read the normalized value from setup outputs, consistent with the
        # doc-sources step below; job outputs are always strings.
        if: ${{ needs.setup.outputs.useModelOnly == 'true' }}
        env:
          OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          OPENAI_ORG: "${{ secrets.OPENAI_ORG }}"
          GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }}
          GPT4_AUTH_HEADERS: ${{ secrets.GPT4_AUTH_HEADERS }}
        run: |
          library_name=${{ matrix.library.name }}
          curr_dir=`pwd`
          outputdir="$curr_dir/results/$library_name/modelonly"
          mkdir -p $outputdir
          python src/upgraider/fix_lib_examples.py \
            --libpath ${{ matrix.library.path }} \
            --outputDir $outputdir \
            --dbsource modelonly \
            --threshold ${{ needs.setup.outputs.threshold }} \
            --model ${{ needs.setup.outputs.model }}

      - name: Run example update with doc sources
        if: ${{ needs.setup.outputs.useDoc == 'true' }}
        env:
          OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
          OPENAI_ORG: "${{ secrets.OPENAI_ORG }}"
          GPT4_ENDPOINT: ${{ secrets.GPT4_ENDPOINT }}
          GPT4_AUTH_HEADERS: ${{ secrets.GPT4_AUTH_HEADERS }}
        run: |
          library_name=${{ matrix.library.name }}
          curr_dir=`pwd`
          outputdir="$curr_dir/results/$library_name/doc"
          mkdir -p $outputdir
          python src/upgraider/fix_lib_examples.py \
            --libpath ${{ matrix.library.path }} \
            --outputDir $outputdir \
            --dbsource doc \
            --threshold ${{ needs.setup.outputs.threshold }} \
            --model ${{ needs.setup.outputs.model }}

      - name: Zip up results
        run: |
          zip -r results.zip results

      - name: Upload artifacts
        uses: actions/upload-artifact@v3
        with:
          name: results-${{ matrix.library.name }}
          path: "results.zip"

  # Merges every per-library results artifact into one results-all artifact.
  combine_output:
    name: Combine output from all benchmarks
    needs:
      - benchmark
    runs-on: ubuntu-latest
    steps:
      - name: Download output zips
        uses: actions/download-artifact@v3

      - name: Combine output zips
        run: |
          mkdir results
          for zip in results-*/results.zip
          do
            unzip -oq $zip
          done
          zip -r results.zip results

      - name: Upload combined output files
        uses: actions/upload-artifact@v3
        with:
          name: results-all
          path: results.zip

  generate-report:
    needs:
      - combine_output
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          python setup.py develop

      - name: Download artifacts for this run
        uses: actions/download-artifact@v3
        with:
          name: results-all
          path: results

      # Optional baseline: pulls results-all from an earlier run so the
      # report can show a comparison.
      - name: Download artifacts for comparison run
        if: ${{ github.event.inputs.compareTo != '' }}
        uses: dawidd6/action-download-artifact@v2
        with:
          run_number: ${{ github.event.inputs.compareTo }}
          name: results-all
          path: baseline

      - name: Setup tmate session
        uses: mxschmitt/action-tmate@v3
        if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}

      - name: Generate report
        run: |
          cd results
          unzip results.zip
          cd ..
          if [ -d baseline ]; then
            cd baseline
            unzip results.zip
            cd ..
            python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results --baselinedir baseline/results > $GITHUB_STEP_SUMMARY
          else
            python ${GITHUB_WORKSPACE}/src/benchmark/parse_reports.py --outputdir results/results > $GITHUB_STEP_SUMMARY
          fi

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
# Editor / OS cruft
*.swp
.DS_Store

# Python build, cache, and environment artifacts
__pycache__
.venv
.tox
.ipynb_checkpoints/
src/soretrieval.egg-info/

# Local configuration (holds API keys — never commit)
.env

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 GitHub
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# UpgrAIder: Automatically Updating Deprecated API Usage through LLMs and documentation retrieval
2+
3+
The goal of this project is to update outdated code snippets (specifically those that use deprecated library APIs). The technique relies on the usage of a Large Language Model (hence the "AI" in the name), augmented with information retrieved from release notes. More details about the project can be found in [this presentation](https://github.com/githubnext/Upgraider/blob/main/Show-and-Tell/Nadi_ShowAndTell.pdf)
4+
5+
## Setup
6+
7+
- `git clone <this repo>`
8+
9+
- Install dependencies:
10+
11+
```
12+
python -m venv .venv
13+
source .venv/bin/activate
14+
pip install -r requirements.txt
15+
python setup.py develop
16+
```
17+
18+
- Create environment variables
19+
- You will need an OpenAI key to run this project.
20+
- When running evaluation experiments, we use a separate virtual environment to install the specific version of the library we want to analyze. Create a virtual environment in a separate folder from this project and include its path in the `.env file` (`SCRATCH_VENV`)
21+
- Create a `.env` file to hold these environment variables:
22+
23+
```
24+
cat > .env <<EOL
25+
OPENAI_API_KEY=...
26+
OPENAI_ORG=...
27+
SCRATCH_VENV=<path to a folder that already has a venv we can activate>
28+
```
29+
30+
## Running
31+
32+
### Populating the DB
33+
34+
To populate the database with the information of the available release notes for each library, run `python src/upgraider/populate_doc_db.py`
35+
36+
Note that this is a one time step (unless you add libraries or release notes). The `libraries` folder contains information for all current target libraries, including the code examples we evaluate on. Each library folder contains a `library.json` file that specifies the base version, which is the library version available around the training date of the model (~ May 2022) and the current version of the library. The base version is useful to know which release notes to consider (those after that date) while the current version is useful since this is the one we want to use for our experiments.
37+
38+
Right now, each library folder already contains the release notes between the base and current library version. These were manually retrieved; in the future, it would be useful to create a script that automatically retrieves release notes for a given library.
39+
40+
The above script looks for sections with certain keywords related to APIs and/or deprecation. It then creates a DB entry which has an embedding for the content of each item in those sections.
41+
42+
### Updating a single code example
43+
44+
`src/upgraider/fix_lib_examples.py` is the file responsible for this. Run `python src/upgraider/fix_lib_examples.py --help` to see the required command-line options. To run a single example, make sure to specify `--examplefile`; otherwise, it will run on all the examples available for that library.
45+
46+
### Running a full experiment
47+
48+
Run `python src/upgraider/run_experiment.py`. This will attempt to run upgraider on *all* code examples available for *all* libraries in the `libraries` folder. The output data and reports will be written to the `output` folder.
49+
50+
### Using Actions to run experiments
51+
52+
The `run_experiment` workflow allows you to run a full experiment on the available libraries. It produces a markdown report of the results. Note that you need to set the required environment variables (i.e., API keys etc) as repository secrets.
53+
54+
### Running Tests
55+
56+
`python -m pytest`
57+
58+
## Extra Functionality
59+
60+
Experimental/not currently used anymore: To find differences between two versions of an API, you can run
61+
62+
`python src/apiexploration/run_api_diff.py`
63+
64+
which will use the library version info in the `libraries` folders.

Show-and-Tell/Nadi_ShowAndTell.pdf

4.11 MB
Binary file not shown.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# NOTE(review): benchmark input example — intentionally uses outdated APIs as
# upgrade targets for Upgraider; do not modernize this code in place.
import networkx as nx
2+
import numpy as np
3+
4+
# 5x5 adjacency matrix of an undirected graph (np.matrix is deprecated in NumPy).
A = np.matrix([[0, 1, 1, 0, 0], [1, 0, 1, 1, 0], [1, 1, 0, 1, 1], [0, 1, 1, 0, 1], [0, 0, 1, 1, 0]])
5+
# nx.from_numpy_matrix was removed in NetworkX 3.0 (replaced by from_numpy_array).
G = nx.from_numpy_matrix(A)
6+
print(G.edges)
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# NOTE(review): benchmark input example — intentionally uses outdated APIs as
# upgrade targets for Upgraider; do not modernize this code in place.
import networkx as nx
2+
import numpy as np
3+
4+
# Build a small undirected graph on nodes 1..5.
G = nx.Graph()
5+
G.add_nodes_from([1, 2, 3, 4, 5])
6+
G.add_edges_from([(1, 2), (1, 3), (2, 3), (2, 4), (3, 4), (3, 5), (4, 5)])
7+
# nx.to_numpy_matrix was removed in NetworkX 3.0 (replaced by to_numpy_array).
matrix = nx.to_numpy_matrix(G)
8+
print(matrix)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# NOTE(review): benchmark input example — intentionally uses outdated APIs as
# upgrade targets for Upgraider; do not modernize this code in place.
import networkx as nx
2+
3+
# nx.OrderedGraph was removed in NetworkX 3.0 (plain Graph keeps insertion order).
SG=nx.OrderedGraph()
4+
# Each character of the string becomes a node (e.g. 'H', 'e', 'l', ...).
SG.add_nodes_from("HelloWorld")
5+
SG.add_edges_from([(0, 1), (1, 2), (3,4), (6,8)])
6+
print(SG)

libraries/networkx/library.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
{
    "name": "networkx",
    "ghurl": "https://github.com/networkx/networkx",
    "baseversion": "2.8.2",
    "currentversion": "3.0"
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
NetworkX 2.8.3
2+
==============
3+
4+
Release date: 4 June 2022
5+
6+
Supports Python 3.8, 3.9, and 3.10.
7+
8+
NetworkX is a Python package for the creation, manipulation, and study of the
9+
structure, dynamics, and functions of complex networks.
10+
11+
For more information, please visit our `website <https://networkx.org/>`_
12+
and our :ref:`gallery of examples <examples_gallery>`.
13+
Please send comments and questions to the `networkx-discuss mailing list
14+
<http://groups.google.com/group/networkx-discuss>`_.
15+
16+
Highlights
17+
----------
18+
19+
Minor documentation and bug fixes.
20+
21+
Merged PRs
22+
----------
23+
24+
- Bump release version
25+
- Update release process
26+
- added example to closeness.py (#5645)
27+
- Extract valid kwds from the function signature for draw_networkx_* (#5660)
28+
- Error out when pydot fails to correctly parse node names (#5667)
29+
- Remove redundant py2 numeric conversions (#5661)
30+
- Correcting a typo in the references (#5677)
31+
- Add workaround for pytest failures on 3.11-beta2 (#5680)
32+
- Moved random_spanning_tree to public API (#5656)
33+
- More tests for clustering (upstreaming from graphblas-algorithms) (#5673)
34+
- Remove unused logic in nonisomorphic_trees (#5682)
35+
- equitable_coloring: Get lazily first item instead of creating whole list (#5668)
36+
- Update subgraph views tests to pass with out of order execution (#5683)
37+
- Use isort with pre-commit to enforce import guidelines (#5659)
38+
- ignore isort commit from git blame (#5684)
39+
- Another catch by pytest-randomly (#5685)
40+
- Remove unused file from utils.test (#5687)
41+
- Update release requirements (#5690)
42+
- Update developer requirements (#5689)
43+
- Fix old release notes
44+
45+
Contributors
46+
------------
47+
48+
- Ross Barnowski
49+
- Jon Crall
50+
- Lukong123
51+
- Jarrod Millman
52+
- RATCOinc
53+
- Matt Schwennesen
54+
- Mridul Seth
55+
- Matus Valo
56+
- Erik Welch

0 commit comments

Comments
 (0)