Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests(pypi): add tests for fetching package from pypi #39

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions internal/helper/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,18 @@ import (
)

type Client struct {
HTTP *http.Client
HTTP *http.Client
BaseURL string
}

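// NewClient returns a Client whose underlying http.Client uses a 5-second
// timeout and whose BaseURL is set to baseURL.
// TODO: complete proper client settings.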
func NewClient() *Client {
func NewClient(baseURL string) *Client {
return &Client{
HTTP: &http.Client{
Timeout: time.Second * 5,
},
BaseURL: baseURL,
}
}

Expand All @@ -42,3 +44,13 @@ func (c *Client) CheckURL(url string) bool {

return r.StatusCode == http.StatusOK
}

// Get issues an HTTP GET for url through the client's underlying
// http.Client. On success it returns the response and a nil error; on
// failure it returns a nil response together with the error. The caller is
// responsible for closing the returned response body.
func (c *Client) Get(url string) (*http.Response, error) {
	resp, err := c.HTTP.Get(url)
	if err == nil {
		return resp, nil
	}

	// Always hand back a nil response on error so callers never touch a
	// partially valid value.
	return nil, err
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is good for now. Thanks!

4 changes: 3 additions & 1 deletion pip/pipenv/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const (
placeholderPkgName = "{PACKAGE}"
packageSrcLocation = "/src/"
packageSiteLocation = "/site-packages"
pypiURL = "https://pypi.org"
)

var (
Expand Down Expand Up @@ -113,7 +114,8 @@ func (m *PipEnv) ListUsedModules(path string) ([]meta.Package, error) {
return m.allModules, errFailedToConvertModules
}

decoder := worker.NewMetadataDecoder(m.GetPackageDetails)
pypiDataFactory := worker.NewPypiPackageDataFactory(helper.NewClient(pypiURL))
decoder := worker.NewMetadataDecoder(m.GetPackageDetails, pypiDataFactory)
metainfo, err := decoder.ConvertMetadataToModules(m.pkgs, &m.allModules)
if err != nil {
return m.allModules, err
Expand Down
1 change: 1 addition & 0 deletions pip/testdata/requests_pypi_data.json

Large diffs are not rendered by default.

38 changes: 20 additions & 18 deletions pip/worker/decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,32 @@ type GetPackageDetailsFunc = func(PackageName string) (string, error)

// MetadataDecoder turns decoded package metadata into meta.Package modules
// (see ConvertMetadataToModules and BuildModule).
type MetadataDecoder struct {
// getPkgDetailsFunc looks up details for a package by name and returns
// them as a string, or an error.
getPkgDetailsFunc GetPackageDetailsFunc
// pf is the PyPI data source BuildModule uses to fetch package data and to
// derive maintainer details, checksum and download location from it.
pf PypiPackageDataFactory
}

// New Metadata Decoder ...
func NewMetadataDecoder(pkgDetailsFunc GetPackageDetailsFunc) *MetadataDecoder {
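// NewMetadataDecoder returns a MetadataDecoder that uses pkgDetailsFunc to look up package details and pf to obtain PyPI package data.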
func NewMetadataDecoder(pkgDetailsFunc GetPackageDetailsFunc, pf PypiPackageDataFactory) *MetadataDecoder {
return &MetadataDecoder{
getPkgDetailsFunc: pkgDetailsFunc,
pf: pf,
}
}

func SetMetadataValues(matadata *Metadata, datamap map[string]string) {
matadata.Name = datamap[KeyName]
matadata.Version = datamap[KeyVersion]
matadata.Description = datamap[KeySummary]
matadata.HomePage = datamap[KeyHomePage]
matadata.Author = datamap[KeyAuthor]
matadata.AuthorEmail = datamap[KeyAuthorEmail]
matadata.License = datamap[KeyLicense]
matadata.Location = datamap[KeyLocation]
func SetMetadataValues(metadata *Metadata, datamap map[string]string) {
metadata.Name = datamap[KeyName]
metadata.Version = datamap[KeyVersion]
metadata.Description = datamap[KeySummary]
metadata.HomePage = datamap[KeyHomePage]
metadata.Author = datamap[KeyAuthor]
metadata.AuthorEmail = datamap[KeyAuthorEmail]
metadata.License = datamap[KeyLicense]
metadata.Location = datamap[KeyLocation]

// Parsing "Requires"
if len(datamap[KeyRequires]) != 0 {
matadata.Modules = strings.Split(datamap[KeyRequires], ",")
for i, v := range matadata.Modules {
matadata.Modules[i] = strings.TrimSpace(v)
metadata.Modules = strings.Split(datamap[KeyRequires], ",")
for i, v := range metadata.Modules {
metadata.Modules[i] = strings.TrimSpace(v)
}
}
}
Expand Down Expand Up @@ -144,7 +146,7 @@ func (d *MetadataDecoder) BuildModule(metadata Metadata) meta.Package {
module.PackageURL = metadata.HomePage
}

pypiData, err := GetPackageDataFromPyPi(metadata.PackageJSONURL)
pypiData, err := d.pf.GetPackageData(metadata.PackageJSONURL)
if err != nil {
log.Warnf("Unable to get `%s` package details from pypi.org", metadata.Name)
if (len(metadata.HomePage) > 0) && (metadata.HomePage != "None") {
Expand All @@ -154,7 +156,7 @@ func (d *MetadataDecoder) BuildModule(metadata Metadata) meta.Package {

// Prepare supplier contact
if len(metadata.Author) > 0 && metadata.Author == "None" {
metadata.Author, metadata.AuthorEmail = GetMaintenerDataFromPyPiPackageData(pypiData)
metadata.Author, metadata.AuthorEmail = d.pf.GetMaintainerData(pypiData)
}

contactType := meta.Person
Expand All @@ -169,11 +171,11 @@ func (d *MetadataDecoder) BuildModule(metadata Metadata) meta.Package {
}

// Prepare checksum
checksum := GetChecksumeFromPyPiPackageData(pypiData, metadata)
checksum := d.pf.GetChecksum(pypiData, metadata)
module.Checksum = *checksum

// Prepare download location
downloadURL := GetDownloadLocationFromPyPiPackageData(pypiData, metadata)
downloadURL := d.pf.GetDownloadLocationFromPyPiPackageData(pypiData, metadata)
module.PackageDownloadLocation = downloadURL
if len(downloadURL) == 0 {
if metadata.Root {
Expand Down
143 changes: 75 additions & 68 deletions pip/worker/pypi.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ package worker
import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"reflect"
"strings"

"github.com/opensbom-generator/parsers/internal/helper"
"github.com/opensbom-generator/parsers/meta"
)

Expand Down Expand Up @@ -82,32 +84,36 @@ var HashAlgoPickOrder []meta.HashAlgorithm = []meta.HashAlgorithm{
meta.HashAlgoMD2,
}

func makeGetRequest(packageJSONURL string) (*http.Response, error) {
url := "https://" + packageJSONURL

request, _ := http.NewRequest("GET", url, nil)
request.Header.Set("Accept", "application/json")
type pypiPackageDataFactory struct {
client *helper.Client
}

client := &http.Client{}
response, err := client.Do(request)
if err != nil {
return nil, err
}
type PypiPackageDataFactory interface {
GetPackageData(packageJSONURL string) (PypiPackageData, error)
Comment on lines +88 to +93
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to understand why we need a new interface type. Could you explain?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The interface was introduced so that all receiver functions could be mocked in tests.
For instance, BuildModule fetches package data through GetPackageData, so unit tests for BuildModule may need to mock that call.

GetMaintainerData(pkgData PypiPackageData) (string, string)
GetChecksum(pkgData PypiPackageData, metadata Metadata) *meta.Checksum
GetDownloadLocationFromPyPiPackageData(pkgData PypiPackageData, metadata Metadata) string
}

if response.StatusCode != http.StatusOK {
return nil, errorPypiCouldNotFetchPkgData
// NewPypiPackageDataFactory returns a PypiPackageDataFactory backed by the given helper client.
func NewPypiPackageDataFactory(client *helper.Client) PypiPackageDataFactory {
return &pypiPackageDataFactory{
client: client,
}

return response, err
}

func GetPackageDataFromPyPi(packageJSONURL string) (PypiPackageData, error) {
func (pf *pypiPackageDataFactory) GetPackageData(packageJSONURL string) (PypiPackageData, error) {
packageInfo := PypiPackageData{}

response, err := makeGetRequest(packageJSONURL)
packageJSONURL = strings.Replace(packageJSONURL, "pypi.org", "", 1)
response, err := pf.client.HTTP.Get(fmt.Sprintf("%s%s", pf.client.BaseURL, packageJSONURL))
if err != nil {
return packageInfo, err
}

if response.StatusCode != http.StatusOK {
return packageInfo, errorPypiCouldNotFetchPkgData
}
defer response.Body.Close()

jsondata, _ := io.ReadAll(response.Body)
Expand All @@ -119,7 +125,7 @@ func GetPackageDataFromPyPi(packageJSONURL string) (PypiPackageData, error) {
return packageInfo, nil
}

func GetMaintenerDataFromPyPiPackageData(pkgData PypiPackageData) (string, string) {
func (pf *pypiPackageDataFactory) GetMaintainerData(pkgData PypiPackageData) (string, string) {
var name string
var email string
if len(pkgData.Info.Maintainer) > 0 {
Expand All @@ -131,27 +137,54 @@ func GetMaintenerDataFromPyPiPackageData(pkgData PypiPackageData) (string, strin
return name, email
}

func GetHighestOrderHashData(digests DigestTypes) (meta.HashAlgorithm, string) {
var algoType meta.HashAlgorithm
var digestValue string
func (pf *pypiPackageDataFactory) GetChecksum(pkgData PypiPackageData, metadata Metadata) *meta.Checksum {
checksum := meta.Checksum{
Algorithm: meta.HashAlgoSHA1,
Content: []byte(pkgData.Info.Name),
}

v := reflect.ValueOf(digests)
for _, algo := range HashAlgoPickOrder {
f := v.FieldByName(string(algo))
if f.IsValid() {
algoType = algo
digestValue = f.String()
return algoType, digestValue
for _, packageDistInfo := range pkgData.Urls {
distInfo, status := getPackageBDistWheelInfo(packageDistInfo, metadata.Generator, metadata.Tag, metadata.CPVersion)
if status {
algo, value := getHighestOrderHashData(distInfo.Digests)
checksum.Algorithm = algo
checksum.Value = value
return &checksum
}

distInfo, status = getPackageSDistInfo(packageDistInfo, "sdist")
if status {
algo, value := getHighestOrderHashData(distInfo.Digests)
checksum.Algorithm = algo
checksum.Value = value
return &checksum
}
}

return algoType, digestValue
return &checksum
}

// GetDownloadLocationFromPyPiPackageData walks pkgData.Urls in order and
// returns the URL of the first distribution entry accepted either by
// getPackageBDistWheelInfo (checked against metadata.Generator, metadata.Tag
// and metadata.CPVersion) or by getPackageSDistInfo (checked against "sdist").
// For each entry the wheel check runs before the sdist check. It returns an
// empty string when no entry is accepted.
func (pf *pypiPackageDataFactory) GetDownloadLocationFromPyPiPackageData(pkgData PypiPackageData, metadata Metadata) string {
	for _, candidate := range pkgData.Urls {
		if wheel, ok := getPackageBDistWheelInfo(candidate, metadata.Generator, metadata.Tag, metadata.CPVersion); ok {
			return wheel.URL
		}

		if sdist, ok := getPackageSDistInfo(candidate, "sdist"); ok {
			return sdist.URL
		}
	}

	return ""
}

func GetPackageBDistWheelInfo(distInfo PypiPackageDistInfo, generator string, tag string, cpversion string) (PypiPackageDistInfo, bool) {
func getPackageBDistWheelInfo(distInfo PypiPackageDistInfo, generator string,
tag string, cpVersion string) (PypiPackageDistInfo, bool) {
PackageType := strings.EqualFold(distInfo.PackageType, generator)
Tag := strings.Contains(strings.ToLower(distInfo.Filename), strings.ToLower(tag))
CPVersion := strings.EqualFold(distInfo.PythonVersion, cpversion)
CPVersion := strings.EqualFold(distInfo.PythonVersion, cpVersion)
Py2Py3 := strings.Contains(strings.ToLower("py2.py3"), strings.ToLower(distInfo.PythonVersion))

status := false
Expand All @@ -163,7 +196,7 @@ func GetPackageBDistWheelInfo(distInfo PypiPackageDistInfo, generator string, ta
return distInfo, status
}

func GetPackageSDistInfo(distInfo PypiPackageDistInfo, generator string) (PypiPackageDistInfo, bool) {
func getPackageSDistInfo(distInfo PypiPackageDistInfo, generator string) (PypiPackageDistInfo, bool) {
PackageType := strings.EqualFold(distInfo.PackageType, generator)
Source := strings.EqualFold(distInfo.PythonVersion, "source")

Expand All @@ -176,45 +209,19 @@ func GetPackageSDistInfo(distInfo PypiPackageDistInfo, generator string) (PypiPa
return distInfo, status
}

func GetChecksumeFromPyPiPackageData(pkgData PypiPackageData, metadata Metadata) *meta.Checksum {
checksum := meta.Checksum{
Algorithm: meta.HashAlgoSHA1,
Content: []byte(pkgData.Info.Name),
}

for _, packageDistInfo := range pkgData.Urls {
distInfo, status := GetPackageBDistWheelInfo(packageDistInfo, metadata.Generator, metadata.Tag, metadata.CPVersion)
if status {
algo, value := GetHighestOrderHashData(distInfo.Digests)
checksum.Algorithm = algo
checksum.Value = value
return &checksum
}

distInfo, status = GetPackageSDistInfo(packageDistInfo, "sdist")
if status {
algo, value := GetHighestOrderHashData(distInfo.Digests)
checksum.Algorithm = algo
checksum.Value = value
return &checksum
}
}

return &checksum
}

func GetDownloadLocationFromPyPiPackageData(pkgData PypiPackageData, metadata Metadata) string {
for _, packageDistInfo := range pkgData.Urls {
distInfo, status := GetPackageBDistWheelInfo(packageDistInfo, metadata.Generator, metadata.Tag, metadata.CPVersion)
if status {
return distInfo.URL
}
func getHighestOrderHashData(digests DigestTypes) (meta.HashAlgorithm, string) {
var algoType meta.HashAlgorithm
var digestValue string

distInfo, status = GetPackageSDistInfo(packageDistInfo, "sdist")
if status {
return distInfo.URL
v := reflect.ValueOf(digests)
for _, algo := range HashAlgoPickOrder {
f := v.FieldByName(string(algo))
if f.IsValid() {
algoType = algo
digestValue = f.String()
return algoType, digestValue
}
}

return ""
return algoType, digestValue
}
48 changes: 48 additions & 0 deletions pip/worker/pypi_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package worker

import (
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"

"github.com/opensbom-generator/parsers/internal/helper"
"github.com/stretchr/testify/require"
)

func TestGetPackageDataFromPyPi(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
switch strings.TrimSpace(r.URL.Path) {
case "/pypi/requests/jso":
byteData, err := os.ReadFile("../testdata/requests_pypi_data.json")
if err != nil {
panic(err)
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusOK)
w.Write(byteData)
default:
http.NotFoundHandler().ServeHTTP(w, r)
}
}))
defer server.Close()

for name, tc := range map[string]struct {
packageJSONUrl string
expectedErr error
}{
"valid package url": {
packageJSONUrl: "/pypi/requests/jso",
expectedErr: nil,
},
} {
t.Run(name, func(t *testing.T) {
mockClient := helper.NewClient(server.URL)
factory := NewPypiPackageDataFactory(mockClient)
packageInfo, err := factory.GetPackageData(tc.packageJSONUrl)
require.ErrorIs(t, tc.expectedErr, err)
require.NotNil(t, packageInfo)
})
}
}