# Source code for grub.examples.pypi

"""Searching for available pypi names, with word2vec query expansion"""

import re
from functools import lru_cache, cached_property

import numpy as np

from py2store.slib.s_zipfile import FileStreamsOfZip
from py2store.base import Stream
from py2store import groupby


def line_to_raw_word_vec(line):
    """Split a raw line into its leading word and the remaining vector text.

    The line is split once on whitespace. The first piece is decoded to
    ``str``; the second piece is returned untouched (not decoded, not parsed).

    :param line: An object supporting ``split`` whose pieces support
        ``decode`` (e.g. ``bytes``).
    :return: A ``(word, raw_vec)`` pair.
    :raises ValueError: If the line does not split into exactly two pieces.
    """
    raw_word, raw_vec = line.split(None, 1)
    decoded_word = raw_word.decode()
    return decoded_word, raw_vec


class WordVecStream(Stream):
    """``Stream`` subclass whose ``_obj_of_data`` is ``line_to_raw_word_vec``.

    NOTE(review): ``_obj_of_data`` is presumably the per-item conversion that
    ``Stream`` applies while iterating -- confirm against
    ``py2store.base.Stream``.
    """

    # Wrapped in ``staticmethod`` because a plain function stored as a class
    # attribute is bound on instance access, which would pass the stream
    # instance itself as ``line``.
    _obj_of_data = staticmethod(line_to_raw_word_vec)
class StreamsOfZip(FileStreamsOfZip):
    """``FileStreamsOfZip`` subclass that converts data with ``line_to_raw_word_vec``."""

    def _obj_of_data(self, data):
        """Return ``line_to_raw_word_vec(data)``, i.e. a ``(word, raw_vec)`` pair."""
        return line_to_raw_word_vec(data)
def word_and_vecs(fp): # fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore') # consume the first line (n_lines, n_dims) not yielded n_lines, n_dims = map(int, fp.readline().decode().split()) for line in fp: tok, *vec = line.decode().rstrip().split(' ') yield tok, tuple(map(float, vec)) @lru_cache def get_html(): simple_index_url = 'https://pypi.org/simple' try: from graze.base import graze age_threshold = 7 * 24 * 60 * 60 # one week return graze(simple_index_url, max_age=age_threshold) except ModuleNotFoundError: from urllib.request import urlopen with urlopen(simple_index_url) as f: return f.read() _parse_names_from_html = re.compile('<a href="/simple/.+/">(.+)</a>').findall def get_distributions(html=None): html = html or get_html() return _parse_names_from_html(html.decode()) def _get_distributions_old_version_using_xml(html=None): from xml.etree import ElementTree from io import BytesIO html = html or get_html() tree = ElementTree.parse(BytesIO(html)) return [a.text for a in tree.iter('a')] # from py2store import lazyprop class Pypi: @cached_property def pypi_words(self): return set(get_distributions()) def is_available(self, word): return word not in self.pypi_words def available_and_not(self, words): return groupby(words, key=self.is_available) def live_is_available(self, pkg_name): """Check if a package name is available, but live (directly on pypi, not a cache)""" import urllib try: with urllib.request.urlopen(f'https://pypi.org/project/{pkg_name}') as u: return False except urllib.error.HTTPError as e: return True # if url is invalid, package exists