phind major improvement (stream)
Removed the timeout error and added data streaming; integration into the gpt clone is coming soon.
parent b31d053191
commit b2459a5897
3 changed files with 141 additions and 28 deletions
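At a glance, the change swaps the blocking `tls_client` request (whose timeout was hit on long generations) for `curl_cffi.requests.post` with a `content_callback`: a worker thread pushes raw response chunks onto a `Queue`, and a generator drains that queue as data arrives. Below is a minimal, illustrative sketch of that producer/consumer pattern; it is not code from this commit, it assumes `curl_cffi` is installed, and it uses httpbin.org as a stand-in endpoint.

```python
# Illustrative sketch only, not part of this commit.
# Assumes `pip install curl_cffi`; httpbin.org stands in for the phind endpoint.
from queue import Queue, Empty
from threading import Thread

from curl_cffi.requests import post

chunks = Queue()
finished = False

def worker() -> None:
    global finished
    # content_callback (same call shape as in the diff below) hands over each chunk
    # of the body as it arrives, instead of blocking until the whole response is buffered
    post('https://httpbin.org/anything', json={'ping': 'pong'},
         content_callback=chunks.put, timeout=99999)
    finished = True

Thread(target=worker).start()

received = b''
while not finished or not chunks.empty():
    try:
        received += chunks.get(timeout=0.1)
    except Empty:
        pass

print(f'received {len(received)} bytes')
```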
README.md
@@ -102,16 +102,27 @@ print(response.completion.choices[0].text)
 ### Example: `phind` (use like openai pypi package) <a name="example-phind"></a>
 
 ```python
-# HELP WANTED: tls_client does not accept stream and timeout gets hit with long responses
 
 import phind
 
-prompt = 'hello world'
+prompt = 'who won the Qatar world cup'
+
+# help needed: not getting newlines from the stream, please submit a PR if you know how to fix this
+# stream completion
+for result in phind.StreamingCompletion.create(
+    model = 'gpt-4',
+    prompt = prompt,
+    results = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet)
+    creative = False,
+    detailed = False,
+    codeContext = ''): # up to 3000 chars of code
+
+    print(result.completion.choices[0].text, end='', flush=True)
 
+# normal completion
 result = phind.Completion.create(
     model = 'gpt-4',
     prompt = prompt,
-    results = phind.Search.create(prompt, actualSearch = False), # create search (set actualSearch to False to disable internet)
+    results = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet)
     creative = False,
     detailed = False,
     codeContext = '') # up to 3000 chars of code
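Since the streamed chunks expose the same `completion.choices[0].text` shape as the non-streaming response, they can simply be concatenated. A small follow-up example (illustrative, not part of the commit), reusing the README call above:

```python
# Illustrative follow-up, not part of this commit.
import phind

prompt = 'who won the Qatar world cup'

text = ''.join(
    chunk.completion.choices[0].text
    for chunk in phind.StreamingCompletion.create(
        model = 'gpt-4',
        prompt = prompt,
        results = phind.Search.create(prompt, actualSearch = True),
        creative = False,
        detailed = False,
        codeContext = ''))

print(text)
```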
@@ -1,24 +1,11 @@
 from urllib.parse import quote
-from tls_client import Session
 from time import time
 from datetime import datetime
+from queue import Queue, Empty
+from threading import Thread
+from re import findall
 
-client = Session(client_identifier='chrome110')
-client.headers = {
-    'authority': 'www.phind.com',
-    'accept': '*/*',
-    'accept-language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
-    'content-type': 'application/json',
-    'origin': 'https://www.phind.com',
-    'referer': 'https://www.phind.com/search',
-    'sec-ch-ua': '"Chromium";v="110", "Google Chrome";v="110", "Not:A-Brand";v="99"',
-    'sec-ch-ua-mobile': '?0',
-    'sec-ch-ua-platform': '"macOS"',
-    'sec-fetch-dest': 'empty',
-    'sec-fetch-mode': 'cors',
-    'sec-fetch-site': 'same-origin',
-    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
-}
+from curl_cffi.requests import post
 
 class PhindResponse:
 
@@ -81,11 +68,19 @@ class Search:
            }
        }
 
-        return client.post('https://www.phind.com/api/bing/search', json = {
+        headers = {
+            'authority' : 'www.phind.com',
+            'origin' : 'https://www.phind.com',
+            'referer' : 'https://www.phind.com/search',
+            'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+        }
+
+        return post('https://www.phind.com/api/bing/search', headers = headers, json = {
             'q': prompt,
             'userRankList': {},
             'browserLanguage': language}).json()['rawBingResults']
 
 
 class Completion:
     def create(
         model = 'gpt-4',
@@ -121,8 +116,15 @@ class Completion:
            }
        }
 
+        headers = {
+            'authority' : 'www.phind.com',
+            'origin' : 'https://www.phind.com',
+            'referer' : f'https://www.phind.com/search?q={quote(prompt)}&c=&source=searchbox&init=true',
+            'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+        }
+
         completion = ''
-        response = client.post('https://www.phind.com/api/infer/answer', json=json_data, timeout_seconds=200)
+        response = post('https://www.phind.com/api/infer/answer', headers = headers, json = json_data, timeout=99999)
         for line in response.text.split('\r\n\r\n'):
             completion += (line.replace('data: ', ''))
 
@@ -143,3 +145,88 @@ class Completion:
                'total_tokens' : len(prompt) + len(completion)
            }
        })
 
+
+class StreamingCompletion:
+    message_queue = Queue()
+    stream_completed = False
+
+    def request(model, prompt, results, creative, detailed, codeContext, language) -> None:
+
+        models = {
+            'gpt-4' : 'expert',
+            'gpt-3.5-turbo' : 'intermediate',
+            'gpt-3.5': 'intermediate',
+        }
+
+        json_data = {
+            'question' : prompt,
+            'bingResults' : results,
+            'codeContext' : codeContext,
+            'options': {
+                'skill' : models[model],
+                'date' : datetime.now().strftime("%d/%m/%Y"),
+                'language': language,
+                'detailed': detailed,
+                'creative': creative
+            }
+        }
+
+        stream_req = post('https://www.phind.com/api/infer/answer', json=json_data, timeout=99999,
+            content_callback = StreamingCompletion.handle_stream_response,
+            headers = {
+                'authority' : 'www.phind.com',
+                'origin' : 'https://www.phind.com',
+                'referer' : f'https://www.phind.com/search?q={quote(prompt)}&c=&source=searchbox&init=true',
+                'user-agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
+            })
+
+        StreamingCompletion.stream_completed = True
+
+    @staticmethod
+    def create(
+        model : str = 'gpt-4',
+        prompt : str = '',
+        results : dict = None,
+        creative : bool = False,
+        detailed : bool = False,
+        codeContext : str = '',
+        language : str = 'en'):
+
+        if results is None:
+            results = Search.create(prompt, actualSearch = True)
+
+        if len(codeContext) > 2999:
+            raise ValueError('codeContext must be less than 3000 characters')
+
+        Thread(target = StreamingCompletion.request, args = [
+            model, prompt, results, creative, detailed, codeContext, language]).start()
+
+        while StreamingCompletion.stream_completed != True or not StreamingCompletion.message_queue.empty():
+            try:
+                message = StreamingCompletion.message_queue.get(timeout=0)
+                for token in findall(r'(?<=data: )(.+?)(?=\r\n\r\n)', message.decode()):
+                    yield PhindResponse({
+                        'id' : f'cmpl-1337-{int(time())}',
+                        'object' : 'text_completion',
+                        'created': int(time()),
+                        'model' : model,
+                        'choices': [{
+                            'text' : token,
+                            'index' : 0,
+                            'logprobs' : None,
+                            'finish_reason' : 'stop'
+                        }],
+                        'usage': {
+                            'prompt_tokens' : len(prompt),
+                            'completion_tokens' : len(token),
+                            'total_tokens' : len(prompt) + len(token)
+                        }
+                    })
+
+            except Empty:
+                pass
+
+    @staticmethod
+    def handle_stream_response(response):
+        StreamingCompletion.message_queue.put(response)
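The generator in `StreamingCompletion.create` pulls tokens out of each queued chunk by matching the `data: ...` frames of the server-sent-event stream. A quick illustration of that extraction on a made-up chunk (only the regex comes from the commit; the sample bytes are invented):

```python
# Illustration of the token extraction used above; the sample bytes are made up.
from re import findall

sample = b'data: Hello\r\n\r\ndata:  world\r\n\r\n'

tokens = findall(r'(?<=data: )(.+?)(?=\r\n\r\n)', sample.decode())
print(tokens)  # ['Hello', ' world'] (only the first space after 'data:' is consumed by the lookbehind)
```

Note that `.` does not match newlines, so any `\n` inside a frame's payload is dropped by this pattern, which may be related to the missing-newlines issue flagged in the README.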
@@ -2,6 +2,7 @@ import phind
 
 prompt = 'hello world'
 
+# normal completion
 result = phind.Completion.create(
     model = 'gpt-4',
     prompt = prompt,
@@ -11,3 +12,17 @@ result = phind.Completion.create(
     codeContext = '') # up to 3000 chars of code
 
 print(result.completion.choices[0].text)
+
+prompt = 'who won the Qatar world cup'
+
+# help needed: not getting newlines from the stream, please submit a PR if you know how to fix this
+# stream completion
+for result in phind.StreamingCompletion.create(
+    model = 'gpt-3.5',
+    prompt = prompt,
+    results = phind.Search.create(prompt, actualSearch = True), # create search (set actualSearch to False to disable internet)
+    creative = False,
+    detailed = False,
+    codeContext = ''): # up to 3000 chars of code
+
+    print(result.completion.choices[0].text, end='', flush=True)