1
- import { ollamaTokenGenerator } from '../modules/ollamaTokenGenerator' ;
2
- import { countSymbol } from '../modules/text' ;
3
- import { info } from '../modules/log' ;
1
+ import { makeOllamaRequest } from "../modules/ollamaRequest" ;
2
+
3
+ type OllamaToken = {
4
+ model : string ,
5
+ response : string ,
6
+ } ;
4
7
5
8
export async function autocomplete ( args : {
6
9
endpoint : string ,
@@ -20,30 +23,27 @@ export async function autocomplete(args: {
20
23
prompt : args . prefix ,
21
24
suffix : args . suffix ,
22
25
raw : true ,
26
+ stream : false ,
23
27
options : {
24
28
num_predict : args . maxTokens ,
25
29
temperature : args . temperature
26
30
}
27
31
} ;
28
32
29
- // Receiving tokens
30
- let res = '' ;
31
- let totalLines = 1 ;
32
- for await ( let tokens of ollamaTokenGenerator ( args . endpoint + '/api/generate' , data , args . bearerToken ) ) {
33
+ const res = await makeOllamaRequest ( args . endpoint + '/api/generate' , data , args . bearerToken ) ;
34
+ try {
35
+ const tokens = JSON . parse ( res ) as OllamaToken ;
33
36
if ( args . canceled && args . canceled ( ) ) {
34
- break ;
35
- }
36
-
37
- res = res + tokens . response ;
38
-
39
- // Update total lines
40
- totalLines += countSymbol ( tokens . response , '\n' ) ;
41
- // Break if too many lines and on top level
42
- if ( totalLines > args . maxLines ) {
43
- info ( 'Too many lines, breaking.' ) ;
44
- break ;
37
+ return "" ;
45
38
}
39
+ const response = tokens . response ;
40
+
41
+ // take only args.maxLines lines from the response
42
+ let lines = response . split ( '\n' ) ;
43
+ lines = lines . slice ( 0 , args . maxLines ) ;
44
+ return lines . join ( '\n' ) ;
45
+ } catch ( e ) {
46
+ console . warn ( 'Receive wrong line: ' + res ) ;
47
+ return "" ;
46
48
}
47
-
48
- return res ;
49
49
}
0 commit comments