o
    5c5%                     @   s"  d Z ddlmZ ddlZddlZddlZddlZddlZddl	m
Z
mZ ddlmZ ejdZedd Zedd	 Zeejd
ddgejdg ddd Zedd Zedd Zeejddi e
ddgifdddie
ddgifdddgie
dddgifd dgd!d"e
ddgifd dgd#d"e
dejdgifgd$d% Zed&d' Zeejd(g d)d*d+ Zeejd,d!d#gd-d. Zed/d0 Zejdg d1d2d3 Zeejdg d4d5d6 Zed7d8 Z ed9d: Z!ejd;ejd<d=d>gd?d@ Z"dS )AzZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesION)	DataFrameread_csvpyarrow_skipc                 C   sL   d}| }t d|}|j|d|d}tddggddgd	}t|| d S )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr	   parserdataresultexpected r   Z/var/www/html/gps/gps/lib/python3.10/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_input   s   r   c                 C   s@   | }t d }|j|ddd d}tddgg}t|| d S )Nu   Łaski, Jan;1;utf-8)r   r	   headeru   Łaski, Jan   r   )r   r   r   r   r   r   r   r   test_read_csv_unicode"   s
   r   r   ,	r	   )utf-16zutf-16lezutf-16bec              	   C   s  | }d d|}dt  d}|dd}d}t|[}dd	lm} ||}	t|d
}
|
	|	 W d    n1 s=w   Y  t
||}|||d}|j|fd|i|}|j|fd|i|}|  t|| W d    d S 1 szw   Y  d S )Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r   __z__.csv   )r   skiprowsr   r   )TextIOWrapperwbr	   r	   )replaceuuiduuid4r   ensure_cleanior%   r   openwriter   r   closer   )r   r   r	   r   r   pathkwargsutf8r%   
bytes_datafbytes_bufferr   r   r   r   r   test_utf16_bom_skiprows,   s,   

"r6   c                 C   s6   t j|d}| }|j|ddd}t|dksJ d S )Nzutf16_ex.txtr!   r    )r	   r   2   )osr0   joinr   len)r   csv_dir_pathr0   r   r   r   r   r   test_utf16_exampleO   s   r<   c                 C   sL   t j|d}| }|j|d dd}|d}|d d }d}||ks$J d S )Nunicode_series.csvlatin-1)r   r	   r   r   i`  u$   Á köldum klaka (Cold Fever) (1994))r8   r0   r9   r   	set_index)r   r;   r0   r   r   gotr   r   r   r   test_unicode_encodingW   s   
rA   zdata,kwargs,expectedza
1ar   z"a"
1	quotechar"zb
1namesb1z
1T)rE   skip_blank_linesFc                    sD   | }d d fdd}|j ||fdi|}t|| d S )Nu   ﻿r   c                    s    |   }t|S )N)r   r   )_databom_databomr2   r   r   _encode_data_with_bom~   s   z,test_utf8_bom.<locals>._encode_data_with_bomr	   )r   r   r   )r   r   r1   r   r   rM   r   r   rK   r   test_utf8_bomd   s   rN   c                 C   sL   t dgdgd}| }||}d|}|jt||d}t|| d S )Ng333333@test)mb_num	multibytezmb_num,multibyte
4.8,testr'   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r	   r   r   r   r   r   test_read_csv_utf_aliases   s   

rU   zfile_path,encoding)))r,   r   csvz	test1.csvr   ))r,   r   r   r=   r>   ))r,   r   r   zsauron.SHIFT_JIS.csvshiftjisc           
      C   s  | }|| }|j ||d}t||d}| |}|jrJ W d    n1 s(w   Y  t|| t|dd}	|j |	|d}|	jrFJ W d    n1 sPw   Y  t|| t|ddd}	|j |	|d}|	jroJ W d    n1 syw   Y  t|| d S )Nr'   rbmoder   )rZ   	buffering)r   r-   closedr   r   )
r   	file_pathr	   datapathr   fpathr   far   fbr   r   r   test_binary_mode_file_buffers   s$   
rb   pass_encodingc           	      C   s   | }| |}tddgi}tjd|dd$}|d |d |j||r(|nd d}t|| W d    d S 1 s=w   Y  d S )	Nfoobarzw+T)rZ   r	   return_filelikezfoo
barr   r'   )rR   r   r   r+   r.   seekr   r   )	r   rS   rT   rc   r   r	   r   r4   r   r   r   r   test_encoding_temp_file   s   


"rh   c                 C   s   | }d}d}d}t ||gi}t -}|| d| | |d |j||d}t|| |j	r8J W d    d S 1 sCw   Y  d S )Nz	shift-jisu	   てすとu   こむ
r   r'   )
r   tempfileNamedTemporaryFiler.   r   rg   r   r   r   r\   )r   r   r	   titler   r   r4   r   r   r   r   test_encoding_named_temp_file   s   

"rm   )r   r!   z	utf-16-bez	utf-16-lezutf-32c                 C   sR   d}t || }t|d| d}tddgddgdd	ggd
dgd}t|| d S )Nu   a	b
：foo	0
bar	1
baz	2r    )	delimiterr	   u   ：foor   re   r   bazr#   rB   rF   )r   r   r   )r	   r   encoded_datar   r   r   r   r   %test_parse_encoded_special_characters   s
   "rq   )r   Nr!   r   r>   c                 C   sx   | }t g dg dg dd}t }|j|d|d |j||dd}W d    n1 s/w   Y  t|| d S )	N)Raphael	DonatellozMiguel AngelLeonardo)redpurpleorangeblue)saizbo staffnunchunkkatana)namemaskweaponF)indexr	   T)r	   
memory_map)r   r   r+   to_csvr   r   )r   r	   r   r   filedfr   r   r   test_encoding_memory_map   s   
r   c                 C   s|   | }t dgd d}d|jd< td}|j|dddd	 |j|d
ddd}W d
   n1 s1w   Y  t|| d
S )zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr   r   r   r	   NTc)r   r   engine)r   ilocr   r+   r   r   r   )r   r   r   fnamedfrr   r   r    test_chunk_splits_multibyte_char   s   
r   c              	   C   s   g }d}d}d}t t|t||D ])}ddd t ||d D d }z|d W n	 ty5   Y qw || q| }t|}td	}	|j	|	d
d
dd |j
|	ddddd}
W d   n1 sew   Y  t||
 dS )zg
    GH 43787

    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
        u   𐂀 c                 S   s   g | ]}t |qS r   )chr).0r   r   r   r   
<listcomp>  s    z,test_readcsv_memmap_utf8.<locals>.<listcomp>ri   r   zutf8test.csvFr   NTr   )r   r   r   r	   )rangeordr9   r   UnicodeEncodeErrorappendr   r   r+   r   r   r   )r   linesline_length
start_charend_charlnumliner   r   r   r   r   r   r   test_readcsv_memmap_utf8  s*   "
r   pyarrow_xfailrZ   zw+bzw+tc                 C   s|   | }d}d|v r
d}t j|d}|| |d ||}W d    n1 s*w   Y  tg dgd}t|| d S )Ns   abcdtabcdrY   r   r   )rj   SpooledTemporaryFiler.   rg   r   r   r   r   )r   rZ   r   contenthandler   r   r   r   r   test_not_readable,  s   

r   )#__doc__r,   r   r8   rj   r)   numpynppytestpandasr   r   pandas._testing_testingr   markusefixturesskip_pyarrowr   r   parametrizer6   r<   rA   nanrN   rU   rb   rh   rm   rq   r   r   r   r   r   r   r   r   <module>   s~    

	 








