spark问题记录

11/17/2023 spark

# 用spark本地写s3时报错,PartialGroupNameException Does not support partial group name resolution on Windows

解决方法

注释掉 `System.setProperty("HADOOP_USER_NAME", "hdfs")`。只有连接 HDFS 时才需要这个配置,本地写 S3 时不需要。

# 用spark本地写s3时报错,java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z

解决方法
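重写 NativeIO:在自己的工程里新建一个同包同名的类 `org.apache.hadoop.io.nativeio.NativeIO`(从 Hadoop 源码复制),让它在类路径上优先于 Hadoop 自带的实现,并把 `Windows.access` 方法改为直接返回 `true`,不再调用本地方法 `access0`。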

# spark报错:org.apache.thrift.TApplicationException: Required field 'filesAdded' is unset! Struct:InsertEventRequestData(filesAdded:null)

报错图片

解决方法
在代码中或 hive-site.xml 中设置下面的配置后不再报错;但通过 spark-submit 命令行传入同一配置时没有生效,原因尚未查明。

hive.metadata.dml.events=false

# spark写s3报错:java.lang.UnsupportedOperationException: S3AFileSystem doesn't support getAclStatus

报错图片
原因

S3AFileSystem 类没有重写 getAclStatus 方法,调用时会走到 FileSystem 基类的默认实现,该实现直接抛出 UnsupportedOperationException。

解决方法
在S3AFileSystem类加入以下代码

/**
 * Converts the grants of an Amazon S3 access control list into Hadoop ACL entries.
 * Needed by {@code getAclStatus}.
 *
 * <p>Each grant yields exactly one entry with scope {@code ACCESS} and type {@code USER};
 * the entry name is the grantee identifier and the permission is obtained by passing the
 * grant's permission string to {@code convertPermission}.
 *
 * @param aclGrants grants taken from an S3 access control list
 * @return a new list holding one {@code AclEntry} per grant, in iteration order
 */
private static List<AclEntry> convertAclGrants(List<Grant> aclGrants) {
    List<AclEntry> converted = new ArrayList<>();
    for (Grant s3Grant : aclGrants) {
        String granteeName = s3Grant.getGrantee().getIdentifier();
        FsAction action = convertPermission(s3Grant.getPermission().toString());
        converted.add(new AclEntry.Builder()
            .setType(AclEntryType.USER)
            .setScope(AclEntryScope.ACCESS)
            .setName(granteeName)
            .setPermission(action)
            .build());
    }
    return converted;
}

/**
 * Maps an Amazon S3 permission string to a Hadoop {@code FsAction}.
 * Needed by {@code getAclStatus}.
 *
 * <p>Two spellings are accepted for each permission: the enum constant name
 * ({@code "FullControl"}, {@code "Read"}, {@code "Write"}) and the upper-case wire string
 * ({@code "FULL_CONTROL"}, {@code "READ"}, {@code "WRITE"}). The caller in this file passes
 * {@code Permission.toString()}, which in the AWS SDK for Java 1.x returns the wire string,
 * so the upper-case forms must be matched or READ/WRITE grants would collapse to
 * {@code NONE}.
 *
 * @param permission S3 permission string; may be {@code null}
 * @return {@code ALL} for full control, {@code WRITE} for write, {@code READ} for read,
 *         and {@code NONE} for {@code null} or any other value
 */
private static FsAction convertPermission(String permission) {
    // A switch on a null String would throw; treat "no permission" as NONE instead.
    if (permission == null) {
        return FsAction.NONE;
    }
    switch (permission) {
        case "FullControl":
        case "FULL_CONTROL":
            return FsAction.ALL;
        case "Write":
        case "WRITE":
            return FsAction.WRITE;
        case "Read":
        case "READ":
            return FsAction.READ;
        default:
            // Anything else (e.g. ACL-specific permissions) has no mapping here.
            return FsAction.NONE;
    }
}


/**
 * Hand-written implementation that avoids
 * {@code java.lang.UnsupportedOperationException: S3AFileSystem doesn't support getAclStatus}.
 *
 * <p>The ACL is fetched for this file system's bucket and translated into a Hadoop
 * {@code AclStatus}: the owner is the S3 owner id, the group is the empty string, and the
 * entries come from {@code convertAclGrants}. Note that {@code path} is not consulted, so
 * every path reports the bucket-level ACL.
 *
 * @param path path whose ACL is requested (unused by this implementation)
 * @return the bucket ACL expressed as a Hadoop {@code AclStatus}
 * @throws IOException declared by the overridden method
 */
@Override
public AclStatus getAclStatus(Path path) throws IOException {
    AccessControlList bucketAcl = this.s3.getBucketAcl(new GetBucketAclRequest(this.bucket));
    // Translate the Amazon S3 ACL into Hadoop's AclStatus.
    AclStatus.Builder status = new AclStatus.Builder();
    status.owner(bucketAcl.getOwner().getId());
    status.group("");
    status.addEntries(convertAclGrants(bucketAcl.getGrantsAsList()));
    return status.build();
}


/**
 * Initializes {@code this.cannedACL} from the {@code fs.s3a.acl.default} setting.
 *
 * <p>When the key is absent, the name of
 * {@code CannedAccessControlList.BucketOwnerFullControl} is used. A non-empty value is
 * resolved with {@code CannedAccessControlList.valueOf}; an empty value sets the field to
 * {@code null}.
 *
 * @param conf configuration to read the canned ACL name from
 */
private void initCannedAcls(Configuration conf) {
    final String aclName = conf.get(
        "fs.s3a.acl.default", CannedAccessControlList.BucketOwnerFullControl.name());
    this.cannedACL = aclName.isEmpty() ? null : CannedAccessControlList.valueOf(aclName);
}

Last Updated: 9/23/2024, 1:24:58 AM